「这是我参与11月更文挑战的第33天,活动详情查看:2021最后一次更文挑战」。
一、HBase API
1.1、环境准备
新建项目后在 pom.xml 中添加依赖:
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.3.1</version>
</dependency>
1.2、HBase API
1.2.1、获取 Configuration 对象
// Shared HBase client configuration, initialized once before every test.
private Configuration conf;
@Before
public void init() {
// Instantiate via HBaseConfiguration's static factory method
conf = HBaseConfiguration.create();
// ZooKeeper quorum + client port are how the client locates the HBase cluster
conf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");
conf.set("hbase.zookeeper.property.clientPort", "2181");
}
1.2.2、判断表是否存在
// JUnit entry point: prints whether the "student" table exists.
@Test
public void isTableExist() throws Exception {
System.out.println(isTableExist("student"));
}
/**
 * Returns true if the given table exists in HBase.
 *
 * Fix: the original executed a stray (half-commented) statement
 * {@code ConnectionFactory.createConnection(conf);} that created and leaked a
 * Connection, and never closed the HBaseAdmin. The stray statement is removed
 * and the admin is now always closed.
 *
 * @param tableName name of the table to look up
 * @return true if the table exists
 */
private boolean isTableExist(String tableName) throws Exception {
    // Managing/inspecting tables requires an admin handle
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        return admin.tableExists(tableName);
    } finally {
        // Release the admin's underlying ZooKeeper/RPC resources
        admin.close();
    }
}
1.2.3、创建表
// JUnit entry point: creates table "student" with one column family "info".
@Test
public void createTable() throws Exception {
createTable("student", "info");
}
/**
 * Creates a table with the given column families, skipping creation (with a
 * message) if the table already exists.
 *
 * Fix: the original never closed the HBaseAdmin, leaking its connection; it is
 * now closed in a finally block. Commented-out dead code removed.
 *
 * @param tableName    name of the table to create
 * @param columnFamily one or more column family names
 */
private void createTable(String tableName, String... columnFamily) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        // Skip creation when the table is already there
        if (isTableExist(tableName)) {
            System.out.println("表" + tableName + "已存在");
        } else {
            // Table descriptor: the table name must be wrapped as a TableName
            HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
            // Register each requested column family
            for (String cf : columnFamily) {
                descriptor.addFamily(new HColumnDescriptor(cf));
            }
            // Create the table from the assembled descriptor
            admin.createTable(descriptor);
            System.out.println("表" + tableName + "创建成功!");
        }
    } finally {
        admin.close();
    }
}
1.2.4、删除表
// JUnit entry point: drops the "student" table.
@Test
public void dropTable() throws Exception {
dropTable("student");
}
/**
 * Disables and deletes the given table if it exists; otherwise prints a message.
 *
 * Fix: the original never closed the HBaseAdmin; it is now closed in a
 * finally block.
 *
 * @param tableName name of the table to drop
 */
private void dropTable(String tableName) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        if (isTableExist(tableName)) {
            // HBase requires a table to be disabled before deletion
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
            System.out.println("表" + tableName + "删除成功!");
        } else {
            System.out.println("表" + tableName + "不存在!");
        }
    } finally {
        admin.close();
    }
}
1.2.5、向表中插入数据
// JUnit entry point: inserts one cell (row 1001, info:name = "moe") into "student".
@Test
public void addRowData() throws Exception {
addRowData("student", "1001", "info", "name", "moe");
}
/**
 * Inserts a single cell (rowKey / columnFamily:column = value) into a table.
 *
 * Fixes: {@code Put.add(byte[], byte[], byte[])} is deprecated in 1.x and
 * removed in 2.0 — replaced with {@code addColumn}; the table is now closed
 * even when {@code put} throws.
 *
 * @param tableName    target table
 * @param rowKey       row key of the cell
 * @param columnFamily column family of the cell
 * @param column       column qualifier of the cell
 * @param value        value to store
 */
private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Table handle for data-path operations
    HTable hTable = new HTable(conf, tableName);
    try {
        // Assemble the mutation: row key first, then the cell
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        hTable.put(put);
    } finally {
        hTable.close();
    }
    System.out.println("插入数据成功");
}
1.2.6、获取所有数据
// JUnit entry point: scans and prints every cell in the "student" table.
@Test
public void getAllRows() throws Exception {
getAllRows("student");
}
/**
 * Performs a full scan of the table and prints row key, family, qualifier and
 * value for every cell.
 *
 * Fix: the original never closed the ResultScanner or the HTable, leaking the
 * scanner lease and connection; both are now closed via nested try/finally.
 *
 * @param tableName table to scan
 */
private void getAllRows(String tableName) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // Scan object describes what to read from the regions
        Scan scan = new Scan();
        // ResultScanner streams matching rows back from the server
        ResultScanner resultScanner = hTable.getScanner(scan);
        try {
            for (Result result : resultScanner) {
                for (Cell cell : result.rawCells()) {
                    // CellUtil.clone* copies the relevant slice out of the cell buffer
                    System.out.println(" 行 键 :" + Bytes.toString(CellUtil.cloneRow(cell)));
                    System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        } finally {
            resultScanner.close();
        }
    } finally {
        hTable.close();
    }
}
1.2.7、获取某一行数据
// JUnit entry point: fetches and prints all cells of row "1001" from "student".
@Test
public void getRow() throws Exception {
getRow("student", "1001");
}
/**
 * Fetches one row by key and prints every cell (row key, family, qualifier,
 * value, timestamp).
 *
 * Fix: the original never closed the HTable; it is now closed in a finally
 * block.
 *
 * @param tableName table to read from
 * @param rowKey    key of the row to fetch
 */
private void getRow(String tableName, String rowKey) throws Exception {
    HTable table = new HTable(conf, tableName);
    try {
        Get get = new Get(Bytes.toBytes(rowKey));
        // get.setMaxVersions();  // would return all versions
        // get.setTimeStamp();    // would return the version at a given timestamp
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
            System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
            System.out.println("时间戳:" + cell.getTimestamp());
        }
    } finally {
        table.close();
    }
}
1.2.8、获取某一行指定“列族:列”的数据
// JUnit entry point: fetches only the info:name cell of row "1001" from "student".
@Test
public void getRowQualifier() throws Exception {
getRowQualifier("student", "1001", "info", "name");
}
private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(rowKey));
get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
Result result = table.get(get);
for (Cell cell : result.rawCells()) {
System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
1. }
}
1.2.9、删除多行数据
// JUnit entry point: deletes row "1001" from the "student" table.
@Test
public void deleteMultiRow() throws Exception {
deleteMultiRow("student", "1001");
}
/**
 * Deletes one or more whole rows from the table in a single batch call.
 *
 * Fix: the original closed the table only on the happy path; the close now
 * lives in a finally block so a failed delete cannot leak the handle.
 *
 * @param tableName table to delete from
 * @param rows      row keys to delete
 */
private void deleteMultiRow(String tableName, String... rows) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // One Delete per row key; the list is sent as a single batch
        List<Delete> deleteList = new ArrayList<>();
        for (String row : rows) {
            deleteList.add(new Delete(Bytes.toBytes(row)));
        }
        hTable.delete(deleteList);
    } finally {
        hTable.close();
    }
}
1.3、MapReduce
通过 HBase 的相关 JavaAPI,我们可以实现伴随 HBase 操作的 MapReduce 过程,比如使用MapReduce 将数据从本地文件系统导入到 HBase 的表中,比如我们从 HBase 中读取一些原始数据后使用 MapReduce 做数据分析。
1.3.1、官方 HBase-MapReduce
-
查看 HBase 的 MapReduce 任务的执行
bin/hbase mapredcp -
环境变量的导入
-
执行环境变量的导入(临时生效,在命令行执行下述操作)
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
永久生效:在/etc/profile.d/my_env.sh 配置
export HBASE_HOME=/opt/module/hbase export HADOOP_HOME=/opt/module/hadoop-3.1.3 -
并在 hadoop-env.sh 中配置:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
-
-
运行官方的 MapReduce 任务
-
案例一:统计 Student 表中有多少行数据
[moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student -
案例二:使用 MapReduce 将本地数据导入到 HBase
-
在本地创建一个 tsv 格式的文件:fruit.tsv
1001	Apple	Red
1002	Pear	Yellow
1003	Pineapple	Yellow
创建 Hbase 表
hbase(main):001:0> create 'fruit','info' -
在 HDFS 中上传 fruit.tsv 文件
[moe@hadoop102 hbase]$ hadoop dfs -put fruit.tsv / -
执行 MapReduce 到 HBase 的 fruit 表中
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \ -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \ hdfs://hadoop102:8020/fruit.tsv -
使用 scan 命令查看导入后的结果
hbase(main):002:0> scan 'fruit'
-
-
1.3.2、自定义 HBase-MapReduce1
目标:将 hdfs上 fruit.tsv 中的数据,通过 MR 迁入到 hbase fruit1 表中。
-
构建FruitMapper类,用于读取fruit.tsv中的数据
public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); } } -
构建FruitReduce类,用于将读取到的fruit.tsv中的每行数据写入到hbase fruit1表中
public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> { @Override protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { // 1001 Apple Red String line = value.toString(); String[] fields = line.split("\t"); Put put = new Put(Bytes.toBytes(fields[0])); put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1])); put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2])); context.write(NullWritable.get(), put); } } } -
构建FruitDriver类,用于组装Job任务
public class FruitDriver implements Tool { private Configuration conf = null; @Override public int run(String[] args) throws Exception { // 1.获取Job对象 Job job = Job.getInstance(conf); // 2.设置驱动类路径 job.setJarByClass(FruitDriver.class); // 3.设置Mapper&Mapper输出的KV类型 job.setMapperClass(FruitMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); // 4.设置Reduce类 TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job); // 5.设置输入参数 FileInputFormat.setInputPaths(job, new Path(args[0])); // 6.提交任务 boolean result = job.waitForCompletion(true); return result ? 0 : 1; } @Override public void setConf(Configuration configuration) { this.conf = configuration; } @Override public Configuration getConf() { return conf; } public static void main(String[] args) { try { Configuration conf = new Configuration(); int run = ToolRunner.run(conf, new FruitDriver(), args); System.exit(run); } catch (Exception e) { e.printStackTrace(); } } } -
主函数中调用运行该 Job 任务
public static void main(String[] args) { try { Configuration conf = new Configuration(); int run = ToolRunner.run(conf, new FruitDriver(), args); System.exit(run); } catch (Exception e) { e.printStackTrace(); } } -
打包运行任务
[moe@hadoop102 test]$ yarn jar hbase-1.0-SNAPSHOT.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1
提示:运行任务前,如果待数据导入的表不存在,则需要提前创建。
1.3.3、自定义 Hbase-MapReduce2
目标:实现将HBase中fruit1表中的name列中的数据导入到fruit2表中。
-
构建Fruit2Mapper读取fruit1表中的数据
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> { @Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { // 构建put对象 Put put = new Put(key.get()); for (Cell cell : value.rawCells()) { if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) { put.add(cell); } } context.write(key, put); } } -
构建Fruit2Reduce,将数据写入到fruit2中
public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> { @Override protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException { for (Put put : values) { context.write(NullWritable.get(), put); } } } -
构建Fruit2Driver,用于组装Job任务
public class Fruit2Driver implements Tool { private Configuration conf = null; @Override public int run(String[] args) throws Exception { Job job = Job.getInstance(conf); job.setJarByClass(Fruit2Driver.class); // args[0]:表1 TableMapReduceUtil.initTableMapperJob("fruit1", new Scan(), Fruit2Mapper.class, ImmutableBytesWritable.class, Put.class, job); // args[1]:表2 TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job); boolean result = job.waitForCompletion(true); return result ? 0 : 1; } @Override public void setConf(Configuration configuration) { this.conf = configuration; } @Override public Configuration getConf() { return conf; } public static void main(String[] args) { try { // Configuration conf = new Configuration(); //使用需要打包扔到集群中进行测试 // 本地连接HBase,直接进行测试,需要hbase-site.xml配置文件 Configuration conf = HBaseConfiguration.create(); int run = ToolRunner.run(conf, new Fruit2Driver(), args); System.exit(run); } catch (Exception e) { e.printStackTrace(); } } } -
主函数中调用运行该 Job 任务
public static void main(String[] args) { try { // Configuration conf = new Configuration(); //使用需要打包扔到集群中进行测试 // 本地连接HBase,直接进行测试,需要hbase-site.xml配置文件 Configuration conf = HBaseConfiguration.create(); int run = ToolRunner.run(conf, new Fruit2Driver(), args); System.exit(run); } catch (Exception e) { e.printStackTrace(); } }