大数据HBase学习之旅第二篇

381 阅读4分钟

「这是我参与11月更文挑战的第33天,活动详情查看:2021最后一次更文挑战」。

一、HBase API

1.1、环境准备

新建项目后在 pom.xml 中添加依赖:

<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>

1.2、HBase API

1.2.1、获取 Configuration 对象

// Shared HBase client configuration, built once before each test.
private Configuration conf;

@Before
public void init() {
    // Instantiate via the HBaseConfiguration factory method
    conf = HBaseConfiguration.create();
    // ZooKeeper quorum hosts used to locate the HBase cluster
    conf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");
    // ZooKeeper client port (HBase default is 2181)
    conf.set("hbase.zookeeper.property.clientPort", "2181");
}

1.2.2、判断表是否存在

@Test
public void isTableExist() throws Exception {
    // Report whether the "student" table already exists.
    boolean exists = isTableExist("student");
    System.out.println(exists);
}

/**
 * Checks whether the given table exists in HBase.
 *
 * @param tableName name of the table to look up
 * @return true if the table exists, false otherwise
 * @throws Exception if connecting to HBase fails
 */
private boolean isTableExist(String tableName) throws Exception {
    // Managing/accessing tables requires an admin handle.
    // Fix: the original also executed a stray ConnectionFactory.createConnection(conf)
    // whose result was discarded (a leaked Connection), and never closed the admin.
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        return admin.tableExists(tableName);
    } finally {
        admin.close();
    }
}

1.2.3、创建表

@Test
public void createTable() throws Exception {
    // Create the "student" table with one column family, "info".
    final String tableName = "student";
    createTable(tableName, "info");
}

/**
 * Creates a table with the given column families; prints a message and does
 * nothing if the table already exists.
 *
 * @param tableName    table to create
 * @param columnFamily one or more column family names
 * @throws Exception if the HBase operation fails
 */
private void createTable(String tableName, String... columnFamily) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        // Skip creation when the table already exists
        if (isTableExist(tableName)) {
            System.out.println("表" + tableName + "已存在");
        } else {
            // Table descriptor; the table name must be converted to bytes
            HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
            // Register each requested column family
            for (String cf : columnFamily) {
                descriptor.addFamily(new HColumnDescriptor(cf));
            }
            // Create the table from the assembled descriptor
            admin.createTable(descriptor);
            System.out.println("表" + tableName + "创建成功!");
        }
    } finally {
        // Fix: the original never closed the admin handle
        admin.close();
    }
}

1.2.4、删除表

@Test
public void dropTable() throws Exception {
    // Remove the "student" table if present.
    final String tableName = "student";
    dropTable(tableName);
}

/**
 * Disables and deletes the given table; prints a message if it does not exist.
 *
 * @param tableName table to drop
 * @throws Exception if the HBase operation fails
 */
private void dropTable(String tableName) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        if (isTableExist(tableName)) {
            // A table must be disabled before it can be deleted
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
            System.out.println("表" + tableName + "删除成功!");
        } else {
            System.out.println("表" + tableName + "不存在!");
        }
    } finally {
        // Fix: the original never closed the admin handle
        admin.close();
    }
}

1.2.5、向表中插入数据

@Test
public void addRowData() throws Exception {
    // Insert info:name=moe into row 1001 of the student table.
    addRowData("student", "1001", "info", "name", "moe");
}

/**
 * Inserts a single cell (columnFamily:column = value) into the given row.
 *
 * @param tableName    target table
 * @param rowKey       row to write
 * @param columnFamily column family of the cell
 * @param column       qualifier of the cell
 * @param value        cell value
 * @throws Exception if the HBase operation fails
 */
private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Open a handle on the target table
    HTable hTable = new HTable(conf, tableName);
    try {
        // Assemble the Put for the target row
        Put put = new Put(Bytes.toBytes(rowKey));
        // Fix: use addColumn instead of the deprecated Put.add(byte[], byte[], byte[])
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        hTable.put(put);
    } finally {
        // Fix: close the table even if the put fails
        hTable.close();
    }
    System.out.println("插入数据成功");
}

1.2.6、获取所有数据

@Test
public void getAllRows() throws Exception {
    // Dump every cell of the student table.
    final String tableName = "student";
    getAllRows(tableName);
}

/**
 * Scans the whole table and prints row key, family, qualifier and value
 * for every cell.
 *
 * @param tableName table to scan
 * @throws Exception if the HBase operation fails
 */
private void getAllRows(String tableName) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // Scan object describing which regions/rows to read (here: everything)
        Scan scan = new Scan();
        // Fix: the original never closed the scanner or the table
        ResultScanner resultScanner = hTable.getScanner(scan);
        try {
            for (Result result : resultScanner) {
                Cell[] cells = result.rawCells();
                for (Cell cell : cells) {
                    // Row key
                    System.out.println(" 行 键 :" + Bytes.toString(CellUtil.cloneRow(cell)));
                    // Column family
                    System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        } finally {
            resultScanner.close();
        }
    } finally {
        hTable.close();
    }
}

1.2.7、获取某一行数据

@Test
public void getRow() throws Exception {
    // Print all cells of row 1001 in the student table.
    getRow("student", "1001");
}

/**
 * Fetches one row and prints every cell (row key, family, qualifier,
 * value, timestamp).
 *
 * @param tableName table to read
 * @param rowKey    row to fetch
 * @throws Exception if the HBase operation fails
 */
private void getRow(String tableName, String rowKey) throws Exception {
    HTable table = new HTable(conf, tableName);
    try {
        Get get = new Get(Bytes.toBytes(rowKey));
        // get.setMaxVersions();  would return all cell versions
        // get.setTimeStamp();    would return the version at a given timestamp
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
            System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
            System.out.println("时间戳:" + cell.getTimestamp());
        }
    } finally {
        // Fix: the original never closed the table handle
        table.close();
    }
}

1.2.8、获取某一行指定“列族:列”的数据

@Test
public void getRowQualifier() throws Exception {
    // Print only the info:name cell of row 1001.
    getRowQualifier("student", "1001", "info", "name");
}

private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
        System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
1.     }
}

1.2.9、删除多行数据

@Test
public void deleteMultiRow() throws Exception {
    // Delete row 1001 from the student table.
    final String tableName = "student";
    deleteMultiRow(tableName, "1001");
}

/**
 * Deletes one or more whole rows from the given table in a single batch call.
 *
 * @param tableName table to delete from
 * @param rows      row keys to delete
 * @throws Exception if the HBase operation fails
 */
private void deleteMultiRow(String tableName, String... rows) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // One Delete per row key, submitted as a single batch
        List<Delete> deleteList = new ArrayList<>();
        for (String row : rows) {
            deleteList.add(new Delete(Bytes.toBytes(row)));
        }
        hTable.delete(deleteList);
    } finally {
        // Fix: close the table even when delete() throws
        hTable.close();
    }
}

1.3、MapReduce

通过 HBase 的相关 JavaAPI,我们可以实现伴随 HBase 操作的 MapReduce 过程,比如使用MapReduce 将数据从本地文件系统导入到 HBase 的表中,比如我们从 HBase 中读取一些原始数据后使用 MapReduce 做数据分析。

1.3.1、官方 HBase-MapReduce

  1. 查看 HBase 的 MapReduce 任务的执行

    bin/hbase mapredcp
    
  2. 环境变量的导入

    • 执行环境变量的导入(临时生效,在命令行执行下述操作)

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
      export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
      
    • 永久生效:在/etc/profile.d/my_env.sh 配置

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
      
    • 并在 hadoop-env.sh 中配置:

      export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
      
  3. 运行官方的 MapReduce 任务

    • 案例一:统计 Student 表中有多少行数据

      [moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student
      

      image.png

    • 案例二:使用 MapReduce 将本地数据导入到 HBase

      • 在本地创建一个 tsv 格式的文件:fruit.tsv

        1001	Apple	Red
        1002	Pear	Yellow
        1003	Pineapple	Yellow
        
      • 创建 Hbase 表

        hbase(main):001:0> create 'fruit','info'
        
      • 在 HDFS 中上传 fruit.tsv 文件

        [moe@hadoop102 hbase]$ hadoop fs -put fruit.tsv /
        
      • 执行 MapReduce 到 HBase 的 fruit 表中

        /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
        -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
        hdfs://hadoop102:8020/fruit.tsv
        
      • 使用 scan 命令查看导入后的结果

        hbase(main):002:0> scan 'fruit'
        

        image.png

1.3.2、自定义 HBase-MapReduce1

目标:将 hdfs上 fruit.tsv 中的数据,通过 MR 迁入到 hbase fruit1 表中。

  1. 构建FruitMapper类,用于读取fruit.tsv中的数据

    // Identity mapper: forwards each line of fruit.tsv unchanged.
    // Key = byte offset of the line in the file, value = the line text;
    // all parsing is deferred to FruitReducer.
    public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(key, value);
        }
    }
    
  2. 构建FruitReduce类,用于将读取到的fruit.tsv中的每行数据写入到hbase fruit1表中

    /**
     * Turns each tab-separated fruit record into an HBase Put on the "info"
     * column family. Expected record layout: rowKey \t name \t color.
     */
    public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {
    
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Each value is one tsv record, e.g. "1001\tApple\tRed"
            for (Text record : values) {
                String[] columns = record.toString().split("\t");
                Put rowPut = new Put(Bytes.toBytes(columns[0]));
                rowPut.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(columns[1]));
                rowPut.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(columns[2]));
                context.write(NullWritable.get(), rowPut);
            }
        }
    }
    
  3. 构建FruitDriver类,用于组装Job任务

    /**
     * Job driver that wires FruitMapper/FruitReducer together to load a tsv
     * file (args[0]) into the HBase table named by args[1].
     */
    public class FruitDriver implements Tool {
    
        private Configuration configuration = null;
    
        @Override
        public int run(String[] args) throws Exception {
            // Build the job from the injected configuration
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FruitDriver.class);
            // Mapper and its output key/value types
            job.setMapperClass(FruitMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);
            // Reducer writes into the HBase table given as the second argument
            TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
            // Input tsv path is the first argument
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            // Submit and block until the job finishes
            return job.waitForCompletion(true) ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.configuration = configuration;
        }
    
        @Override
        public Configuration getConf() {
            return configuration;
        }
    
        public static void main(String[] args) {
            try {
                // ToolRunner parses generic options and injects the configuration
                int exitCode = ToolRunner.run(new Configuration(), new FruitDriver(), args);
                System.exit(exitCode);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
  4. 主函数中调用运行该 Job 任务

    public static void main(String[] args) {
        try {
            // ToolRunner parses generic options and injects the configuration
            int exitCode = ToolRunner.run(new Configuration(), new FruitDriver(), args);
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    
  5. 打包运行任务

    [moe@hadoop102 test]$ yarn jar hbase-1.0-SNAPSHOT.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1
    

    提示:运行任务前,如果待数据导入的表不存在,则需要提前创建。

1.3.3、自定义 HBase-MapReduce2

目标:实现将HBase中fruit1表中的name列中的数据导入到fruit2表中。

  1. 构建Fruit2Mapper读取fruit1表中的数据

    /**
     * Reads rows from the source table and keeps only the cells whose
     * qualifier is "name", emitting one Put per input row.
     */
    public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // One Put per row, keyed by the original row key
            Put namePut = new Put(key.get());
            for (Cell cell : value.rawCells()) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                // Copy only the "name" column into the output Put
                if ("name".equals(qualifier)) {
                    namePut.add(cell);
                }
            }
            context.write(key, namePut);
        }
    }
    
  2. 构建Fruit2Reduce,将数据写入到fruit2中

    /**
     * Pass-through reducer: writes every incoming Put into the target table
     * unchanged.
     */
    public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
            for (Put outPut : values) {
                context.write(NullWritable.get(), outPut);
            }
        }
    }
    
  3. 构建Fruit2Driver,用于组装Job任务

    // Job driver copying the "name" column from table fruit1 into table fruit2.
    public class Fruit2Driver implements Tool {
    
        private Configuration conf = null;
    
        @Override
        public int run(String[] args) throws Exception {
            Job job = Job.getInstance(conf);
            job.setJarByClass(Fruit2Driver.class);
            // Source table — NOTE: hard-coded to "fruit1"; the original comment
            // claimed args[0] was used here, but it is not
            TableMapReduceUtil.initTableMapperJob("fruit1",
                    new Scan(),
                    Fruit2Mapper.class,
                    ImmutableBytesWritable.class,
                    Put.class, job);
            // Target table — likewise hard-coded to "fruit2", not args[1]
            TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job);
            boolean result = job.waitForCompletion(true);
            return result ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.conf = configuration;
        }
    
        @Override
        public Configuration getConf() {
            return conf;
        }
    
        public static void main(String[] args) {
            try {
                // Configuration conf = new Configuration(); // requires packaging and running on the cluster
                // HBaseConfiguration.create() reads hbase-site.xml, so the job can
                // connect to HBase directly from the local machine for testing
                Configuration conf = HBaseConfiguration.create();
                int run = ToolRunner.run(conf, new Fruit2Driver(), args);
                System.exit(run);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
  4. 主函数中调用运行该 Job 任务

    public static void main(String[] args) {
        try {
            // HBaseConfiguration.create() reads hbase-site.xml, so the job can
            // connect to HBase directly from the local machine for testing.
            // (Use plain `new Configuration()` when packaging for the cluster.)
            Configuration conf = HBaseConfiguration.create();
            int exitCode = ToolRunner.run(conf, new Fruit2Driver(), args);
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    

    image.png

二、友情链接

大数据HBase学习之旅第一篇