大数据HBase学习之旅第二篇

381 阅读4分钟

「这是我参与11月更文挑战的第33天,活动详情查看:2021最后一次更文挑战」。

一、HBase API

1.1、环境准备

新建项目后在 pom.xml 中添加依赖:

<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>

1.2、HBase API

1.2.1、获取 Configuration 对象

// Shared HBase client configuration, built once before each test.
private Configuration conf;

@Before
public void init() {
    // Instantiate via the HBaseConfiguration factory method
    conf = HBaseConfiguration.create();
    // ZooKeeper quorum hosts used to locate the HBase cluster
    conf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");
    // ZooKeeper client port (HBase default is 2181)
    conf.set("hbase.zookeeper.property.clientPort", "2181");
}

1.2.2、判断表是否存在

@Test
public void isTableExist() throws Exception {
    // Report whether the "student" table already exists.
    boolean exists = isTableExist("student");
    System.out.println(exists);
}

/**
 * Checks whether the given table exists in HBase.
 *
 * @param tableName name of the table to look up
 * @return true if the table exists, false otherwise
 * @throws Exception if connecting to HBase fails
 */
private boolean isTableExist(String tableName) throws Exception {
    // Managing/accessing tables requires an admin handle.
    // Fix: the original also executed a stray ConnectionFactory.createConnection(conf)
    // whose result was discarded (a leaked Connection), and never closed the admin.
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        return admin.tableExists(tableName);
    } finally {
        admin.close();
    }
}

1.2.3、创建表

@Test
public void createTable() throws Exception {
    // Create the "student" table with one column family, "info".
    final String tableName = "student";
    createTable(tableName, "info");
}

/**
 * Creates a table with the given column families; prints a message and does
 * nothing if the table already exists.
 *
 * @param tableName    table to create
 * @param columnFamily one or more column family names
 * @throws Exception if the HBase operation fails
 */
private void createTable(String tableName, String... columnFamily) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        // Skip creation when the table already exists
        if (isTableExist(tableName)) {
            System.out.println("表" + tableName + "已存在");
        } else {
            // Table descriptor; the table name must be converted to bytes
            HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
            // Register each requested column family
            for (String cf : columnFamily) {
                descriptor.addFamily(new HColumnDescriptor(cf));
            }
            // Create the table from the assembled descriptor
            admin.createTable(descriptor);
            System.out.println("表" + tableName + "创建成功!");
        }
    } finally {
        // Fix: the original never closed the admin handle
        admin.close();
    }
}

1.2.4、删除表

@Test
public void dropTable() throws Exception {
    // Remove the "student" table if present.
    final String tableName = "student";
    dropTable(tableName);
}

/**
 * Disables and deletes the given table; prints a message if it does not exist.
 *
 * @param tableName table to drop
 * @throws Exception if the HBase operation fails
 */
private void dropTable(String tableName) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
        if (isTableExist(tableName)) {
            // A table must be disabled before it can be deleted
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
            System.out.println("表" + tableName + "删除成功!");
        } else {
            System.out.println("表" + tableName + "不存在!");
        }
    } finally {
        // Fix: the original never closed the admin handle
        admin.close();
    }
}

1.2.5、向表中插入数据

@Test
public void addRowData() throws Exception {
    // Insert info:name=moe into row 1001 of the student table.
    addRowData("student", "1001", "info", "name", "moe");
}

/**
 * Inserts a single cell (columnFamily:column = value) into the given row.
 *
 * @param tableName    target table
 * @param rowKey       row to write
 * @param columnFamily column family of the cell
 * @param column       qualifier of the cell
 * @param value        cell value
 * @throws Exception if the HBase operation fails
 */
private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Open a handle on the target table
    HTable hTable = new HTable(conf, tableName);
    try {
        // Assemble the Put for the target row
        Put put = new Put(Bytes.toBytes(rowKey));
        // Fix: use addColumn instead of the deprecated Put.add(byte[], byte[], byte[])
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        hTable.put(put);
    } finally {
        // Fix: close the table even if the put fails
        hTable.close();
    }
    System.out.println("插入数据成功");
}

1.2.6、获取所有数据

@Test
public void getAllRows() throws Exception {
    // Dump every cell of the student table.
    final String tableName = "student";
    getAllRows(tableName);
}

/**
 * Scans the whole table and prints row key, family, qualifier and value
 * for every cell.
 *
 * @param tableName table to scan
 * @throws Exception if the HBase operation fails
 */
private void getAllRows(String tableName) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // Scan object describing which regions/rows to read (here: everything)
        Scan scan = new Scan();
        // Fix: the original never closed the scanner or the table
        ResultScanner resultScanner = hTable.getScanner(scan);
        try {
            for (Result result : resultScanner) {
                Cell[] cells = result.rawCells();
                for (Cell cell : cells) {
                    // Row key
                    System.out.println(" 行 键 :" + Bytes.toString(CellUtil.cloneRow(cell)));
                    // Column family
                    System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        } finally {
            resultScanner.close();
        }
    } finally {
        hTable.close();
    }
}

1.2.7、获取某一行数据

@Test
public void getRow() throws Exception {
    // Print all cells of row 1001 in the student table.
    getRow("student", "1001");
}

/**
 * Fetches one row and prints every cell (row key, family, qualifier,
 * value, timestamp).
 *
 * @param tableName table to read
 * @param rowKey    row to fetch
 * @throws Exception if the HBase operation fails
 */
private void getRow(String tableName, String rowKey) throws Exception {
    HTable table = new HTable(conf, tableName);
    try {
        Get get = new Get(Bytes.toBytes(rowKey));
        // get.setMaxVersions();  would return all cell versions
        // get.setTimeStamp();    would return the version at a given timestamp
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
            System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
            System.out.println("时间戳:" + cell.getTimestamp());
        }
    } finally {
        // Fix: the original never closed the table handle
        table.close();
    }
}

1.2.8、获取某一行指定“列族:列”的数据

@Test
public void getRowQualifier() throws Exception {
    // Print only the info:name cell of row 1001.
    getRowQualifier("student", "1001", "info", "name");
}

private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println(" 行 键 :" + Bytes.toString(result.getRow()));
        System.out.println(" 列 族 " + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println(" 列 :" + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println(" 值 :" + Bytes.toString(CellUtil.cloneValue(cell)));
1.     }
}

1.2.9、删除多行数据

@Test
public void deleteMultiRow() throws Exception {
    // Delete row 1001 from the student table.
    final String tableName = "student";
    deleteMultiRow(tableName, "1001");
}

/**
 * Deletes one or more whole rows from the given table in a single batch call.
 *
 * @param tableName table to delete from
 * @param rows      row keys to delete
 * @throws Exception if the HBase operation fails
 */
private void deleteMultiRow(String tableName, String... rows) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    try {
        // One Delete per row key, submitted as a single batch
        List<Delete> deleteList = new ArrayList<>();
        for (String row : rows) {
            deleteList.add(new Delete(Bytes.toBytes(row)));
        }
        hTable.delete(deleteList);
    } finally {
        // Fix: close the table even when delete() throws
        hTable.close();
    }
}

1.3、MapReduce

通过 HBase 的相关 JavaAPI,我们可以实现伴随 HBase 操作的 MapReduce 过程,比如使用MapReduce 将数据从本地文件系统导入到 HBase 的表中,比如我们从 HBase 中读取一些原始数据后使用 MapReduce 做数据分析。

1.3.1、官方 HBase-MapReduce

  1. 查看 HBase 的 MapReduce 任务的执行

    bin/hbase mapredcp
    
  2. 环境变量的导入

    • 执行环境变量的导入(临时生效,在命令行执行下述操作)

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
      export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
      
    • 永久生效:在/etc/profile.d/my_env.sh 配置

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
      
    • 并在 hadoop-env.sh 中配置:

      export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
      
  3. 运行官方的 MapReduce 任务

    • 案例一:统计 Student 表中有多少行数据

      [moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student
      

      image.png

    • 案例二:使用 MapReduce 将本地数据导入到 HBase

      • 在本地创建一个 tsv 格式的文件:fruit.tsv

        1001	Apple	Red
        1002	Pear	Yellow
        1003	Pineapple	Yellow
        
      • 创建 Hbase 表

        hbase(main):001:0> create 'fruit','info'
        
      • 在 HDFS 中上传 fruit.tsv 文件

        [moe@hadoop102 hbase]$ hadoop fs -put fruit.tsv /
        
      • 执行 MapReduce 到 HBase 的 fruit 表中

        /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
        -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
        hdfs://hadoop102:8020/fruit.tsv
        
      • 使用 scan 命令查看导入后的结果

        hbase(main):002:0> scan 'fruit'
        

        image.png

1.3.2、自定义 HBase-MapReduce1

目标:将 hdfs上 fruit.tsv 中的数据,通过 MR 迁入到 hbase fruit1 表中。

  1. 构建FruitMapper类,用于读取fruit.tsv中的数据

    // Identity mapper: forwards each line of fruit.tsv unchanged.
    // Key = byte offset of the line in the file, value = the line text;
    // all parsing is deferred to FruitReducer.
    public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(key, value);
        }
    }
    
  2. 构建FruitReduce类,用于将读取到的fruit.tsv中的每行数据写入到hbase fruit1表中

    /**
     * Turns each tab-separated fruit record into an HBase Put on the "info"
     * column family. Expected record layout: rowKey \t name \t color.
     */
    public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {
    
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Each value is one tsv record, e.g. "1001\tApple\tRed"
            for (Text record : values) {
                String[] columns = record.toString().split("\t");
                Put rowPut = new Put(Bytes.toBytes(columns[0]));
                rowPut.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(columns[1]));
                rowPut.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(columns[2]));
                context.write(NullWritable.get(), rowPut);
            }
        }
    }
    
  3. 构建FruitDriver类,用于组装Job任务

    /**
     * Job driver that wires FruitMapper/FruitReducer together to load a tsv
     * file (args[0]) into the HBase table named by args[1].
     */
    public class FruitDriver implements Tool {
    
        private Configuration configuration = null;
    
        @Override
        public int run(String[] args) throws Exception {
            // Build the job from the injected configuration
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FruitDriver.class);
            // Mapper and its output key/value types
            job.setMapperClass(FruitMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);
            // Reducer writes into the HBase table given as the second argument
            TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
            // Input tsv path is the first argument
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            // Submit and block until the job finishes
            return job.waitForCompletion(true) ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.configuration = configuration;
        }
    
        @Override
        public Configuration getConf() {
            return configuration;
        }
    
        public static void main(String[] args) {
            try {
                // ToolRunner parses generic options and injects the configuration
                int exitCode = ToolRunner.run(new Configuration(), new FruitDriver(), args);
                System.exit(exitCode);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
  4. 主函数中调用运行该 Job 任务

    public static void main(String[] args) {
        try {
            // ToolRunner parses generic options and injects the configuration
            int exitCode = ToolRunner.run(new Configuration(), new FruitDriver(), args);
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    
  5. 打包运行任务

    [moe@hadoop102 test]$ yarn jar hbase-1.0-SNAPSHOT.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1
    

    提示:运行任务前,如果待数据导入的表不存在,则需要提前创建。

1.3.3、自定义 HBase-MapReduce2

目标:实现将HBase中fruit1表中的name列中的数据导入到fruit2表中。

  1. 构建Fruit2Mapper读取fruit1表中的数据

    /**
     * Reads rows from the source table and keeps only the cells whose
     * qualifier is "name", emitting one Put per input row.
     */
    public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // One Put per row, keyed by the original row key
            Put namePut = new Put(key.get());
            for (Cell cell : value.rawCells()) {
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                // Copy only the "name" column into the output Put
                if ("name".equals(qualifier)) {
                    namePut.add(cell);
                }
            }
            context.write(key, namePut);
        }
    }
    
  2. 构建Fruit2Reduce,将数据写入到fruit2中

    /**
     * Pass-through reducer: writes every incoming Put into the target table
     * unchanged.
     */
    public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
            for (Put outPut : values) {
                context.write(NullWritable.get(), outPut);
            }
        }
    }
    
  3. 构建Fruit2Driver,用于组装Job任务

    // Job driver copying the "name" column from table fruit1 into table fruit2.
    public class Fruit2Driver implements Tool {
    
        private Configuration conf = null;
    
        @Override
        public int run(String[] args) throws Exception {
            Job job = Job.getInstance(conf);
            job.setJarByClass(Fruit2Driver.class);
            // Source table — NOTE: hard-coded to "fruit1"; the original comment
            // claimed args[0] was used here, but it is not
            TableMapReduceUtil.initTableMapperJob("fruit1",
                    new Scan(),
                    Fruit2Mapper.class,
                    ImmutableBytesWritable.class,
                    Put.class, job);
            // Target table — likewise hard-coded to "fruit2", not args[1]
            TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job);
            boolean result = job.waitForCompletion(true);
            return result ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.conf = configuration;
        }
    
        @Override
        public Configuration getConf() {
            return conf;
        }
    
        public static void main(String[] args) {
            try {
                // Configuration conf = new Configuration(); // requires packaging and running on the cluster
                // HBaseConfiguration.create() reads hbase-site.xml, so the job can
                // connect to HBase directly from the local machine for testing
                Configuration conf = HBaseConfiguration.create();
                int run = ToolRunner.run(conf, new Fruit2Driver(), args);
                System.exit(run);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
  4. 主函数中调用运行该 Job 任务

    public static void main(String[] args) {
        try {
            // HBaseConfiguration.create() reads hbase-site.xml, so the job can
            // connect to HBase directly from the local machine for testing.
            // (Use plain `new Configuration()` when packaging for the cluster.)
            Configuration conf = HBaseConfiguration.create();
            int exitCode = ToolRunner.run(conf, new Fruit2Driver(), args);
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    

    image.png

二、友情链接

大数据HBase学习之旅第一篇