Java API 使用
这里也会测试使用Protobuf存储数据,可参考"Protobuf简介"一节准备protobuf环境
phone.proto
// Protobuf package for the generated code.
package org.example.hbase;
// One call-record payload: the three values that are otherwise stored as
// separate HBase columns (toNum, length, type), packed into a single message.
message PhoneDetail
{
// Number of the other party.
required string toNum = 1;
// Call duration, carried as a string.
required string length = 2;
// Call type, carried as a string.
required string type = 3;
}
# 将这里生成的Phone.java拷贝到下面的maven项目中
protoc phone.proto --java_out=./
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the HBase client demo: depends on hbase-client and JUnit 4, compiles for Java 8 with UTF-8 sources. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example.hbase</groupId>
<artifactId>hbase-test</artifactId>
<version>1.0.0</version>
<properties>
<!-- Java language level, reused by the compiler plugin configuration below -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- HBase client API used by the Java examples -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.5.8</version>
</dependency>
<!-- JUnit 4; no scope is declared here, so it is not limited to test code -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13</version>
</dependency>
</dependencies>
<build>
<!-- NOTE(review): this POM declares no name element; confirm what ${project.name} resolves to here -->
<finalName>${project.name}</finalName>
<resources>
<!-- Resources under src/main/resources are filtered (property placeholders are substituted) -->
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
<plugins>
<!-- Compiler plugin: pins the source/target level and the source encoding -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<target>${maven.compiler.target}</target>
<source>${maven.compiler.source}</source>
<encoding>UTF-8</encoding>
<!-- NOTE(review): for this plugin "skip" presumably bypasses compilation of test sources; confirm that is intended -->
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</build>
</project>
package org.example.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
// https://hbase.apache.org/book.html 官网中 Apache HBase APIs
public class HBaseDemo {
// Client configuration; created in init()
Configuration conf = null;
// Cluster connection; opened in init() and closed in destroy()
Connection conn = null;
// Admin handle used for table management (create/disable/delete)
Admin admin = null;
// Data-access handle for the table named by tableName
Table table;
// Name of the table every test in this class works on
TableName tableName = TableName.valueOf("phone");
/**
 * Opens the HBase connection and the admin/table handles before each test.
 *
 * @throws IOException if the connection or one of the handles cannot be obtained
 */
@Before
public void init() throws IOException {
    // HBase-aware configuration, pointed at the ZooKeeper quorum of the cluster
    this.conf = HBaseConfiguration.create();
    this.conf.set("hbase.zookeeper.quorum", "node02,node03,node04");

    // One connection per test; the admin and table handles are derived from it
    this.conn = ConnectionFactory.createConnection(this.conf);
    this.admin = this.conn.getAdmin();
    this.table = this.conn.getTable(this.tableName);
}
/**
 * (Re)creates the test table with the two column families cf1 and cf2.
 * An existing table of the same name is disabled and deleted first.
 *
 * @throws IOException if an admin operation fails
 */
@Test
public void createTable() throws IOException {
    // Column family descriptors
    ColumnFamilyDescriptor firstFamily = ColumnFamilyDescriptorBuilder.newBuilder("cf1".getBytes()).build();
    ColumnFamilyDescriptor secondFamily = ColumnFamilyDescriptorBuilder.newBuilder("cf2".getBytes()).build();

    // Table descriptor carrying both families
    TableDescriptor descriptor = TableDescriptorBuilder.newBuilder(tableName)
            .setColumnFamilies(Arrays.asList(firstFamily, secondFamily))
            .build();

    boolean alreadyPresent = admin.tableExists(tableName);
    if (alreadyPresent) {
        // A table has to be disabled before it can be deleted
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
    admin.createTable(descriptor);
}
/**
 * Writes a single row ("1") with the columns name, age and sex in family cf1.
 *
 * @throws IOException if the write fails
 */
@Test
public void put() throws IOException {
    byte[] family = Bytes.toBytes("cf1");
    // addColumn takes (family, qualifier, value) and returns the Put, so the calls can be chained
    Put row = new Put(Bytes.toBytes("1"))
            .addColumn(family, Bytes.toBytes("name"), Bytes.toBytes("lisi"))
            .addColumn(family, Bytes.toBytes("age"), Bytes.toBytes("20"))
            .addColumn(family, Bytes.toBytes("sex"), Bytes.toBytes("man"));
    table.put(row);
}
/**
 * Reads row "1" and checks the three cf1 columns written by {@link #put()}.
 *
 * @throws IOException if the read fails
 */
@Test
public void get() throws IOException {
    byte[] cf1Bytes = Bytes.toBytes("cf1");
    byte[] nameBytes = Bytes.toBytes("name");
    byte[] ageBytes = Bytes.toBytes("age");
    byte[] sexBytes = Bytes.toBytes("sex");

    Get get = new Get(Bytes.toBytes("1"));
    // Select the wanted columns on the server side; a single HBase row can hold a very large number of columns
    get.addColumn(cf1Bytes, nameBytes);
    get.addColumn(cf1Bytes, ageBytes);
    get.addColumn(cf1Bytes, sexBytes);
    Result result = table.get(get);

    // Result.getValue returns null for a missing column, so a missing cell shows up as a
    // readable assertion failure instead of a NullPointerException.
    // JUnit's assertEquals takes (expected, actual).
    Assert.assertEquals("lisi", Bytes.toString(result.getValue(cf1Bytes, nameBytes)));
    Assert.assertEquals("20", Bytes.toString(result.getValue(cf1Bytes, ageBytes)));
    Assert.assertEquals("man", Bytes.toString(result.getValue(cf1Bytes, sexBytes)));
}
/**
 * Scans the whole table and checks every returned row against the values written by {@link #put()}.
 * The assertions only hold while the table contains nothing but that single row.
 *
 * @throws IOException if the scan fails
 */
@Test
public void scan() throws IOException {
    byte[] cf1Bytes = Bytes.toBytes("cf1");
    byte[] nameBytes = Bytes.toBytes("name");
    byte[] ageBytes = Bytes.toBytes("age");
    byte[] sexBytes = Bytes.toBytes("sex");

    // A start row and a stop row can be set on the Scan to read only a key range
    Scan scan = new Scan();
    // The scanner holds server-side resources, so close it when done
    try (ResultScanner resultScanner = table.getScanner(scan)) {
        for (Result rs : resultScanner) {
            // assertEquals takes (expected, actual); getValue yields null for a missing column
            Assert.assertEquals("lisi", Bytes.toString(rs.getValue(cf1Bytes, nameBytes)));
            Assert.assertEquals("20", Bytes.toString(rs.getValue(cf1Bytes, ageBytes)));
            Assert.assertEquals("man", Bytes.toString(rs.getValue(cf1Bytes, sexBytes)));
        }
    }
}
// Random source used by getDate/getNumber and the bulk-insert tests
Random random = new Random();
// Call-time format (yyyyMMddHHmmss) used to build row keys and to print them back
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
// Fixed phone number inserted by the bulk-insert tests so the query tests have a known key
String onePhoneNumber = "15883348450";
/**
 * Bulk-inserts generated call records into family cf1.
 *
 * <p>A call record is: phone number, other party's number, call time, call duration,
 * type (caller/callee). The frequent query is "records of one phone number within a time range".
 * HBase sorts data by row key, so the row-key design has to follow that query:
 * <ul>
 *   <li>all records of one phone number should be stored together, newest first, so the query
 *       becomes a sequential read; hence the key is "phone number + call time";</li>
 *   <li>to sort newest first, the time part is stored as {@code Long.MAX_VALUE - timestamp};</li>
 *   <li>phone numbers share regular leading digits, so to avoid skew the number is scattered;
 *       here it is simply reversed instead of hashed.</li>
 * </ul>
 * The remaining columns are the other party's number, the duration and the type; the call time
 * already lives in the row key and is not stored as a column.
 *
 * @throws Exception if a generated date cannot be parsed or the write fails
 */
@Test
public void putListData() throws Exception {
    byte[] family = Bytes.toBytes("cf1");
    byte[] toNumQualifier = Bytes.toBytes("toNum");
    byte[] lengthQualifier = Bytes.toBytes("length");
    byte[] typeQualifier = Bytes.toBytes("type");

    // 10 users, 10000 records per user for one year
    List<Put> batch = new ArrayList<>();
    for (int user = 0; user < 10; user++) {
        // One user gets the fixed number so the query tests have a known key
        String phoneNumber = (user == 5) ? onePhoneNumber : getNumber("158");
        for (int record = 0; record < 10000; record++) {
            String toNum = getNumber("177");                    // other party's number
            String length = String.valueOf(random.nextInt(100)); // call duration
            String type = String.valueOf(random.nextInt(2));     // caller / callee
            String date = getDate("2023");                      // call time

            // row key: reversed phone number + "_" + (Long.MAX_VALUE - timestamp)
            long invertedTime = Long.MAX_VALUE - sdf.parse(date).getTime();
            String rowKey = reverse(phoneNumber) + "_" + invertedTime;

            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(family, toNumQualifier, Bytes.toBytes(toNum));
            put.addColumn(family, lengthQualifier, Bytes.toBytes(length));
            put.addColumn(family, typeQualifier, Bytes.toBytes(type));
            batch.add(put);
        }
    }
    table.put(batch);
}
/**
 * Builds a random timestamp string of the form {@code <str>MMddHHmmss}.
 *
 * <p>The day is drawn from 1 up to the length of the chosen month, so the result is always a
 * real calendar date. February is capped at 28 days because {@code str} is an arbitrary prefix
 * and the year cannot be relied on; Feb 29 is therefore never generated.
 *
 * @param str prefix placed in front of the generated part, normally the four-digit year
 * @return the prefix followed by a two-digit month, day, hour, minute and second
 */
private String getDate(String str) {
    final int[] daysInMonth = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    int month = random.nextInt(12) + 1;
    // nextInt(n) + 1 gives 1..n; a plain nextInt(31) would produce day 00 and never day 31
    int day = random.nextInt(daysInMonth[month - 1]) + 1;
    int hour = random.nextInt(24);
    int minute = random.nextInt(60);
    int second = random.nextInt(60);
    return str + String.format("%02d%02d%02d%02d%02d", month, day, hour, minute, second);
}
/**
 * Builds a random number string: the given prefix followed by eight zero-padded digits.
 *
 * @param str prefix, e.g. the first three digits of a phone number
 * @return the prefix followed by a value in 00000000..99999999
 */
private String getNumber(String str) {
    // The bound of nextInt is exclusive, so 100_000_000 is needed to cover all eight-digit values
    return str + String.format("%08d", random.nextInt(100_000_000));
}
/**
 * Returns the given string with its characters in reverse order.
 *
 * <p>Uses {@link StringBuilder#reverse()}, which keeps surrogate pairs intact; for the
 * digit-only phone numbers used here the result is a plain character reversal.
 *
 * @param str string to reverse; may be {@code null}
 * @return the reversed string, or {@code str} itself when it is {@code null} or shorter than two characters
 */
private String reverse(String str) {
    if (str == null || str.length() <= 1) {
        return str;
    }
    return new StringBuilder(str).reverse().toString();
}
/**
 * Prints the March 2023 call records of {@code onePhoneNumber} stored in family cf1.
 *
 * <p>Row keys contain {@code Long.MAX_VALUE - timestamp}, so later calls sort first: the start
 * row is derived from the end of the month and the stop row from its beginning. The covered
 * call-time range is [2023-03-01 00:00:00, 2023-04-01 00:00:00), i.e. the whole of March
 * including March 31.
 *
 * <p>In the hbase shell, {@code count 'phone'} can be used to check row counts.
 *
 * @throws Exception if a boundary date cannot be parsed or the scan fails
 */
@Test
public void scanByCondition() throws Exception {
    byte[] cf1Bytes = Bytes.toBytes("cf1");
    byte[] toNumBytes = Bytes.toBytes("toNum");
    byte[] lengthBytes = Bytes.toBytes("length");
    byte[] typeBytes = Bytes.toBytes("type");

    String rowPrefix = reverse(onePhoneNumber) + "_";
    // Later time => smaller key, so April 1st gives the (exclusive) start row ...
    String startRow = rowPrefix + (Long.MAX_VALUE - sdf.parse("20230401000000").getTime());
    // ... and March 1st gives the (inclusive) stop row
    String stopRow = rowPrefix + (Long.MAX_VALUE - sdf.parse("20230301000000").getTime());

    Scan scan = new Scan();
    scan.withStartRow(Bytes.toBytes(startRow), false);
    scan.withStopRow(Bytes.toBytes(stopRow), true);
    // Only cf1 is read; rows that hold nothing but cf2 data are not returned
    scan.addFamily(cf1Bytes);

    // The scanner holds server-side resources, so close it when done
    try (ResultScanner tableScanner = table.getScanner(scan)) {
        for (Result rs : tableScanner) {
            System.out.print(Bytes.toString(rs.getValue(cf1Bytes, toNumBytes)));
            System.out.print("--" + Bytes.toString(rs.getValue(cf1Bytes, lengthBytes)));
            System.out.print("--" + Bytes.toString(rs.getValue(cf1Bytes, typeBytes)));
            // Recover the call time from the inverted timestamp in the row key
            String row = Bytes.toString(rs.getRow());
            long timestamp = Long.MAX_VALUE - Long.parseLong(row.split("_")[1]);
            System.out.println("--" + sdf.format(timestamp));
        }
    }
}
/**
 * Prints all calls of type "0" (outgoing) of {@code onePhoneNumber}.
 *
 * <p>Two conditions must hold at once: the phone number, which is part of the row key and is
 * matched with a prefix filter, and the type, which is a column and is matched with a
 * single-column value filter.
 *
 * @throws IOException if the scan fails
 */
@Test
public void getType() throws IOException {
    byte[] cf1Bytes = Bytes.toBytes("cf1");
    byte[] toNumBytes = Bytes.toBytes("toNum");
    byte[] lengthBytes = Bytes.toBytes("length");
    byte[] typeBytes = Bytes.toBytes("type");
    byte[] rowPrefix = Bytes.toBytes(reverse(onePhoneNumber));

    // Filter list whose members must all pass (logical AND)
    FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    // Row filter: keep rows whose key starts with the reversed phone number
    filters.addFilter(new PrefixFilter(rowPrefix));
    // Column filter: keep rows whose cf1:type equals "0"
    SingleColumnValueFilter typeFilter =
            new SingleColumnValueFilter(cf1Bytes, typeBytes, CompareOperator.EQUAL, Bytes.toBytes("0"));
    // By default rows WITHOUT the column pass this filter; drop them, otherwise rows that only
    // hold cf2 data are returned and break the printing below
    typeFilter.setFilterIfMissing(true);
    filters.addFilter(typeFilter);

    Scan scan = new Scan();
    // All matching keys are >= the prefix, so there is no need to scan from the start of the table
    scan.withStartRow(rowPrefix);
    scan.setFilter(filters);

    // The scanner holds server-side resources, so close it when done
    try (ResultScanner tableScanner = table.getScanner(scan)) {
        for (Result rs : tableScanner) {
            System.out.print(Bytes.toString(rs.getValue(cf1Bytes, toNumBytes)));
            System.out.print("--" + Bytes.toString(rs.getValue(cf1Bytes, lengthBytes)));
            System.out.print("--" + Bytes.toString(rs.getValue(cf1Bytes, typeBytes)));
            // Recover the call time from the inverted timestamp in the row key
            String row = Bytes.toString(rs.getRow());
            long timestamp = Long.MAX_VALUE - Long.parseLong(row.split("_")[1]);
            System.out.println("--" + sdf.format(timestamp));
        }
    }
}
/**
 * Deletes row "1", the row written by {@link #put()}.
 *
 * @throws IOException if the delete fails
 */
@Test
public void delete() throws IOException {
    // Encode the key with Bytes.toBytes, exactly as put() and get() do, instead of the platform charset
    Delete delete = new Delete(Bytes.toBytes("1"));
    table.delete(delete);
}
/**
 * Bulk-inserts generated call records into family cf2, storing each record as one
 * protobuf-encoded {@code PhoneDetail} value in the single column "phone".
 * Row keys are built as reversed phone number + "_" + (Long.MAX_VALUE - timestamp).
 *
 * @throws ParseException if a generated date cannot be parsed
 * @throws IOException    if the write fails
 */
@Test
public void putListByProtoBuf() throws ParseException, IOException {
    byte[] family = Bytes.toBytes("cf2");
    byte[] qualifier = Bytes.toBytes("phone");

    List<Put> batch = new ArrayList<>();
    for (int user = 0; user < 10; user++) {
        // One user gets the fixed number so the query tests have a known key
        String phoneNumber = (user == 5) ? onePhoneNumber : getNumber("158");
        for (int record = 0; record < 10000; record++) {
            String toNum = getNumber("177");                    // other party's number
            String length = String.valueOf(random.nextInt(100)); // call duration
            String type = String.valueOf(random.nextInt(2));     // caller / callee
            String date = getDate("2023");                      // call time

            // row key: reversed phone number + "_" + (Long.MAX_VALUE - timestamp)
            long invertedTime = Long.MAX_VALUE - sdf.parse(date).getTime();
            String rowKey = reverse(phoneNumber) + "_" + invertedTime;

            // The whole record becomes one serialized message
            Phone.PhoneDetail detail = Phone.PhoneDetail.newBuilder()
                    .setToNum(toNum)
                    .setLength(length)
                    .setType(type)
                    .build();

            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(family, qualifier, detail.toByteArray());
            batch.add(put);
        }
    }
    table.put(batch);
}
/**
 * Prints the March 2023 call records of {@code onePhoneNumber} that are stored as
 * protobuf messages in cf2:phone.
 *
 * <p>Row keys contain {@code Long.MAX_VALUE - timestamp}, so later calls sort first. The covered
 * call-time range is [2023-03-01 00:00:00, 2023-04-01 00:00:00), i.e. the whole of March
 * including March 31.
 *
 * @throws Exception if a boundary date cannot be parsed, the scan fails or a value is not a valid message
 */
@Test
public void scanByConditionOfProtoBuf() throws Exception {
    byte[] cf2Bytes = Bytes.toBytes("cf2");
    byte[] phoneBytes = Bytes.toBytes("phone");

    String rowPrefix = reverse(onePhoneNumber) + "_";
    // Later time => smaller key, so April 1st gives the (exclusive) start row ...
    String startRow = rowPrefix + (Long.MAX_VALUE - sdf.parse("20230401000000").getTime());
    // ... and March 1st gives the (inclusive) stop row
    String stopRow = rowPrefix + (Long.MAX_VALUE - sdf.parse("20230301000000").getTime());

    Scan scan = new Scan();
    scan.withStartRow(Bytes.toBytes(startRow), false);
    scan.withStopRow(Bytes.toBytes(stopRow), true);
    // Read only cf2:phone; selecting the column on the Scan replaces the family filter and
    // guarantees that every returned row actually carries the serialized message
    scan.addColumn(cf2Bytes, phoneBytes);

    // The scanner holds server-side resources, so close it when done
    try (ResultScanner tableScanner = table.getScanner(scan)) {
        for (Result rs : tableScanner) {
            Phone.PhoneDetail phoneDetail = Phone.PhoneDetail.parseFrom(rs.getValue(cf2Bytes, phoneBytes));
            System.out.print(phoneDetail.getToNum());
            System.out.print("--" + phoneDetail.getLength());
            System.out.print("--" + phoneDetail.getType());
            // Recover the call time from the inverted timestamp in the row key
            String row = Bytes.toString(rs.getRow());
            long timestamp = Long.MAX_VALUE - Long.parseLong(row.split("_")[1]);
            System.out.println("--" + sdf.format(timestamp));
        }
    }
}
/**
 * Closes the table, admin and connection handles after each test.
 *
 * <p>JUnit runs this method even when {@code init()} failed, so any of the handles may still be
 * {@code null}; each one is checked and closed independently so that a failure on one does not
 * prevent the others from being released.
 */
@After
public void destroy() {
    if (table != null) {
        try {
            table.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    if (admin != null) {
        try {
            admin.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    // The connection goes last: the table and admin handles were obtained from it
    if (conn != null) {
        try {
            conn.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
}
hbase中存储的是'K: 1/cf1:age/1712933324542/Put/vlen=2/seqid=4 V: 20'这样的键值对数据,当值(列存储的值)很小时,此时键存储占用的空间可能是值的好几倍,而且键中有比较多的数据冗余(记录的row、列族、列等可能都是一样的),此时可以考虑将一行数据作为一个对象存储在一个列中,这样可以大大减少键冗余的数据(实际就是减少了键的存储),并且对象可以使用像protobuf这样的技术存储,可以进一步减少数据占用的空间并且速度也很快。但是这也有弊端,多个数据存储在一个列中了,就不能像上面那样简单地对某个字段进行过滤了(例如上面的对type进行过滤)。实际还是可以使用自定义过滤器来实现,但是自定义过滤器可能不能有效地利用索引,执行效率慢,而且当对象比较大时,只读取其中的一小部分数据,也会导致执行效率变慢
# 在 hbase shell 中执行 flush 'phone',将数据刷新到hdfs中
# 查看hdfs中数据文件,这里使用protobuf存储之后,数据占用的空间不到之前的一半
hdfs dfs -ls -R -h /hbase/data/default/phone/*/cf*
-rw-r--r-- 3 bigdata supergroup 18.6 M 2024-04-12 19:44 /hbase/data/default/phone/1ca11091c461f35690cc95ff1e7ee39f/cf1/688be5a7149f45e2beded2ea856d1eb4
-rw-r--r-- 3 bigdata supergroup 7.8 M 2024-04-12 19:44 /hbase/data/default/phone/1ca11091c461f35690cc95ff1e7ee39f/cf2/172558e5094b4bbea220eb1d1ba79831
下面给出一个作为DAO层使用的案例
package org.example.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class HBaseDAOImpl {
// Client configuration; static, but (re)assigned by every constructor call
static Configuration conf = null;
// Connection from which every DAO method obtains its Table/Admin handle
Connection conn;
/**
 * Creates the DAO and opens the HBase connection that all its methods share.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the connection cannot be created
 */
public HBaseDAOImpl() {
    // HBaseConfiguration.create() also loads hbase-default.xml / hbase-site.xml;
    // a bare new Configuration() only carries the Hadoop resources
    conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node02,node03,node04");
    try {
        conn = ConnectionFactory.createConnection(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Creates a table with the given column families. If a table of that name already exists it is
 * dropped first.
 *
 * @param tableName      name of the table to create
 * @param columnFamilies names of the column families of the new table
 * @throws RuntimeException wrapping the {@link IOException} of a failed admin operation
 */
public void createTable(String tableName, String[] columnFamilies) {
    try (Admin admin = conn.getAdmin()) {
        TableName tn = TableName.valueOf(tableName);
        if (admin.tableExists(tn)) {
            System.out.println("此表,已存在,先删除表");
            // Disabling a table that is already disabled fails, so only disable when needed
            if (admin.isTableEnabled(tn)) {
                admin.disableTable(tn);
            }
            admin.deleteTable(tn);
        }
        TableDescriptorBuilder tableDesc = TableDescriptorBuilder.newBuilder(tn);
        for (String columnFamily : columnFamilies) {
            // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
            tableDesc.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build());
        }
        admin.createTable(tableDesc.build());
        System.out.println("建表成功!");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Writes a single {@code Put} to the given table.
 *
 * @param put       the mutation to write
 * @param tableName name of the target table
 * @throws RuntimeException wrapping the {@link IOException} of a failed write
 */
public void save(Put put, String tableName) {
    TableName target = TableName.valueOf(tableName);
    try (Table handle = conn.getTable(target)) {
        handle.put(put);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Writes a list of {@code Put}s to the given table in one call.
 *
 * @param puts      the mutations to write
 * @param tableName name of the target table
 * @throws RuntimeException wrapping the {@link IOException} of a failed write
 */
public void saveList(List<Put> puts, String tableName) {
    TableName target = TableName.valueOf(tableName);
    try (Table handle = conn.getTable(target)) {
        handle.put(puts);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Writes one cell.
 *
 * @param tableName name of the target table
 * @param rowKey    row key
 * @param family    column family
 * @param qualifier column qualifier
 * @param value     cell value
 * @throws RuntimeException wrapping the {@link IOException} of a failed write
 */
public void insertCell(String tableName, String rowKey, String family,
                       String qualifier, String value) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
        table.put(put);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Writes several cells of one row and one column family in a single {@code Put}.
 * {@code qualifier[i]} is paired with {@code value[i]}.
 *
 * @param tableName name of the target table
 * @param rowKey    row key
 * @param family    column family shared by all cells
 * @param qualifier column qualifiers
 * @param value     cell values; must contain at least as many entries as {@code qualifier}
 * @throws IllegalArgumentException if {@code value} has fewer entries than {@code qualifier}
 * @throws RuntimeException         wrapping the {@link IOException} of a failed write
 */
public void insertCells(String tableName, String rowKey, String family, String[] qualifier, String[] value) {
    // Fail with a clear message before touching the table instead of an index error mid-loop
    if (value.length < qualifier.length) {
        throw new IllegalArgumentException(
                "value has " + value.length + " entries but qualifier has " + qualifier.length);
    }
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        Put put = new Put(Bytes.toBytes(rowKey));
        byte[] familyBytes = Bytes.toBytes(family);
        for (int i = 0; i < qualifier.length; i++) {
            put.addColumn(familyBytes, Bytes.toBytes(qualifier[i]), Bytes.toBytes(value[i]));
        }
        table.put(put);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Reads one complete row by its row key.
 *
 * @param tableName name of the table to read
 * @param rowKey    row key
 * @return the result of the {@code Get}
 * @throws RuntimeException wrapping the {@link IOException} of a failed read
 */
public Result getOneRow(String tableName, String rowKey) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        Get get = new Get(Bytes.toBytes(rowKey));
        return table.get(get);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Reads selected columns of one row. This is the most commonly used read: restricting the
 * {@code Get} to the needed columns keeps the query cheap.
 *
 * @param tableName name of the table to read
 * @param rowKey    row key
 * @param family    column family of the requested columns
 * @param cols      qualifiers of the requested columns
 * @return the result of the {@code Get}
 * @throws RuntimeException wrapping the {@link IOException} of a failed read
 */
public Result getOneRowAndMultiColumn(String tableName, String rowKey, String family, String[] cols) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        Get get = new Get(Bytes.toBytes(rowKey));
        byte[] familyBytes = Bytes.toBytes(family);
        for (String col : cols) {
            get.addColumn(familyBytes, Bytes.toBytes(col));
        }
        return table.get(get);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Scans rows whose key starts with a prefix and keeps only the cells of one column that hold a
 * given value.
 *
 * @param tableName  name of the table to scan
 * @param rowKeyLike row-key prefix
 * @param family     column family of the filtered column
 * @param qualifier  qualifier of the filtered column
 * @param value      value the column must equal
 * @return all results produced by the scan
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan
 */
public List<Result> getRows(String tableName, String rowKeyLike, String family, String qualifier, String value) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        byte[] prefix = Bytes.toBytes(rowKeyLike);

        FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        fl.addFilter(new PrefixFilter(prefix));
        // ColumnValueFilter returns only the matching cells. SingleColumnValueFilter would return
        // whole rows and, by default, also rows that do not have the column at all.
        fl.addFilter(new ColumnValueFilter(
                Bytes.toBytes(family), Bytes.toBytes(qualifier), CompareOperator.EQUAL, Bytes.toBytes(value)));

        Scan scan = new Scan();
        // All matching keys are >= the prefix, so there is no need to scan from the start of the table
        scan.withStartRow(prefix);
        scan.setFilter(fl);

        List<Result> list = new ArrayList<>();
        // The scanner holds server-side resources, so close it when done
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result rs : scanner) {
                list.add(rs);
            }
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Scans rows whose key starts with a prefix and returns only the selected columns.
 *
 * @param tableName  name of the table to scan
 * @param rowKeyLike row-key prefix
 * @param family     column family of the requested columns
 * @param cols       qualifiers of the requested columns
 * @return all results produced by the scan
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan
 */
public List<Result> getRows(String tableName, String rowKeyLike, String family, String[] cols) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        byte[] prefix = Bytes.toBytes(rowKeyLike);
        byte[] familyBytes = Bytes.toBytes(family);

        Scan scan = new Scan();
        for (String col : cols) {
            scan.addColumn(familyBytes, Bytes.toBytes(col));
        }
        // All matching keys are >= the prefix, so there is no need to scan from the start of the table
        scan.withStartRow(prefix);
        scan.setFilter(new PrefixFilter(prefix));

        List<Result> list = new ArrayList<>();
        // The scanner holds server-side resources, so close it when done
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result rs : scanner) {
                list.add(rs);
            }
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Range scan between two row keys, using the default bounds of
 * {@code Scan.withStartRow} / {@code Scan.withStopRow}.
 *
 * @param tableName name of the table to scan
 * @param startRow  row key at which the scan starts
 * @param stopRow   row key at which the scan stops
 * @return all results produced by the scan
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan
 */
public List<Result> getRows(String tableName, String startRow, String stopRow) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        Scan scan = new Scan();
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(stopRow));

        List<Result> list = new ArrayList<>();
        // The scanner holds server-side resources, so close it when done
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result rs : scanner) {
                list.add(rs);
            }
        }
        return list;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Scans the whole table and prints every cell via {@code printResult}.
 *
 * @param tableName name of the table to scan
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan
 */
public void scanner(String tableName) {
    try (Table table = conn.getTable(TableName.valueOf(tableName));
         // The scanner holds server-side resources, so it is closed together with the table
         ResultScanner scanner = table.getScanner(new Scan())) {
        for (Result rs : scanner) {
            printResult(rs);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Scans the whole table restricted to the given columns and prints every cell via
 * {@code printResult}.
 *
 * @param tableName name of the table to scan
 * @param family    column family of the requested columns
 * @param cols      qualifiers of the requested columns
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan
 */
public void scannerByColumn(String tableName, String family, String[] cols) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        Scan s = new Scan();
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        byte[] familyBytes = Bytes.toBytes(family);
        for (String col : cols) {
            s.addColumn(familyBytes, Bytes.toBytes(col));
        }
        // The scanner holds server-side resources, so close it when done
        try (ResultScanner scanner = table.getScanner(s)) {
            for (Result rs : scanner) {
                printResult(rs);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Deletes one column of one row via {@code Delete.addColumn}.
 *
 * @param tableName name of the target table
 * @param rowKey    row key
 * @param family    column family
 * @param col       column qualifier
 * @throws RuntimeException wrapping the {@link IOException} of a failed delete
 */
public void deleteCell(String tableName, String rowKey, String family, String col) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        Delete del = new Delete(Bytes.toBytes(rowKey));
        del.addColumn(Bytes.toBytes(family), Bytes.toBytes(col));
        table.delete(del);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Deletes every row whose key starts with the given prefix: the matching rows are collected
 * with a prefix scan and then removed in one batch.
 *
 * @param tableName  name of the target table
 * @param rowKeyLike row-key prefix of the rows to delete
 * @throws RuntimeException wrapping the {@link IOException} of a failed scan or delete
 */
public void deleteRecords(String tableName, String rowKeyLike) {
    try (Table table = conn.getTable(TableName.valueOf(tableName))) {
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        byte[] prefix = Bytes.toBytes(rowKeyLike);

        Scan scan = new Scan();
        // All matching keys are >= the prefix, so there is no need to scan from the start of the table
        scan.withStartRow(prefix);
        scan.setFilter(new PrefixFilter(prefix));

        List<Delete> list = new ArrayList<>();
        // The scanner holds server-side resources, so close it before issuing the deletes
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result rs : scanner) {
                list.add(new Delete(rs.getRow()));
            }
        }
        table.delete(list);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Drops a table if it exists; otherwise reports on stderr that there is nothing to delete.
 *
 * @param tableName name of the table to drop
 * @throws RuntimeException wrapping the {@link IOException} of a failed admin operation
 */
public void deleteTable(String tableName) {
    try (Admin admin = conn.getAdmin()) {
        TableName target = TableName.valueOf(tableName);
        boolean present = admin.tableExists(target);
        if (!present) {
            System.err.println("删除的表不存在!");
            return;
        }
        // A table has to be disabled before it can be deleted
        admin.disableTable(target);
        admin.deleteTable(target);
        System.out.println("删除表成功!");
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Prints every cell of a result: row key, timestamp, family, qualifier (under the label
 * "rowName") and value, one line each.
 *
 * @param rs the result whose cells are printed
 */
public static void printResult(Result rs) {
    for (Cell cell : rs.rawCells()) {
        // Bytes.toString decodes as UTF-8; new String(byte[]) would depend on the platform charset
        System.out.println("row:" + Bytes.toString(CellUtil.cloneRow(cell)));
        System.out.println("timestamp:" + cell.getTimestamp());
        System.out.println("family:" + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println("rowName:" + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println("value:" + Bytes.toString(CellUtil.cloneValue(cell)));
    }
}
/**
 * Prints every result of the list, in order, via {@code printResult}.
 *
 * @param results the results to print
 */
public static void printResults(List<Result> results) {
    results.forEach(HBaseDAOImpl::printResult);
}
/**
 * Walks through every DAO operation against the scratch table "dao_test": create, the various
 * writes, reads and scans, the deletes, and finally dropping the table. The connection opened
 * by the DAO is closed when the walk-through ends, whether it succeeded or not.
 *
 * @param args unused
 */
public static void main(String[] args) {
    HBaseDAOImpl dao = new HBaseDAOImpl();
    // try-with-resources releases the DAO's connection even when one of the steps throws
    try (Connection ignored = dao.conn) {
        String tableName = "dao_test";
        String family1 = "cf1";
        String name1Col = "name1";
        String name2Col = "name2";
        String family2 = "cf2";
        String ageCol = "age";
        String phoneCol = "phone";
        // Bytes.toBytes always encodes as UTF-8, independent of the platform charset
        byte[] family1Bytes = Bytes.toBytes(family1);
        byte[] name1Bytes = Bytes.toBytes(name1Col);
        byte[] name2Bytes = Bytes.toBytes(name2Col);

        // Create the table
        dao.createTable(tableName, new String[]{family1, family2});

        // Insert a single Put
        Put firstPut = new Put(Bytes.toBytes("1"));
        firstPut.addColumn(family1Bytes, name1Bytes, Bytes.toBytes("zhang"));
        firstPut.addColumn(family1Bytes, name2Bytes, Bytes.toBytes("san"));
        dao.save(firstPut, tableName);

        // Insert a list of Puts (rows "2".."100")
        List<Put> putList = new ArrayList<>();
        for (int i = 2; i <= 100; i++) {
            Put put = new Put(Bytes.toBytes(String.valueOf(i)));
            int remain = i % 10;
            put.addColumn(family1Bytes, name1Bytes, Bytes.toBytes("zhang" + remain));
            put.addColumn(family1Bytes, name2Bytes, Bytes.toBytes("san" + remain));
            putList.add(put);
        }
        dao.saveList(putList, tableName);

        // Insert one cell
        dao.insertCell(tableName, "1", family2, ageCol, "18");
        // Insert several cells of one column family
        dao.insertCells(tableName, "2", family2, new String[]{ageCol, phoneCol}, new String[]{"20", "15565430987"});

        // Read one row by row key
        Result oneRow = dao.getOneRow(tableName, "1");
        printResult(oneRow);
        // Read one row by row key, restricted to selected columns
        Result oneRowAndMultiColumn = dao.getOneRowAndMultiColumn(tableName, "1", family1, new String[]{name1Col});
        printResult(oneRowAndMultiColumn);
        // Prefix match on the row key, filtered by a column value
        List<Result> rows1 = dao.getRows(tableName, "1", family2, ageCol, "18");
        printResults(rows1);
        // Prefix match on the row key, restricted to selected columns
        List<Result> rows2 = dao.getRows(tableName, "1", family1, new String[]{name2Col});
        printResults(rows2);
        // Range query
        List<Result> rows3 = dao.getRows(tableName, "1", "2");
        printResults(rows3);

        // Scan all rows
        dao.scanner(tableName);
        // Scan selected columns
        dao.scannerByColumn(tableName, family2, new String[]{ageCol});

        // Delete one cell
        dao.deleteCell(tableName, "1", family1, name1Col);
        // Delete rows by row-key prefix
        dao.deleteRecords(tableName, "1");
        // Drop the table
        dao.deleteTable(tableName);
    } catch (IOException e) {
        // Only closing the connection can raise IOException here
        throw new RuntimeException(e);
    }
}
}