xgraph-hbase
1.写点
##### 1. 插入点数据
{
"label": "person",
"properties": {
"name": "huangxiaohu",
"city": "chengde",
"age": 29
}
}
2. 写处理通用数据整形
/**
 * Serializes an id into this buffer (the "data shaping" applied to every id).
 *
 * <ul>
 *   <li>Number id: delegated to {@code writeNumber}, which emits a header byte
 *       followed by the value (id 1 becomes the two bytes 16 1).</li>
 *   <li>String id with {@code big == false}: one prefix byte
 *       {@code (len - 1) | 0x80}, then the raw bytes. Example: "1:huangxiaohu"
 *       is 13 bytes long, so the prefix is 12 | 0x80 = 0x8C.</li>
 *   <li>String id with {@code big == true}: two prefix bytes,
 *       {@code ((len - 1) >> 8) | 0x80} and {@code (len - 1) & 0xff}, then the
 *       raw bytes. This is the form visible in the index rowkeys, e.g.
 *       0x80 0x04 in front of the 5-byte id "1:jin".</li>
 * </ul>
 *
 * @param id  the id to write; a string id must not be empty
 * @param big whether to use the two-byte length prefix for string ids
 * @return this buffer, for chaining
 */
public BytesBuffer writeId(Id id, boolean big) {
    if (id.number()) {
        // Numeric id: header byte + value
        this.writeNumber(id.asLong());
        return this;
    }

    // String id: length prefix followed by the raw bytes
    byte[] bytes = id.asBytes();
    int len = bytes.length;
    E.checkArgument(len > 0, "Can't write empty id");
    if (!big) {
        // The prefix keeps 7 bits for (len - 1), so at most 128 bytes fit
        E.checkArgument(len <= 128,
                        "Id max length is %s, but got %s {%s}",
                        128, len, id);
        len -= 1; // mapping [1, 128] to [0, 127]
        this.writeUInt8(len | 0x80);
    } else {
        // Two prefix bytes keep 15 bits for (len - 1)
        E.checkArgument(len <= 0x8000,
                        "Big id max length is %s, but got %s {%s}",
                        0x8000, len, id);
        len -= 1; // mapping [1, 32768] to [0, 32767]
        this.writeUInt8((len >> 8) | 0x80);
        this.writeUInt8(len & 0xff);
    }
    this.write(bytes);
    return this;
}
/**
 * Writes a numeric id using the narrowest width that can hold it.
 *
 * The header is a width tag (0x00 byte, 0x20 short, 0x40 int, 0x60 long) OR-ed
 * with a sign flag: 0x10 when the value is non-negative, 0x00 when negative.
 * For byte/short/int the header is written as its own byte before the value,
 * so the id 1 is stored as 16 1. For the long case the header is merged into
 * the top byte of the 8-byte value after masking the value with ID_MASK.
 */
private void writeNumber(long val) {
    final int signFlag = (val < 0) ? 0x00 : 0x10;

    if (val >= Byte.MIN_VALUE && val <= Byte.MAX_VALUE) {
        this.writeUInt8(signFlag);
        this.write((byte) val);
        return;
    }
    if (val >= Short.MIN_VALUE && val <= Short.MAX_VALUE) {
        this.writeUInt8(0x20 | signFlag);
        this.writeShort((short) val);
        return;
    }
    if (val >= Integer.MIN_VALUE && val <= Integer.MAX_VALUE) {
        this.writeUInt8(0x40 | signFlag);
        this.writeInt((int) val);
        return;
    }

    // Widest form: the value must lie strictly between ID_MIN and ID_MAX
    E.checkArgument(ID_MIN < val && val < ID_MAX,
                    "Id value must be in [%s, %s], but got %s",
                    ID_MIN, ID_MAX, val);
    final long header = (0x60L | signFlag) << 56;
    this.writeLong((val & ID_MASK) | header);
}
/**
 * Writes a length-prefixed byte array: first the length as an unsigned 16-bit
 * value, then the bytes themselves.
 *
 * @param bytes the data to write; its length must not exceed UINT16_MAX
 * @return this buffer, for chaining
 */
public BytesBuffer writeBytes(byte[] bytes) {
    final int length = bytes.length;
    E.checkArgument(length <= UINT16_MAX,
                    "The max length of bytes is %s, got %s",
                    UINT16_MAX, length);
    // Reserve room for the length field plus the payload before writing
    require(SHORT_LEN + length);
    this.writeUInt16(length);
    this.write(bytes);
    return this;
}
3.代码组织
1. 插入Hbase 点表
\x851:josh column=f:f\xC8, timestamp=1595584426480, value=\x10\x01
\x851:josh column=f:g\x10\x01, timestamp=1595584426480, value=\x01jos\xE8
\x851:josh column=f:g\x10\x02, timestamp=1595584426480, value=,
\x851:josh column=f:g\x10\x03, timestamp=1595584426480, value=\x01benx\xE9
\x852:test column=f:f\xC8, timestamp=1595584285412, value=\x10\x02
\x852:test column=f:g\x10\x01, timestamp=1595584285412, value=\x01tes\xF4
\x852:test column=f:g\x10\x04, timestamp=1595584285412, value=\x01\x01\x03\x01\x82c
\x852:test column=f:g\x10\x06, timestamp=1595584285412, value=\x01\x01\x02,
\x8C1:huangxiaohu column=f:f\xC8, timestamp=1595816279685, value=\x10\x01
\x8C1:huangxiaohu column=f:g\x10\x01, timestamp=1595816279685, value=\x01huangxiaoh\xF5
\x8C1:huangxiaohu column=f:g\x10\x02, timestamp=1595816279685, value=:
\x8C1:huangxiaohu column=f:g\x10\x03, timestamp=1595816279685, value=\x01chengd\xE5
1.rowkey : \x8C1:huangxiaohu
-
组成:前缀码+ vertexlabel+name
-
前缀码是根据 vertexlabel+name的长度进行编码取得,相同长度编码相同,在hbase前缀也相同,查找方便
-
例:id= 1:huangxiaohu :
writeId()
(如上函数对变量整形)- 写rowkey前缀: 适用全部rowkey前缀编码方式
例:ID=1:huangxiaohu len("1:huangxiaohu")=13 len-=1 // mapping [1, 128] to [0, 127] byte(len | 0x80) = -116 //0x80=128 将按位或运算后的结果写入buffer(12|0x80=0x8C,按有符号byte表示即-116)
- vertexlabel+name值转换为数组 : 写入原始数据
bytes[49(1), 58(:), 104(h), 117(u), 97(a), 110(n), 103(g), 120(x), 105(i), 97(a), 111(o), 104(h), 117(u)]
- 组合成完整字节数组:-116是长度前缀做按位或运算后转成byte写入数组的结果,它不是可打印字符,因此hbase shell只能显示为\x8C
hbase显示: \x8C1:huangxiaohu 实际字节数组:bytes[-116,49(1), 58(:), 104(h), 117(u), 97(a), 110(n), 103(g), 120(x), 105(i), 97(a), 111(o), 104(h), 117(u)]
-
2. column
-
第一行
column=f:f\xC8, value=\x10\x01
,重点关注column的后缀以及valueprotected BackendColumn formatLabel(HugeElement elem) { BackendColumn col = new BackendColumn(); col.name = this.formatSyspropName(elem.id(), HugeKeys.LABEL); //f+byte(LABEL=200)=-56 Id label = elem.schemaLabel().id(); //1 BytesBuffer buffer = BytesBuffer.allocate(label.length() + 1); col.value = buffer.writeId(label).bytes(); //写vertexlabelid 此时为1,number类型,16 1 return col;}
col: name: 102 -56 -> f\xC8 value: 16 1 -> \x10\x01
-
其余行key:
column=f:g\x10\x01 column=f:g\x10\x02 column=f:g\x10\x03
g\x10\x01---->PROPERTYFlag(图中写死代码103) + writeId(0x10 + propertyKeyid ) column: 103 16 1 -> g\x10\x01 103 16 2 -> g\x10\x02 103 16 3 -> g\x10\x03
/**
 * Builds the column qualifier of a property: the property type code followed
 * by the property-key id encoded with writeId.
 *
 * Example from the notes: type code 103 ('g') plus property key 1 gives the
 * bytes 103 16 1, displayed as g\x10\x01.
 *
 * @param prop the property whose column name is needed
 * @return the encoded column name
 */
protected byte[] formatPropertyName(HugeProperty<?> prop) {
    Id pkeyId = prop.propertyKey().id();
    // The original capacity expression also added an undeclared "idLen"; the
    // element id is never written by this method, so that term is dropped.
    BytesBuffer buffer = BytesBuffer.allocate(2 + pkeyId.length());
    buffer.write(prop.type().code()); // HugeType.PROPERTY = 103 -> 'g'
    buffer.writeId(pkeyId);           // e.g. 16 1 for property key 1
    return buffer.bytes();
}
- 其余行value:
value:\x01huangxiaoh\xF5
字符串的最后一个字节与0x80做按位或运算后再写入。
\x01huangxiaohu -> 末字节u(0x75)|0x80=0xF5 -> \x01huangxiaoh\xF5
/**
 * Serializes a vertex into a backend entry holding one column for the vertex
 * label and one column per property. A removed vertex yields the bare entry
 * with no columns added.
 */
public BackendEntry writeVertex(HugeVertex vertex) {
    final BinaryBackendEntry result = newBackendEntry(vertex);
    if (!vertex.removed()) {
        // Label column first
        result.column(this.formatLabel(vertex));
        // Then one column for every property of the vertex
        for (HugeProperty<?> property : vertex.getProperties().values()) {
            result.column(this.formatProperty(property));
        }
    }
    return result;
}
/**
 * Builds the column for a single property: the name comes from
 * formatPropertyName and the value is the property value passed through
 * KryoUtil.toKryo.
 */
protected BackendColumn formatProperty(HugeProperty<?> prop) {
    final byte[] columnName = this.formatPropertyName(prop);
    return BackendColumn.of(columnName, KryoUtil.toKryo(prop.value()));
}
3. 插入hbase 边表
插入数据
{
"label": "knows",
"outV": "1:jin",
"inV": "1:test",
"outVLabel": "person",
"inVLabel": "person",
"properties": {
"date": "2017-5-18"
}
}
ROW COLUMN+CELL
\x841:tom\x8C\x10\x01\x00\x00\x841:jin column=f:, timestamp=1595584285508, value=\x00\x00\x00\x01\x10\x05\x00\x09\x012019010\xB4
(s1:tom>2>>s1:jin)
\x851:josh\x82\x10\x03\x00\x092017-5-18\x871:Eoobao column=f:, timestamp=1596021449724, value=\x00\x00\x00\x01\x10\x05\x00\x0A\x012017-5-1\xB8
(S1:josh>3>2017-5-18>S1:Eoobao)
1. rowkey \x841:tom\x8C\x10\x01\x00\x00\x841:jin
rowkey: byte: -124 49 58 116 111 109 -116 16 1 0 0 -124 49 58 106 105 110
源点id (仍然使用writeid)(byte)((len(1:tom)-1)|0x80) + byte(1:tom) = -124 49 58 116 111 109
+边方向 byte(140(出边编码))=-116
+边label 16 1
+sortkeys (有的话) 0 若使用sortkey:添加相同边时候,不会覆盖相同边,使用指定的sortkey来唯一标识数据
+目标顶点 (同源) -124 49 58 106 105 110
//ID S1:josh>3>2017-5-18>S1:Eoobao
//0 0 0 1 16 dataid=5 0 10(length) 1(pro.size=1)
//50 48 49 55 45 53 45 49 -72 (2017-5-18)
//ID s1:tom>2>>s1:jin 的接口实现
/**
 * Returns the string form of this edge id, building it on first use and
 * caching it afterwards.
 *
 * The parts are joined with SplicingIdGenerator.concat in this order: source
 * vertex id, edge label id, sort values, target vertex id
 * (for example S1:josh>3>2017-11-18>S1:Eoobao).
 */
public String asString() {
    if (this.cache == null) {
        this.cache = SplicingIdGenerator.concat(
                IdUtil.writeString(this.sourceVertexId()),  // e.g. S1:josh
                this.edgeLabelId.asString(),                // e.g. 3
                this.sortValues,                            // e.g. 2017-11-18
                IdUtil.writeString(this.targetVertexId())); // e.g. S1:Eoobao
    }
    return this.cache;
}
// Excerpt of addEdge: the opening brace and most of the body are elided in
// these notes; only the part that attaches the edge to both vertices is shown.
public HugeEdge addEdge(String label, Vertex vertex, Object... keyValues)
//...... (body elided)
// Attach edge to vertex
this.addOutEdge(edge); // out edge
targetVertex.addInEdge(edge.switchOwner()); // switchOwner flips the direction (out becomes in, in becomes out), matching the reversed copy stored in the edge table
return this.tx().addEdge(edge);// rowkey[S1:josh>3>2017-11-18>S1:Eoobao] properties[1:josh-knowsByDate->1:Eoobao]
}
/**
 * Builds the edge rowkey by writing, in order: owner vertex id, edge type code
 * (direction), edge label id, edge name, other vertex id.
 */
protected byte[] formatEdgeName(HugeEdge edge) {
    final BytesBuffer out = BytesBuffer.allocate(256);

    // Owner vertex of this edge, encoded with writeId
    Id ownerId = edge.ownerVertex().id();
    out.writeId(ownerId);

    // Direction: edge_in or edge_out
    out.write(edge.type().code());

    // Edge label
    Id labelId = edge.schemaLabel().id();
    out.writeId(labelId);

    // TODO: write if need sortValues()
    out.writeString(edge.name());

    // The vertex at the other end of the edge
    Id otherId = edge.otherVertex().id();
    out.writeId(otherId);

    return out.bytes();
}
4. 索引表(点+边)
ROW COLUMN+CELL
\x83-1:1\x80\x041:jin column=f:, timestamp=1595584285457, value=
\x83-1:1\x80\x041:tom column=f:, timestamp=1595584285457, value=
\x83-2:2\x80\x10S1:jin>2>>S2:test column=f:, timestamp=1595584285457, value=
1. Rowkey:
前缀+fieldid+indexlabelid+elementid
S1:josh>3>2017-11-18>S1:Eoobao
\x83-2:3\x80\x1DS1:josh>3>2017-11-18>S1:Eoobao
fieldlabel=3 此时为edgeLabel=3 若为点则此处为vertexLabel
图中定义:
vertexLabel code=-1
edgeLabel code=-2
注意:并不根据数据的长度来判断前缀,前缀由fieldid+indexlabelid的长度确定。因此可以根据属性来查找点和边:hbase中存储的rowkey以id长度编码作为前缀,长度就是查找时的标识。此索引表是二级索引,可根据属性进行查找;例如不知道具体的点、边数据时,可以先根据前缀和1:1这样的编码快速匹配到具体点、边的rowkey,再进行二次查找。
/**
 * Serializes an index record into a backend entry.
 *
 * When the index has no field values and no element ids, the entry produced by
 * formatILDeletion is returned. Otherwise the entry is keyed by the index id
 * (or by its hash id when the index is not a range index and the id exceeds
 * the length limit), gets one column named by formatIndexName, and records the
 * element id as its sub id.
 */
public BackendEntry writeIndex(HugeIndex index) {
    if (index.fieldValues() == null && index.elementIds().size() == 0) {
        /*
         * When field-values is null and elementIds size is 0, it is
         * meaningful for deletion of index data by index label.
         * TODO: improve
         */
        return this.formatILDeletion(index);
    }

    Id indexId = index.id(); // e.g. -2:3
    byte[] columnValue = null;
    if (!index.type().isRangeIndex() && indexIdLengthExceedLimit(indexId)) {
        indexId = index.hashId();
        // Save field-values as column value if the key is a hash string
        columnValue = StringEncoding.encode(index.fieldValues().toString());
    }

    BinaryBackendEntry entry = newBackendEntry(index.type(), indexId);
    entry.column(this.formatIndexName(index), columnValue);
    entry.subId(index.elementId()); // e.g. S1:josh>3>2017-11-8>S1:Eoobao
    return entry;
}
/**
 * Builds the column name of an index entry: optionally the index id (only when
 * indexWithIdPrefix is set), followed by the element id written as a big id.
 */
protected byte[] formatIndexName(HugeIndex index) {
    final Id elementId = index.elementId();
    final int elementPartLen = 1 + elementId.length();

    final BytesBuffer out;
    if (this.indexWithIdPrefix) {
        Id indexId = index.id();
        if (indexIdLengthExceedLimit(indexId)) {
            // Use the hash id when the index id exceeds the length limit
            indexId = index.hashId();
        }
        // Write index-id
        out = BytesBuffer.allocate(elementPartLen + 1 + indexId.length());
        out.writeId(indexId); // written with writeId as well, e.g. -2:3
    } else {
        out = BytesBuffer.allocate(elementPartLen);
    }

    // Write element-id
    out.writeId(elementId, true);
    return out.bytes();
}
xgraph-Hbase 新版变化
1. 表变化
和新版mysql大体相同,并相比新版mysql多了三张表 :
- Vertexlabelindex ,edgelabelindex表
g_ei:edgelabelindex
:边labelindex: type+label+elementid
ROW COLUMN+CELL
-2:2\x00~\x871:xiaohu\x82\x08\x02\x00\x891:yangyang column=f:, timestamp=1596450806784, value=
-2:2\x00~\x8C1:huangtaibai\x82\x08\x02\x00\x891:yangyang column=f:, timestamp=1596450817604, value=
g_vi:vertexlabelindex
:vertexlabelindex
ROW COLUMN+CELL
-1:1\x00\x851:josh column=f:, timestamp=1596450656629, value=
-1:1\x00\x851:xiao column=f:, timestamp=1596526605929, value=
旧版hbase的secondaryindex将properties索引数据、点索引数据和边索引数据放在同一张表中。
新版hbase则将点和边的索引数据从原来的secondaryindex表中抽出,各自存成一张单独的表。
graph LR;
g_si:secondaryindex旧版-->vertexlabelindex+edgelabelindex+propertiesindex;
g_si:secondaryindex新版-->vertexlabelindex:g_vi;
g_si:secondaryindex新版-->edgelabelindex:g_ei;
g_si:secondaryindex新版-->propertiesindex:g_si;
m_si: schema data index
:元信息表:edgelabel vertexlabel indexlabel properties
-5:created\x00\x08\x01 column=f:, timestamp=1596450366647, value=
-
Range index 范围索引变化:将原来的一张表拆分为四张,细分了每种range index的类型:int、bigint、float、double,共四张表。
-
新增了shard_index表:
-
新增了unique_index表:除主键外指定唯一properties
2. 存储结构变化
- hbase中点表Properties由原版的四行合并为一行:
{"id":"1:xiao11","label":"person","type":"vertex","properties":{"age":1,"name":"xiao11","city":"beijing"}}
graph LR;
vertexlabel+property1+property2+...-->properties新版0.11.2;
properties-->vertexlabel;
properties-->property1;
properties-->property2;
properties-->.....;
旧版0.9.2.8
//0.9.2.8旧版本
@Override
public BackendEntry writeVertex(HugeVertex vertex) {
BinaryBackendEntry entry = newBackendEntry(vertex);
if (vertex.removed()) {
return entry;
}
// Write vertex label
entry.column(this.formatLabel(vertex)); //每个column都是执行了add column操作
// Write all properties of a Vertex
for (HugeProperty<?> prop : vertex.getProperties().values()) {
entry.column(this.formatProperty(prop)); //每个property都添加一行column
}
return entry;
// 0.11.2 (new version)
/**
 * Serializes a vertex into a backend entry with a single column: the label id,
 * all properties and (when the vertex has a TTL) the expired time are written
 * into one buffer, which becomes the value of that column. A removed vertex
 * yields the bare entry with no column added.
 */
@Override
public BackendEntry writeVertex(HugeVertex vertex) {
    final BinaryBackendEntry entry = newBackendEntry(vertex);
    if (!vertex.removed()) {
        final BytesBuffer payload =
                BytesBuffer.allocate(8 + 16 * vertex.getProperties().size());

        // Write vertex label
        payload.writeId(vertex.schemaLabel().id());

        // Write all properties of the vertex
        this.formatProperties(vertex.getProperties().values(), payload);

        // Write vertex expired time if needed
        if (vertex.hasTtl()) {
            entry.ttl(vertex.ttl());
            this.formatExpiredTime(vertex.expiredTime(), payload);
        }

        // Fill column: everything collected in the buffer goes into one column
        final byte[] columnName;
        if (this.keyWithIdPrefix) {
            columnName = entry.id().asBytes();
        } else {
            columnName = EMPTY_BYTES;
        }
        entry.column(columnName, payload.bytes());
    }
    return entry;
}
-
边存储构成无变化,编码方式有所不同
-
索引:
Secondary index 索引表相比旧版本,rowkey不再以长度编码作为开头
新:3:beijing\x00\x863:xiao1 indexlabelid+property+""+长度编码+rowkey 旧:\x882:Beijing\x80\x051:josh 长度编码+indexlabelid+property+type+长度编码+rowkey
-
表g_ei(edgelabelindex) g_vi(vertexlabelindex) 记录边和点的label信息索引表
g_ei(edgelabelindex) -2:6\x00~\x863:marko\x82\x08\x06\x00\x853:jos column=f:,timestamp=1596682446307, value=h g_vi(vertexlabelindex) -1:3\x00\x843:tom column=f:, timestamp=1596682446307, value=