huge-hbase 0.9.2.8写数据源码分析/huge-Hbase 0.11新版变化

258 阅读7分钟

xgraph-hbase

1.写点

##### 1. 插入点数据

{
    "label": "person",
    "properties": {
        "name": "huangxiaohu",
        "city": "chengde",
        "age": 29
    }
}
2. 写处理通用数据整形

//数据整形
//1.是数字类型进入下面的writeNumber  16+id
//2.是字符类型则进行处理,先写入长度字节byte((len-1)|0x80),再写入bytes数组
/**
 * Appends an id to this buffer in its binary form.
 *
 * <p>A numeric id is delegated to {@code writeNumber}. A string id is written
 * as a length header followed by the raw id bytes. The header stores
 * {@code len - 1} with the top bit of its first byte set:
 * <ul>
 *   <li>normal id: one byte, e.g. {@code "1:huangxiaohu"} (13 bytes) gets the
 *       header {@code 12 | 0x80 = 0x8C};</li>
 *   <li>big id: two bytes (high, low), e.g. {@code "1:jin"} (5 bytes) gets the
 *       header {@code 0x80 0x04}.</li>
 * </ul>
 *
 * @param id  the id to serialize
 * @param big {@code true} to use the two-byte length header for long ids
 * @return this buffer, to allow chaining
 */
public BytesBuffer writeId(Id id, boolean big) {
  if (id.number()) {
    // Numeric id: header byte + the narrowest integer encoding
    this.writeNumber(id.asLong());
    return this;
  }

  // String id, e.g. "1:huangxiaohu"
  byte[] bytes = id.asBytes();
  int len = bytes.length;
  E.checkArgument(len > 0, "Can't write empty id");
  if (!big) {
    // One header byte only has 7 bits left for the length
    E.checkArgument(len <= 128,
                    "Id max length is %s, but got %s", 128, len);
    // Mapping [1, 128] to [0, 127], then mark the byte with 0x80
    this.writeUInt8((len - 1) | 0x80);
  } else {
    // Two header bytes leave 15 bits for the length
    E.checkArgument(len <= 0x8000,
                    "Big id max length is %s, but got %s", 0x8000, len);
    int stored = len - 1;
    this.writeUInt8((stored >> 8) | 0x80);
    this.writeUInt8(stored & 0xff);
  }
  // The raw id bytes always follow the length header
  this.write(bytes);
  return this;
}
//对于传入的数字,先用writeUInt8写入一个标志字节(数值大于等于0时带0x10标志位),再写入实际传入的数字
/**
 * Writes a numeric id as a header byte followed by the narrowest integer
 * encoding (byte, short, int or long) that can hold the value.
 *
 * <p>The header carries the width (0x00, 0x20, 0x40 or 0x60) or'ed with 0x10
 * when the value is non-negative; e.g. the value 1 is written as 16, 1.
 *
 * @param val the numeric id value
 */
private void writeNumber(long val) {
  // 0x10 flags a value >= 0, 0x00 a negative one
  final int signFlag = val >= 0 ? 0x10 : 0x00;

  if (val == (byte) val) {
    // Fits in one byte: header 0x00 | flag, then the byte itself
    this.writeUInt8(signFlag);
    this.write((byte) val);
    return;
  }
  if (val == (short) val) {
    this.writeUInt8(0x20 | signFlag);
    this.writeShort((short) val);
    return;
  }
  if (val == (int) val) {
    this.writeUInt8(0x40 | signFlag);
    this.writeInt((int) val);
    return;
  }

  // Widest form: the header shares the top byte of the 8-byte value
  // NOTE(review): presumably ID_MASK clears that top byte - confirm
  E.checkArgument(ID_MIN < val && val < ID_MAX,
                  "Id value must be in [%s, %s], but got %s",
                  ID_MIN, ID_MAX, val);
  long header = (0x60L | signFlag) << 56;
  this.writeLong((val & ID_MASK) | header);
}
//3.先写入数据长度  再写入数据  
/**
 * Appends a length-prefixed byte array: an unsigned 16-bit length first,
 * then the bytes themselves.
 *
 * @param bytes the data to append; at most UINT16_MAX bytes long
 * @return this buffer, to allow chaining
 */
public BytesBuffer writeBytes(byte[] bytes) {
  final int length = bytes.length;
  E.checkArgument(length <= UINT16_MAX,
                  "The max length of bytes is %s, got %s",
                  UINT16_MAX, length);
  // Make room for the length prefix plus the payload
  require(SHORT_LEN + length);
  // Length goes first ...
  this.writeUInt16(length);
  // ... followed by the payload
  this.write(bytes);
  return this;
}

3.代码组织

1. 插入Hbase 点表

 \x851:josh      column=f:f\xC8, timestamp=1595584426480, value=\x10\x01
 \x851:josh      column=f:g\x10\x01, timestamp=1595584426480, value=\x01jos\xE8
 \x851:josh      column=f:g\x10\x02, timestamp=1595584426480, value=,
 \x851:josh      column=f:g\x10\x03, timestamp=1595584426480, value=\x01benx\xE9
 \x852:test      column=f:f\xC8, timestamp=1595584285412, value=\x10\x02
 \x852:test      column=f:g\x10\x01, timestamp=1595584285412, value=\x01tes\xF4
 \x852:test      column=f:g\x10\x04, timestamp=1595584285412, value=\x01\x01\x03\x01\x82c
 \x852:test      column=f:g\x10\x06, timestamp=1595584285412, value=\x01\x01\x02,
 \x8C1:huangxiaohu  column=f:f\xC8, timestamp=1595816279685, value=\x10\x01
 \x8C1:huangxiaohu  column=f:g\x10\x01, timestamp=1595816279685, value=\x01huangxiaoh\xF5
 \x8C1:huangxiaohu  column=f:g\x10\x02, timestamp=1595816279685, value=:
 \x8C1:huangxiaohu  column=f:g\x10\x03, timestamp=1595816279685, value=\x01chengd\xE5
1.rowkey : \x8C1:huangxiaohu
  • 组成:前缀码+ vertexlabel+name

  • 前缀码是根据 vertexlabel+name的长度进行编码取得,相同长度编码相同,在hbase前缀也相同,查找方便

    • 例:id= 1:huangxiaohu : writeId()(如上函数对变量整形)

      1. 写rowkey前缀: 适用全部rowkey前缀编码方式
      例:ID=1:huangxiaohu
      len("1:huangxiaohu")=13
      len-=1   // mapping [1, 128] to [0, 127]
      byte(len | 0x80) = -116  //0x80=128  将或操作后的结果(12|0x80=0x8C)打入buffer
      
      2. vertexlabel+name值转换为数组 : 写入原始数据
      bytes[49(1), 58(:), 104(h), 117(u), 97(a), 110(n), 103(g), 120(x), 105(i), 97(a), 111(o), 104(h), 117(u)]
      
      3. 组合成完整字节数组:-116是由长度进行或操作后转byte打入byte数组,不是可打印字符,因此hbase中显示为\x8C
      hbase显示:  \x8C1:huangxiaohu
      实际字节数组:bytes[-116,49(1), 58(:), 104(h), 117(u), 97(a), 110(n), 103(g), 120(x), 105(i), 97(a), 111(o), 104(h), 117(u)]  
      
2. column

  1. 第一行 column=f:f\xC8, value=\x10\x01,重点关注column的后缀以及value

    /**
     * Builds the column that stores an element's label id.
     *
     * <p>In the sample vertex row the result is name bytes 102, -56
     * ({@code f\xC8}) and value bytes 16, 1 ({@code \x10\x01}).
     *
     * @param elem the element whose label is stored
     * @return a column named by the LABEL system property, holding the label id
     */
    protected BackendColumn formatLabel(HugeElement elem) {
        BackendColumn column = new BackendColumn();
        // Column name derived from the element id and the LABEL key
        column.name = this.formatSyspropName(elem.id(), HugeKeys.LABEL);

        // Column value: the schema label id written through writeId
        Id labelId = elem.schemaLabel().id();
        column.value = BytesBuffer.allocate(labelId.length() + 1)
                                  .writeId(labelId)
                                  .bytes();
        return column;
    }
    
    col: name: 102 -56 -> f\xC8
         value: 16 1   -> \x10\x01
    
  2. 其余行key:column=f:g\x10\x01 column=f:g\x10\x02 column=f:g\x10\x03

    g\x10\x01---->PROPERTYFlag(图中写死代码103) + writeId(0x10 + propertyKeyid )
    column: 103 16 1   ->    g\x10\x01 
    				103 16 2   ->    g\x10\x02
    				103 16 3   ->    g\x10\x03  
    
/**
 * Builds the column qualifier of a property: the property type code followed
 * by the property-key id.
 *
 * <p>In the sample rows this yields 103, 16, 1 ({@code g\x10\x01}) for
 * property key 1, 103, 16, 2 for key 2, and so on.
 *
 * @param prop the property whose column name is built
 * @return the qualifier bytes
 */
protected byte[] formatPropertyName(HugeProperty<?> prop) {
    Id pkeyId = prop.propertyKey().id();
    // 2 extra bytes: the type code and the header byte written by writeId
    BytesBuffer buffer = BytesBuffer.allocate(2 + pkeyId.length());
    // Type code first (103 -> 'g' in the sample rows)
    buffer.write(prop.type().code());
    // Then the property-key id, e.g. 16 1
    buffer.writeId(pkeyId);
    return buffer.bytes();
}
  3. 其余行value:value=\x01huangxiaoh\xF5

value由KryoUtil.toKryo序列化:字符串的最后一个字节与0x80做按位或运算,用来标记字符串结束。

huangxiaohu ->  最后一个字节'u'(0x75) | 0x80 = 0xF5  -> \x01huangxiaoh\xF5
    /**
     * Serializes a vertex into a backend entry (0.9.2.8 layout): one column
     * for the label, then one column per property.
     *
     * @param vertex the vertex to serialize
     * @return the entry; it carries no columns when the vertex is removed
     */
    public BackendEntry writeVertex(HugeVertex vertex) {
        BinaryBackendEntry entry = newBackendEntry(vertex);
        if (!vertex.removed()) {
            // Label column first
            entry.column(this.formatLabel(vertex));
            // Then every property as a column of its own
            for (HugeProperty<?> property : vertex.getProperties().values()) {
                entry.column(this.formatProperty(property));
            }
        }
        return entry;
    }
		/**
		 * Builds the column of one property: the name comes from
		 * formatPropertyName, the value is the Kryo-serialized property value.
		 */
		protected BackendColumn formatProperty(HugeProperty<?> prop) {
			byte[] columnName = this.formatPropertyName(prop);
			return BackendColumn.of(columnName, KryoUtil.toKryo(prop.value()));
		}

3. 插入hbase 边表

插入数据
{
    "label": "knows",
    "outV": "1:jin",
    "inV": "1:test",
    "outVLabel": "person",
    "inVLabel": "person",
    "properties": {
        "date": "2017-5-18"
    }
}
ROW                                    COLUMN+CELL
 \x841:tom\x8C\x10\x01\x00\x00\x841:jin  column=f:, timestamp=1595584285508, value=\x00\x00\x00\x01\x10\x05\x00\x09\x012019010\xB4
(s1:tom>2>>s1:jin)
 \x851:josh\x82\x10\x03\x00\x092017-5-18\x871:Eoobao column=f:, timestamp=1596021449724, value=\x00\x00\x00\x01\x10\x05\x00\x0A\x012017-5-1\xB8
 (S1:josh>3>2017-5-18>S1:Eoobao)
1. rowkey \x841:tom\x8C\x10\x01\x00\x00\x841:jin
rowkey: byte: -124 49 58 116 111 109 -116  16 1 0 0  -124 49 58 106 105 110

 源点id   (仍然使用writeid)(byte)((len(1:tom)-1)|0x80) +  byte(1:tom) = -124 49 58 116 111 109
+边方向   byte(140(出边编码))=-116
+边label   16 1
+sortkeys (有的话)  0  若使用sortkey:添加相同边时候,不会覆盖相同边,使用指定的sortkey来唯一标识数据
+目标顶点 (同源)   -124 49 58 106 105 110

//ID  S1:josh>3>2017-5-18>S1:Eoobao 
//0 0 0 1 16 dataid=5  0 10(length)  1(pro.size=1)  
//50 48 49 55 45 53 45 49 -72 (2017-5-18)		
//ID s1:tom>2>>s1:jin 的接口实现
/**
 * Returns the string form of this edge id, building and caching it on first
 * use.
 *
 * <p>The parts are joined by SplicingIdGenerator.concat in the order
 * source vertex id, edge label id, sort values, target vertex id; the
 * article's sample is {@code S1:josh>3>2017-11-18>S1:Eoobao}.
 *
 * @return the cached string form of the id
 */
public String asString() {
    if (this.cache == null) {
        this.cache = SplicingIdGenerator.concat(
                IdUtil.writeString(this.sourceVertexId()),   // e.g. S1:josh
                this.edgeLabelId.asString(),                 // e.g. 3
                this.sortValues,                             // e.g. 2017-11-18
                IdUtil.writeString(this.targetVertexId()));  // e.g. S1:Eoobao
    }
    return this.cache;
}

 /**
  * Excerpt of addEdge: attaches a new edge to both of its vertices and adds
  * it through the transaction.
  *
  * NOTE(review): the excerpt omits the opening brace and the code that
  * creates `edge` and `targetVertex`; only the tail of the method is shown.
  */
 public HugeEdge addEdge(String label, Vertex vertex, Object... keyValues) 
   			// ...... (code omitted in this excerpt)
        // Attach edge to vertex
        this.addOutEdge(edge);  // registered as an out-edge on this object
        targetVertex.addInEdge(edge.switchOwner());  // per the article, switchOwner swaps out-edge and in-edge, matching the reversed copy kept in the edge table
        return this.tx().addEdge(edge); // sample: rowkey[S1:josh>3>2017-11-18>S1:Eoobao] properties[1:josh-knowsByDate->1:Eoobao]
    }

/**
 * Builds the edge rowkey, laid out as
 * owner-vertex + dir + edge-label + sort-values + other-vertex.
 *
 * <p>Sample from the edge table: -124 49 58 116 111 109 | -116 | 16 1 |
 * 0 0 | -124 49 58 106 105 110 for {@code 1:tom -> 1:jin}.
 *
 * @param edge the edge to encode
 * @return the rowkey bytes
 */
protected byte[] formatEdgeName(HugeEdge edge) {
  final int initialCapacity = 256;
  BytesBuffer key = BytesBuffer.allocate(initialCapacity);

  // Owner vertex id, encoded by writeId like a vertex rowkey
  key.writeId(edge.ownerVertex().id());
  // Direction code taken from the edge type (in-edge or out-edge)
  key.write(edge.type().code());
  // Edge label id
  key.writeId(edge.schemaLabel().id());
  // Edge name; TODO: write if need sortValues()
  key.writeString(edge.name());
  // Id of the vertex at the other end
  key.writeId(edge.otherVertex().id());

  return key.bytes();
}

4. 索引表(点+边)

ROW                                          COLUMN+CELL
 \x83-1:1\x80\x041:jin             column=f:, timestamp=1595584285457, value=
 \x83-1:1\x80\x041:tom             column=f:, timestamp=1595584285457, value=
 \x83-2:2\x80\x10S1:jin>2>>S2:test column=f:, timestamp=1595584285457, value=
1. Rowkey:

前缀+fieldid+indexlabelid+elementid

S1:josh>3>2017-11-8>S1:Eoobao 
\x83-2:3\x80\x1DS1:josh>3>2017-11-8>S1:Eoobao  
fieldlabel=3  此时为edgeLabel=3 若为点则此处为vertexLabel
图中定义:
vertexLabel code=-1 
edgeLabel code=-2  

注意:前缀并不根据数据的长度来确定,而是由fieldid+indexlabelid的长度确定。因为hbase中存储的数据按前缀(id长度编码)组织,长度就是查找时的标识。此索引表是二级索引,可根据属性查找点和边:例如不指定具体的点、边时,可以根据前缀和1:1这样的编码快速匹配到具体点、边的rowkey,再进行二次查找。

    /**
     * Serializes an index record into a backend entry.
     *
     * @param index the index record to serialize
     * @return the deletion entry when the index has neither field values nor
     *         element ids, otherwise an entry with one index column
     */
    public BackendEntry writeIndex(HugeIndex index) {
        /*
         * When field-values is null and elementIds size is 0, it is
         * meaningful for deletion of index data by index label.
         * TODO: improve
         */
        boolean deleteByLabel = index.fieldValues() == null &&
                                index.elementIds().size() == 0;
        if (deleteByLabel) {
            return this.formatILDeletion(index);
        }

        Id entryId = index.id();  // e.g. -2:3
        byte[] columnValue = null;
        boolean useHashId = !index.type().isRangeIndex() &&
                            indexIdLengthExceedLimit(entryId);
        if (useHashId) {
            entryId = index.hashId();
            // Save field-values as column value if the key is a hash string
            columnValue = StringEncoding.encode(index.fieldValues().toString());
        }

        BinaryBackendEntry entry = newBackendEntry(index.type(), entryId);
        entry.column(this.formatIndexName(index), columnValue);
        // Sub id is the indexed element, e.g. S1:josh>3>2017-11-8>S1:Eoobao
        entry.subId(index.elementId());
        return entry;
    }

    /**
     * Builds the column name of an index record: an optional index id
     * followed by the element id written with the big-id form of writeId.
     *
     * @param index the index record
     * @return the column name bytes
     */
    protected byte[] formatIndexName(HugeIndex index) {
        final Id elementId = index.elementId();
        final int elementLen = 1 + elementId.length();

        final BytesBuffer out;
        if (this.indexWithIdPrefix) {
            // Prefix with the index id, or its hash when the id is too long
            Id prefixId = index.id();
            if (indexIdLengthExceedLimit(prefixId)) {
                prefixId = index.hashId();
            }
            out = BytesBuffer.allocate(elementLen + 1 + prefixId.length());
            out.writeId(prefixId);  // e.g. -2:3
        } else {
            out = BytesBuffer.allocate(elementLen);
        }

        // The element id is always written last, as a big id
        out.writeId(elementId, true);
        return out.bytes();
    }

xgraph-Hbase 新版变化

1. 表变化

和新版mysql大体相同,并相比新版mysql多了三张表 :

  1. Vertexlabelindex ,edgelabelindex表

g_ei:edgelabelindex :边labelindex: type+label+elementid

ROW                                                        COLUMN+CELL
 -2:2\x00~\x871:xiaohu\x82\x08\x02\x00\x891:yangyang       column=f:, timestamp=1596450806784, value=
 -2:2\x00~\x8C1:huangtaibai\x82\x08\x02\x00\x891:yangyang  column=f:, timestamp=1596450817604, value=

g_vi:vertexlabelindex:vertexlabelindex

ROW                                                        COLUMN+CELL
 -1:1\x00\x851:josh                                        column=f:, timestamp=1596450656629, value=
 -1:1\x00\x851:xiao                                        column=f:, timestamp=1596526605929, value=

旧版hbase的secondary index表将properties索引数据、点索引数据和边索引数据放在一起。

新版hbase则将点和边的索引数据从原来的secondary index表中抽出,单独成为两张表。

graph LR;
    g_si:secondreyindex旧版-->vertexlabelindex+edgelabelindex+propertiesindex;
    g_si:secondreyindex新版-->vertexlabelindex:g_vi;
    g_si:secondreyindex新版-->edgelabelindex:g_ei;
    g_si:secondreyindex新版-->propertiesindex:g_si;
  1. m_si: schema data index:元信息表:edgelabel vertexlabel indexlabel properties
 -5:created\x00\x08\x01                                    column=f:, timestamp=1596450366647, value=
  1. Range index 范围索引变化:将原来的一张表拆分为四张,细分了每种range index的类型,int,bigint,float,double 共四张表。

  2. 新增了shard_index表:

  3. 新增了unique_index表:除主键外指定唯一properties

2. 存储结构变化

  1. hbase中点表Properties由原版的四行合并为一行:
{"id":"1:xiao11","label":"person","type":"vertex","properties":{"age":1,"name":"xiao11","city":"beijing"}}
graph LR;
    vertexlabel+property1+property2+...-->properties新版0.11.2;  
    properties-->vertexlabel;
    properties-->property1;
    properties-->property2;
    properties-->.....;
     旧版0.9.2.8
 //0.9.2.8旧版本
  @Override
    public BackendEntry writeVertex(HugeVertex vertex) {
        BinaryBackendEntry entry = newBackendEntry(vertex);
        if (vertex.removed()) {
            return entry;
        }
        // Write vertex label
        entry.column(this.formatLabel(vertex));  //每个column都是执行了add column操作
        // Write all properties of a Vertex
        for (HugeProperty<?> prop : vertex.getProperties().values()) {
            entry.column(this.formatProperty(prop));     //每个property都添加一行column
        }
        return entry;
      
      
  //0.11.2新版 
   /**
    * Serializes a vertex into a backend entry (0.11.2 layout): the label id,
    * all properties and the optional expired time are packed into one buffer
    * and stored as a single column.
    *
    * @param vertex the vertex to serialize
    * @return the entry; it carries no columns when the vertex is removed
    */
   @Override
    public BackendEntry writeVertex(HugeVertex vertex) {
        BinaryBackendEntry entry = newBackendEntry(vertex);
        if (vertex.removed()) {
            return entry;
        }

        // Initial capacity: 8 bytes plus 16 per property
        BytesBuffer cell = BytesBuffer.allocate(
                           8 + 16 * vertex.getProperties().size());

        // Label id goes first
        cell.writeId(vertex.schemaLabel().id());

        // Then all properties, appended to the same buffer
        this.formatProperties(vertex.getProperties().values(), cell);

        // Expired time is appended only for vertices with a TTL
        if (vertex.hasTtl()) {
            entry.ttl(vertex.ttl());
            this.formatExpiredTime(vertex.expiredTime(), cell);
        }

        // One column holds the whole buffer
        final byte[] columnName;
        if (this.keyWithIdPrefix) {
            columnName = entry.id().asBytes();
        } else {
            columnName = EMPTY_BYTES;
        }
        entry.column(columnName, cell.bytes());
        return entry;
    }

  1. 边存储构成无变化,编码方式有所不同

  2. 索引:

    Secondary index 索引表相比前版本,rowkey不再以长度编码作为开始

    新:3:beijing\x00\x863:xiao1
    indexlabelid+property值+""+长度编码+rowkey
    旧:\x882:Beijing\x80\x051:josh
    长度编码+indexlabelid+property值+type+长度编码+rowkey
    
  3. 表g_ei(edgelabelindex) g_vi(vertexlabelindex) 记录边和点的label信息索引表

    g_ei(edgelabelindex)
    -2:6\x00~\x863:marko\x82\x08\x06\x00\x853:josh column=f:, timestamp=1596682446307, value=
    g_vi(vertexlabelindex)
    -1:3\x00\x843:tom                             column=f:, timestamp=1596682446307, value=