【编译原理】json解析器的实现

470 阅读6分钟

​ 本文已参与「新人创作礼」活动,一起开启掘金创作之路。


 一、json结构

简单地介绍一下json的数据类型和语法

1. 数据类型

json每一个数据都是一个“对象”,其数据类型有6种:null、array、object、bool、number、string。

2. 语法

一个文件只能有一个对象,一个array可以存储多个对象,对象之间用逗号分隔,不限数据类型;一个object也可以存储多个对象,而且是按照key-value形式存储,每一个key都对应一个对象,同样也不限数据类型。

二、文法

1. 词法设计

json字符串可拆分成10种词组:

①数字:由负号或数字开头,其后由数字和最多一个小数点组成的字符串(例如 -12.5;不支持指数形式)

②字符串:由引号开头、引号结尾的字符串

③布尔值:true、false

④空值:null

⑤上花括号:{ 

⑥下花括号:}

⑦上中括号:[

⑧下中括号:]

⑨冒号

⑩逗号

2. 语法设计

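设定文法 $G = (V_T, V_N, S, P)$,其中终结符集合 $V_T = \{\text{number},\ \text{string},\ \text{bool},\ \text{null},\ \{,\ \},\ [,\ ],\ \text{逗号},\ \text{冒号},\ \$\}$,非终结符集合 $V_N = \{S, A, B, C, D\}$。

 开始符是 $S$,产生式如下(原图已缺失,以下根据源码中的 INFER、STATUTE_LEN 表和分析表还原):

$$
\begin{aligned}
(1)\ & S \to A\ \$ \\
(2)\ & A \to B \\
(3)\ & B \to \text{number} \\
(4)\ & B \to \text{string} \\
(5)\ & B \to \text{bool} \\
(6)\ & B \to \text{null} \\
(7)\ & B \to \{\ C\ \} \\
(8)\ & B \to [\ D\ ] \\
(9)\ & B \to \{\ \} \\
(10)\ & B \to [\ ] \\
(11)\ & C \to \text{string} : B \\
(12)\ & C \to C\ ,\ C \\
(13)\ & D \to B \\
(14)\ & D \to D\ ,\ D
\end{aligned}
$$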

 注:文法未经优化

3. 文法符号分析

FIRST集合:

 FOLLOW集合:

4. 文法DFA

这个DFA是手动画的,所以可能会有那么一丢丢问题。

5. 文法分析表

根据DFA依次填表(已结合FOLLOW集合),得到以下分析表: (sn表示移进,并进入状态n;r n表示根据第n条产生式进行规约)

image.png

三、程序实现

1. 程序思路

①把文本拆分成若干个词组(名为token),形成一个token序列,并在末尾插入一个表示结束的token(即文法中的$)

②根据文法表,判断token序列的顺序是否符合设定的语法

③将合法的token序列转换为C的json数据结构

2. 源码实现

语言:C99标准及以上

注:代码未经优化

头文件:

#ifndef LIB_JSON_H
#define LIB_JSON_H

// JSON value types (stored in JsonObj.type)
#define Null 0
#define Array 1
#define Object 2
#define Bool 3
#define Number 4
#define String 5

// Parse error codes (stored in ParseResult.error_code)
#define SUCCESS 0 // parsing succeeded
#define ERROR -1 // syntax error
#define UNKONW_CHARACTOR -2 // unrecognized character
#define LOST_QUOTATION -3 // missing quotation mark
#define MULTIPLE_POINTS -4 // more than one decimal point

// Minimal bool substitute for C translation units; skipped when compiled as C++.
// NOTE(review): this may clash with <stdbool.h>, and with C23 where bool/true/false
// are keywords — confirm which language standards must be supported.
#ifndef __cplusplus
typedef char bool;
#define true 1
#define false 0
#endif

// One JSON node. The same struct represents every value type; `type` selects
// how `data` is interpreted.
struct JsonObj
{
    char *name; // member name; NULL after creation, set when the node is appended to an object
    void *data; // payload: bool / double / NUL-terminated chars / array of JsonObjPtr; NULL for Null nodes
    int type; // value type (Null, Array, Object, Bool, Number or String)
    int length; // number of children of an array or object
    int capacity; // number of child slots allocated for an array or object
    int rindex; // read cursor of an array or object, used by FirstElement/NextElement
};

// Pointer alias used throughout the API.
typedef struct JsonObj * JsonObjPtr;

// Outcome of Parse(): an error code with its position, plus the parsed tree on success.
struct ParseResult
{
    int row; // row of the error
    int col; // column of the error
    int error_code; // SUCCESS or one of the negative error codes
    JsonObjPtr result; // root of the parsed JSON tree on success; the caller must release it with Free()
};

#ifdef __cplusplus
extern "C" 
{
#endif

/*
 * Release a node. Array and object nodes release their children recursively.
 */
extern void Free(JsonObjPtr obj);

/*
 * Create a JSON node. A node appended to a parent is released together with
 * that parent; a node without a parent must be released by the caller.
 * Create() dispatches on `type`; any unrecognized value yields a Null node.
 */
extern JsonObjPtr Create(int type);
extern JsonObjPtr CreateNull();
extern JsonObjPtr CreateBool();
extern JsonObjPtr CreateNumber();
extern JsonObjPtr CreateString();
extern JsonObjPtr CreateArray();
extern JsonObjPtr CreateObject();

/*
 * Return convention for the bool-returning functions below:
 * 0 on failure, 1 on success.
 */

/*
 * Test the type of a node.
 */
extern bool IsNull(const JsonObjPtr obj);
extern bool IsBool(const JsonObjPtr obj);
extern bool IsNumber(const JsonObjPtr obj);
extern bool IsString(const JsonObjPtr obj);
extern bool IsArray(const JsonObjPtr obj);
extern bool IsObject(const JsonObjPtr obj);

/*
 * Set the value of a node. Fails when the node has a different type.
 */
extern bool SetBool(JsonObjPtr obj, bool value);
extern bool SetNumber(JsonObjPtr obj, double value);
extern bool SetString(JsonObjPtr obj, const char *value);

/*
 * Read the value of a node. The node type is not checked.
 */
extern bool GetBool(JsonObjPtr obj);
extern double GetNumber(JsonObjPtr obj);
extern const char *GetString(JsonObjPtr obj);

/*
 * Append an element at the end of an array, or a named element at the end
 * of an object (the name is copied).
 */
extern bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c);
extern bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c);

/*
 * Remove (and release) the last element of an array.
 */
extern bool RemoveArrayElement(JsonObjPtr obj);

/*
 * Remove the element called `name` from an object node.
 */
extern bool RemoveObjectElement(JsonObjPtr obj, const char *name);

/*
 * Get the array element at `index`; NULL when obj is not an array or the
 * index is past the end.
 */
extern JsonObjPtr GetArrayElement(JsonObjPtr obj, int index);

/*
 * Get the object element called `name`; NULL when obj is not an object or
 * no element has that name.
 */
extern JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name);

/*
 * Start iterating an array or object: resets the node's read cursor and
 * returns its first element (NULL when empty or not a container).
 */
extern JsonObjPtr FirstElement(JsonObjPtr obj);

/*
 * Advance the node's read cursor and return the next element
 * (NULL when exhausted or not a container).
 */
extern JsonObjPtr NextElement(JsonObjPtr obj);

/*
 * Parse a JSON string.
 * @str  JSON text
 * @size  length of the text in bytes
 * @return  a ParseResult value
 */
extern struct ParseResult Parse(const char *str, int size);

// Human-readable message for the error stored in a ParseResult
extern const char * ErrorMsg(const struct ParseResult *pr);

// Root node of a successful parse; the caller must release it with Free()
extern JsonObjPtr GetJsonObjPtr(struct ParseResult *pr);

// Whether parsing succeeded
extern bool ParseSuccess(const struct ParseResult *pr);

// Position of the parse error
extern int ParseErrorRow(const struct ParseResult *pr);
extern int ParseErrorCol(const struct ParseResult *pr);

/*
 * Serialize a JSON node to text.
 * @buff  destination memory; no size is passed, so it must be large enough for the whole text
 * @newl  line-break string, may be NULL
 * @newc  indentation string repeated once per nesting level, may be NULL
 * @aoc  string written after the colon of an object member, may be NULL
 * @aac  string written after the comma between array elements, may be NULL
 * @return  length of the produced string
 */
extern int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac);

/*
 * Serialize a JSON node to formatted text.
 * @return  ToString(obj, buff, "\n", "    ", " ", " ")
 */
extern int ToFormatString(JsonObjPtr obj, char *buff);

/*
 * Serialize a JSON node to compact, unformatted text.
 * @return  ToString(obj, buff, NULL, NULL, NULL, NULL)
 */
extern int ToUnformatString(JsonObjPtr obj, char *buff);

#ifdef __cplusplus
}
#endif

#endif // LIB_JSON_H

源文件:

#include "libjson.h"
#include <limits.h> // INT_MIN
#include <stdio.h>  // snprintf
#include <stdlib.h> // malloc  realloc  free  atof  gcvt 
#include <string.h> // memcpy  strlen  strcmp

#ifndef NULL
#define NULL 0
#endif

/*
 * Lvalue for the child pointer at position `index` of a container node.
 * Both arguments and the whole expansion are parenthesized so that compound
 * expressions (e.g. `cond ? a : b`, `ptr + 1`) expand as intended.
 */
#define IndexObj(obj, index) \
    (*(((JsonObjPtr *)(obj)->data) + (index)))

/* Release every child stored in a container node (the child array itself is left alone). */
static void FreeChildren(JsonObjPtr obj)
{
    JsonObjPtr *children = (JsonObjPtr *)obj->data;
    int idx;

    for(idx = 0; idx < obj->length; ++idx)
    {
        Free(children[idx]);
    }
}

/*
 * Release a node and everything it owns: its children (arrays and objects),
 * its payload, its member name and finally the node itself.
 * Passing NULL is a no-op.
 */
void Free(JsonObjPtr obj)
{
    if(NULL == obj)
    {
        return;
    }

    switch(obj->type)
    {
    case Array:
    case Object:
        FreeChildren(obj);
        /* fallthrough: the child pointer array is released like any payload */
    case Bool:
    case Number:
    case String:
        free(obj->data);
        break;
    default:
        /* Null (and unknown) nodes carry no payload */
        break;
    }

    /* The name is a heap copy owned by the node (see AppendObjectElement and
       the parser); it was previously leaked. free(NULL) is a no-op. */
    free(obj->name);
    free(obj);
}

/* Create a node of the requested type; any unrecognized type value yields a Null node. */
JsonObjPtr Create(int type)
{
    if(type == Array)
    {
        return CreateArray();
    }
    if(type == Object)
    {
        return CreateObject();
    }
    if(type == Bool)
    {
        return CreateBool();
    }
    if(type == Number)
    {
        return CreateNumber();
    }
    if(type == String)
    {
        return CreateString();
    }
    return CreateNull();
}

JsonObjPtr CreateNull()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = NULL;
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Null;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateBool()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(bool));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Bool;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateNumber()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(double));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Number;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateString()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(char));
    *(char *)ptr->data = '\0';
    ptr->length = 1;
    ptr->rindex = 0;
    ptr->type = String;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateArray()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(JsonObjPtr));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Array;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateObject()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(JsonObjPtr));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Object;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

/* Shared predicate behind the Is* family: does obj carry the given type tag? */
static bool HasType(const JsonObjPtr obj, int type)
{
    return obj->type == type;
}

/* True when obj is a Null node. */
bool IsNull(const JsonObjPtr obj)
{
    return HasType(obj, Null);
}

/* True when obj is a Bool node. */
bool IsBool(const JsonObjPtr obj)
{
    return HasType(obj, Bool);
}

/* True when obj is a Number node. */
bool IsNumber(const JsonObjPtr obj)
{
    return HasType(obj, Number);
}

/* True when obj is a String node. */
bool IsString(const JsonObjPtr obj)
{
    return HasType(obj, String);
}

/* True when obj is an Array node. */
bool IsArray(const JsonObjPtr obj)
{
    return HasType(obj, Array);
}

/* True when obj is an Object node. */
bool IsObject(const JsonObjPtr obj)
{
    return HasType(obj, Object);
}

/* Store value in a Bool node; returns false (and changes nothing) for any other node type. */
bool SetBool(JsonObjPtr obj, bool value)
{
    bool *slot;

    if(IsBool(obj))
    {
        slot = (bool *)obj->data;
        *slot = value;
        return true;
    }
    return false;
}

/* Store value in a Number node; returns false (and changes nothing) for any other node type. */
bool SetNumber(JsonObjPtr obj, double value)
{
    double *slot;

    if(IsNumber(obj))
    {
        slot = (double *)obj->data;
        *slot = value;
        return true;
    }
    return false;
}

/*
 * Replace the text of a String node with a copy of value.
 * Returns false when obj is not a String node, when value is NULL, or when
 * memory cannot be allocated; the node is left untouched in those cases.
 */
bool SetString(JsonObjPtr obj, const char *value)
{
    size_t len;
    char *copy;

    if(!IsString(obj) || NULL == value)
    {
        return false;
    }

    len = strlen(value);
    copy = malloc(len + 1);
    if(NULL == copy)
    {
        return false;
    }
    /* Copy before releasing the old text: value may alias obj->data,
       e.g. SetString(obj, GetString(obj)). */
    memcpy(copy, value, len + 1);
    free(obj->data);
    obj->data = copy;
    return true;
}

/* Read the payload of a Bool node (the node type is not checked). */
bool GetBool(JsonObjPtr obj)
{
    const bool *slot = (const bool *)obj->data;
    return *slot;
}

/* Read the payload of a Number node (the node type is not checked). */
double GetNumber(JsonObjPtr obj)
{
    const double *slot = (const double *)obj->data;
    return *slot;
}

/* Borrow the text of a String node (the node type is not checked). */
const char *GetString(JsonObjPtr obj)
{
    const char *text = (const char *)obj->data;
    return text;
}

/* Store c in the next free child slot of obj, doubling the slot array first when it is full. */
static void AppendElement(JsonObjPtr obj, JsonObjPtr c)
{
    JsonObjPtr *slots;

    if(obj->capacity <= obj->length)
    {
        obj->capacity = obj->capacity * 2;
        obj->data = realloc(obj->data, sizeof(JsonObjPtr) * obj->capacity);
    }

    slots = (JsonObjPtr *)obj->data;
    slots[obj->length] = c;
    obj->length += 1;
}

/* Append c to an Array node; returns false when obj is not an array. */
bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c)
{
    if(IsArray(obj))
    {
        AppendElement(obj, c);
        return true;
    }
    return false;
}

/*
 * Append c to an Object node under the given member name.
 * The name is copied and any name c carried before is released.
 * Returns false when obj is not an object, when name or c is NULL, or when
 * the name copy cannot be allocated; nothing is modified in those cases.
 */
bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c)
{
    size_t nlen;
    char *copy;

    if(NULL == name || NULL == c || !IsObject(obj))
    {
        return false;
    }

    nlen = strlen(name);
    copy = malloc(nlen + 1);
    if(NULL == copy)
    {
        return false;
    }
    /* Copy first, release afterwards: name may alias c->name. */
    memcpy(copy, name, nlen + 1);
    free(c->name);
    c->name = copy;

    AppendElement(obj, c);
    return true;
}

/* Release and drop the last element of an Array node; false when obj is not an array or is empty. */
bool RemoveArrayElement(JsonObjPtr obj)
{
    JsonObjPtr *items;

    if(IsArray(obj) && obj->length >= 1)
    {
        items = (JsonObjPtr *)obj->data;
        Free(items[obj->length - 1]);
        obj->length -= 1;
        return true;
    }
    return false;
}

/*
 * Remove the first element called name from an Object node and release it,
 * matching RemoveArrayElement, which also releases what it removes.
 * Returns false when obj is not an object or name is NULL. Returns true
 * otherwise, including when no element has that name.
 */
bool RemoveObjectElement(JsonObjPtr obj, const char *name)
{
    JsonObjPtr *items;
    JsonObjPtr removed;
    int i = 0;

    if(!IsObject(obj) || NULL == name)
    {
        return false;
    }

    items = (JsonObjPtr *)obj->data;
    while(i < obj->length)
    {
        /* skip unnamed children instead of handing NULL to strcmp */
        if(NULL != items[i]->name && 0 == strcmp(name, items[i]->name))
        {
            break;
        }
        ++i;
    }

    if(i >= obj->length)
    {
        return true;
    }

    removed = items[i];
    --obj->length;
    /* close the gap left by the removed element */
    for(; i < obj->length; ++i)
    {
        items[i] = items[i + 1];
    }

    /* the container owned the element, so release it rather than leak it */
    Free(removed);
    return true;
}

/*
 * Return the element at position index of an Array node.
 * Returns NULL when obj is not an array or index lies outside
 * [0, length) — negative indices are rejected as well.
 */
JsonObjPtr GetArrayElement(JsonObjPtr obj, int index)
{
    if(!IsArray(obj))
    {
        return NULL;
    }

    if(index < 0 || index >= obj->length)
    {
        return NULL;
    }

    return ((JsonObjPtr *)obj->data)[index];
}

/* Return the first element of an Object node whose name equals name, or NULL (also when obj is not an object). */
JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name)
{
    JsonObjPtr *members;
    int pos;

    if(IsObject(obj))
    {
        members = (JsonObjPtr *)obj->data;
        for(pos = 0; pos < obj->length; ++pos)
        {
            if(strcmp(name, members[pos]->name) == 0)
            {
                return members[pos];
            }
        }
    }

    return NULL;
}

/* Reset the read cursor of an array/object and return its first child; NULL when empty or not a container. */
JsonObjPtr FirstElement(JsonObjPtr obj)
{
    if(IsArray(obj) || IsObject(obj))
    {
        obj->rindex = 0;
        if(obj->length > 0)
        {
            return ((JsonObjPtr *)obj->data)[0];
        }
    }
    return NULL;
}

/* Advance the read cursor of an array/object and return the child it now points at; NULL when exhausted or not a container. */
JsonObjPtr NextElement(JsonObjPtr obj)
{
    if(IsArray(obj) || IsObject(obj))
    {
        obj->rindex += 1;
        if(obj->rindex < obj->length)
        {
            return ((JsonObjPtr *)obj->data)[obj->rindex];
        }
    }
    return NULL;
}

/*
 * =========================================================
 * ==========================解析实现========================
 * =========================================================
 */
// Token kinds produced by the tokenizer. The values are also used as the
// column index into the ACTION table of the syntax checker.
#define TTYPE_NUMBER 0 // number
#define TTYPE_STRING 1 // string
#define TTYPE_BOOL 2 // true, false
#define TTYPE_NULL 3 // null
#define TTYPE_UP_OBJECT 4 // {
#define TTYPE_DOWN_OBJECT 5 // }
#define TTYPE_UP_ARRAY 6 // [
#define TTYPE_DOWN_ARRAY 7 // ]
#define TTYPE_COMMA 8 // comma
#define TTYPE_COLON 9 // colon
#define TTYPE_END 10 // end-of-input marker

// Character classification helpers (is_digital evaluates its argument twice)
#define is_point(x) ((x) == '.')
#define is_digital(x) ((x) <= '9' && (x) >= '0')

// One lexical token of the JSON text.
struct Token
{
    int row; // row recorded for the token (used in error reports)
    int col; // column recorded for the token (used in error reports)
    int type; // one of the TTYPE_* kinds
    char *str; // heap copy of the token text for numbers, strings and keywords; NULL for punctuation and the end marker
};

static int GetTokenList(const char *cur, const char *end, struct Token **out, int *err_code)
{
    const char *tmp = NULL;
    struct Token *v, *tmp_token = NULL;
    int count = 0, cap = 1; // 数量、容量
    int row = 1, col = 0;// 当前行列号
    bool point = false; // 是否遇到小数点

    while(cur != end)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count];
        ++col;
        v->row = row;
        v->col = col;

        switch(*cur)
        {
        case '{':
            v->type = TTYPE_UP_OBJECT;
            v->str = NULL;
            break;
        case '}':
            v->type = TTYPE_DOWN_OBJECT;
            v->str = NULL;
            break;
        case '[':
            v->type = TTYPE_UP_ARRAY;
            v->str = NULL;
            break;
        case ']':
            v->type = TTYPE_DOWN_ARRAY;
            v->str = NULL;
            break;
        case ':':
            v->type = TTYPE_COLON;
            v->str = NULL;
            break;
        case ',':
            v->type = TTYPE_COMMA;
            v->str = NULL;
            break;
        case '\\':
            break;
        case '-':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            tmp = cur;
            ++tmp;
            while(is_digital(*tmp) && tmp != end)
            { 
                if(is_point(*++tmp))
                {
                    ++tmp; 
                    if(point)
                    {
                        *err_code = MULTIPLE_POINTS;
                        return count;
                    }
                    point = true;
                }
            }

            point = false;
            v->type = TTYPE_NUMBER;
            v->str = malloc(tmp - cur + 1);
            memcpy(v->str, cur, tmp - cur);
            v->str[tmp - cur] = '\0';
            cur = --tmp;
            break;

        case '"':
            tmp = cur;
            ++tmp;
            while(tmp != end)
            { 
                if(*tmp == '\\')
                {
                    if(++tmp == end)
                    {
                        break;
                    }
                }
                else if(*tmp == '"')
                {
                    break;
                }
                ++tmp;
            }
            v->type = TTYPE_STRING;
            v->str = malloc(tmp - cur);
            memcpy(v->str, cur + 1, tmp - cur - 1);
            v->str[tmp - cur - 1] = '\0';
            cur = tmp;
            break;

        case 'n':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'u' || *(cur + 2) != 'l' || *(cur + 3) != 'l')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_NULL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;
        
        case 't':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'r' || *(cur + 2) != 'u' || *(cur + 3) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;

        case 'f':
            if(cur + 4 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'a' || *(cur + 2) != 'l' || *(cur + 3) != 's' || *(cur + 4) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(6);
            memcpy(v->str, cur, 5);
            v->str[5] = '\0';
            cur += 4;
            break;

        case ' ':
        case '\t':
            --count;
            break;
        case '\n':
        case '\r':
            ++row;
            col = 1;
            --count;
            break;
        default:
            *err_code = UNKONW_CHARACTOR;
            return count;
        }

        ++count;
        ++cur;
    }

    if(count > 0)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count++];
        tmp_token = &(*out)[count - 2];
        v->type = TTYPE_END;
        v->str = NULL;
        v->row = tmp_token->row;
        v->col = tmp_token->col + (NULL == tmp_token->str ? 0 : strlen(tmp_token->str));
    }

    *err_code = SUCCESS;
    return count;
}

#ifdef _DEBUG
#include <stdio.h> 
/* Print the state stack as a comma-separated list (debug builds only). */
static void _PrintStack(int *st, int count)
{
    int pos;

    for(pos = 0; pos < count; ++pos)
    {
        if(pos > 0)
        {
            printf(",");
        }
        printf("%d", st[pos]);
    }
}

/* Print the symbol of a token kind; unknown kinds print nothing (debug builds only). */
static void _PrintType(int type)
{
    static const char *const symbols[] =
    {
        [TTYPE_NUMBER] = "number",
        [TTYPE_STRING] = "string",
        [TTYPE_BOOL] = "bool",
        [TTYPE_NULL] = "null",
        [TTYPE_UP_OBJECT] = "{",
        [TTYPE_DOWN_OBJECT] = "}",
        [TTYPE_UP_ARRAY] = "[",
        [TTYPE_DOWN_ARRAY] = "]",
        [TTYPE_COMMA] = ",",
        [TTYPE_COLON] = ":",
        [TTYPE_END] = "$",
    };
    const int known = (int)(sizeof(symbols) / sizeof(symbols[0]));

    if(type >= 0 && type < known && NULL != symbols[type])
    {
        printf("%s", symbols[type]);
    }
}

/* Tracing hooks used by the syntax checker: real output in debug builds... */
#define PrintStack(st, count) _PrintStack(st, count)
#define Printf printf
#define PrintType(t) _PrintType(t)

#else

/* ...and no-ops otherwise. */
#define PrintStack(st, count)
#define Printf(...)
#define PrintType(t)

#endif

/*
 * 语法分析,判断语法是否合法
 */
static void CheckSyntax(
    struct Token *t, int size, int *err_code, int *err_row, int *err_col)
{
    // S:0  A:1  B:2  C:3  D:4
    static const int INFER[14] = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4 };
    // 规约式长度
    static const int STATUTE_LEN[14] = { 2, 1, 1, 1, 1, 1, 3, 3, 2, 2, 3, 3, 1, 3};
    static const int GOTO[23][5] =
    {
            //     S        A        B        C        D
    /* 1*/  {INT_MIN,       3,       4, INT_MIN, INT_MIN},
    /* 2*/  {INT_MIN, INT_MIN, INT_MIN,      11, INT_MIN},
    /* 3*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 4*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 5*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 6*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 7*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 8*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 9*/  {INT_MIN, INT_MIN,      19, INT_MIN,      20},
    /*10*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*11*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*12*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*13*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*14*/  {INT_MIN, INT_MIN, INT_MIN,      15, INT_MIN},
    /*15*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*16*/  {INT_MIN, INT_MIN,      17, INT_MIN, INT_MIN},
    /*17*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*18*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*19*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*20*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*21*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*22*/  {INT_MIN, INT_MIN,      19, INT_MIN,      23},
    /*23*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    };

    static const int ACTION[23][11] = 
    {
            //    数字   字符串    布尔值     空值        {        }        [        ]        ,        :        $
    /* 1*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 2*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN,      10, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 3*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,       0},
    /* 4*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -2},
    /* 5*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -3, INT_MIN,      -3,      -3, INT_MIN,      -3},
    /* 6*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -4, INT_MIN,      -4,      -4, INT_MIN,      -4},
    /* 7*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -5, INT_MIN,      -5,      -5, INT_MIN,      -5},
    /* 8*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -6, INT_MIN,      -6,      -6, INT_MIN,      -6},
    /* 9*/  {       5,       6,       7,       8,       2, INT_MIN,       9,      18, INT_MIN, INT_MIN, INT_MIN},
    /*10*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -9, INT_MIN,      -9,      -9, INT_MIN,      -9},
    /*11*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      13, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*12*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      16, INT_MIN},
    /*13*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -7, INT_MIN,      -7,      -7, INT_MIN,      -7},
    /*14*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*15*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -12, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*16*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*17*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN},
    /*18*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -10, INT_MIN,     -10,     -10, INT_MIN,     -10},
    /*19*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -13,     -13, INT_MIN, INT_MIN},
    /*20*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      21,      22, INT_MIN, INT_MIN},
    /*21*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -8, INT_MIN,      -8,      -8, INT_MIN,      -8},
    /*22*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*23*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -14,      22, INT_MIN, INT_MIN},
    };

    struct Token *cur = NULL; // 当前token
    int index = 0; // Token序列访问索引

    int state_size = 16; // 状态栈空间大小
    int state_index = 0; // 状态栈元素数量
    int *st = malloc(sizeof(int) * state_size); // 状态栈

    int vi = 0; // 当前状态栈顶值
    int action_state = 0; // 当前action表值
    int goto_state = 0; // 当前goto表值
    int infer_state = 0; // 当前规约获得的非终结符

    st[state_index++] = 1;

    while(state_index > 0)
    {
        PrintStack(st, state_index);
        // 取栈顶元素
        vi = st[state_index - 1];
        cur = &t[index];
        
        Printf("  ");
        PrintType(cur->type);

        action_state = ACTION[vi - 1][cur->type];
        // 结束了
        if(action_state == 0)
        {
            Printf("  success\n");
            break;
        }
        // 非法语法
        else if(action_state == INT_MIN)
        {
            *err_code = ERROR;
            *err_row = cur->row;
            *err_col = cur->col;
            Printf("\n");
            return;
        }

        // 移进操作
        if(action_state > 0)
        {
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = action_state;
            ++index;
            Printf("  shift-%d", action_state);
        }
        // 规约操作
        else
        {
            state_index -= STATUTE_LEN[- action_state - 1];
            // 重新获取栈顶元素
            vi = st[state_index - 1];
            Printf("  reduce%d", action_state);

            // 获取goto表值
            infer_state = INFER[- action_state - 1];
            goto_state = GOTO[vi - 1][infer_state];
            if(goto_state == INT_MIN)
            {
                *err_code = ERROR;
                *err_row = cur->row;
                *err_col = cur->col;
                Printf("\n");
                return;
            }
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = goto_state;
            Printf("  goto-%d", goto_state);
        }

        Printf("\n");
    }

    if(index != size - 1)
    {
        Printf("\n");
        cur = &t[index];
        *err_code = ERROR;
        *err_row = cur->row;
        *err_col = cur->col;
    }

    free(st);
}

/*
 * Build the JSON node that corresponds to one value token.
 * For string tokens the node takes over the token's text buffer and t->str
 * is cleared. Returns NULL for tokens that do not start a value, or when a
 * node cannot be created.
 */
static JsonObjPtr CreateNewObjWithToken(struct Token *t)
{
    JsonObjPtr result = NULL;
    switch (t->type)
    {
    case TTYPE_NUMBER:
        result = CreateNumber();
        if(NULL != result)
        {
            SetNumber(result, atof(t->str));
        }
        break;
    case TTYPE_STRING:
        result = CreateString();
        if(NULL != result)
        {
            /* release the empty placeholder before adopting the token text;
               overwriting the pointer directly leaked it */
            free(result->data);
            result->data = t->str;
            t->str = NULL;
        }
        break;
    case TTYPE_BOOL:
        result = CreateBool();
        if(NULL != result)
        {
            SetBool(result, strcmp("true", t->str) == 0 ? true : false);
        }
        break;
    case TTYPE_NULL:
        result = CreateNull();
        break;
    case TTYPE_UP_OBJECT:
        result = CreateObject();
        break;
    case TTYPE_UP_ARRAY:
        result = CreateArray();
        break;
    default:
        break;
    }
    return result;
}

/*
 * 将token序列转化为Json对象
 */
static JsonObjPtr ChangeToJsonObject(struct Token *t)
{
    JsonObjPtr result = NULL; // 解析结果
    JsonObjPtr new_obj = NULL; // 新对象
    struct Token *cur = NULL; // 当前token
    JsonObjPtr top = NULL; // 栈顶结构
    int index = 0; // Token序列访问索引

    int st_cap = 4; // 栈大小
    int st_index = 0; // 栈元素数量
    JsonObjPtr *st = (JsonObjPtr *)malloc(sizeof(JsonObjPtr) * st_cap); // 栈

    struct Token *name_token = NULL; // 用于存储object时的name token

    result = CreateNewObjWithToken(&t[index++]); // 先创建根节点
    if(IsObject(result) || IsArray(result))
    {
        st[st_index++] = result;
    }

    while(st_index > 0)
    {
        cur = &t[index++];
        top = st[st_index - 1];

        switch (cur->type)
        {
        case TTYPE_NUMBER:
            new_obj = CreateNumber();
            SetNumber(new_obj, atof(cur->str));
            break;
        case TTYPE_STRING:
            if(NULL != name_token || !IsObject(top))
            {
                new_obj = CreateString();
                new_obj->data = cur->str;
                cur->str = NULL;
            }
            else 
            {
                name_token = cur;
                continue;
            }
            break;
        case TTYPE_BOOL:
            new_obj = CreateBool();
            SetBool(new_obj, strcmp("true", cur->str) == 0 ? true : false);
            break;
        case TTYPE_NULL:
            new_obj = CreateNull();
            break;
        case TTYPE_UP_OBJECT:
            new_obj = CreateObject();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_OBJECT:
            --st_index;
            continue;
        case TTYPE_UP_ARRAY:
            new_obj = CreateArray();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_ARRAY:
            --st_index;
            continue;
        case TTYPE_END:
            st_index = 0;
            continue;
        default:
            continue;
        }

        if(NULL != name_token)
        {
            new_obj->name = name_token->str;
            name_token->str = NULL;
            name_token = NULL;
        }
        AppendElement(top, new_obj);
    } 

    return result;
}

struct ParseResult Parse(const char *str, int size)
{
    struct ParseResult result;
    const char *cur = str, *end = str + size;
    struct Token *t = (struct Token *)malloc(sizeof(struct Token));
    int count = GetTokenList(cur, end, &t, &result.error_code);
    
    if(result.error_code != SUCCESS)
    {
        if(count > 0)
        {
            result.row = t[count - 1].row;
            result.col = t[count - 1].col;
        }
        else 
        {
            result.row = 0;
            result.col = 0;
        }
    }
    else if(count > 0)
    {
        CheckSyntax(t, count, &result.error_code, &result.row, &result.col);
        if(ParseSuccess(&result))
        {
            result.result = ChangeToJsonObject(t);
        }
    }
    while(count-- > 0)
    {
        if(NULL != t[count].str)
        {
            free(t[count].str);
        }
    }
    free(t);
    return result;
}

const char * ErrorMsg(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return "";
    }

    switch (pr->error_code)
    {
    case ERROR:
        return "parse error";
    case UNKONW_CHARACTOR:
        return "unkown charactor";
    case LOST_QUOTATION:
        return "maybe lost quotation";
    case MULTIPLE_POINTS:
        return "decimal point is too many";
    }

    return "unkown error";
}

/* Root node stored in a parse result; NULL when pr is NULL. */
JsonObjPtr GetJsonObjPtr(struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return NULL;
    }
    return pr->result;
}

/* True when pr is non-NULL and its error code is SUCCESS. */
bool ParseSuccess(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return false;
    }
    return pr->error_code == SUCCESS;
}

/* Row stored in a parse result; -1 when pr is NULL. */
int ParseErrorRow(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return -1;
    }
    return pr->row;
}

/* Column stored in a parse result; -1 when pr is NULL. */
int ParseErrorCol(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return -1;
    }
    return pr->col;
}

/*
 * Write the shortest "%.16g" rendering of num to buff.
 *
 * @num     value to format
 * @buff    destination; receives exactly the returned number of characters
 *          and no NUL terminator
 * @return  number of characters written (0 if formatting fails)
 *
 * The previous version scanned backwards from buff[15], which gcvt() may
 * never have written, and stripped the significant zeros of integers such
 * as 1000 whenever gcvt() emitted no trailing decimal point.
 */
static int NumberToString(double num, char *buff)
{
    /* sign + 16 digits + '.' + exponent ("e-308") needs 23 characters */
    char text[32];
    int len = snprintf(text, sizeof text, "%.16g", num);

    if(len < 0)
    {
        return 0;
    }
    if((size_t)len >= sizeof text)
    {
        len = (int)sizeof text - 1;
    }
    memcpy(buff, text, (size_t)len);
    return len;
}

/* Copy the len-byte string str into buff count times back to back; returns the number of bytes written. */
static int WriteBuffer(char *buff, const char *str, int len, int count)
{
    char *dst = buff;
    int round;

    for(round = 0; round < count; ++round)
    {
        memcpy(dst, str, len);
        dst += len;
    }
    return (int)(dst - buff);
}

/*
 * Serialize a JSON node to text.
 *
 * @obj     node to serialize; NULL produces the empty string
 * @buff    destination; no size is passed, so it must be large enough for
 *          the whole text plus the NUL terminator
 * @newl    line-break string written after an opening bracket, before a
 *          closing bracket and between object members; may be NULL
 * @newc    indentation string, written once per nesting level after each
 *          such line break; may be NULL
 * @aoc     string written after the colon of an object member; may be NULL
 * @aac     string written after the comma between array elements; may be NULL
 * @return  length of the produced text (excluding the NUL terminator)
 *
 * The tree is walked iteratively with an explicit stack of open containers
 * and the containers' read cursors (FirstElement/NextElement). Empty
 * containers are written as "[]" / "{}" and a scalar root is written once;
 * both cases previously crashed or never terminated. If the stack cannot
 * be allocated or grown, the text produced so far is returned.
 */
int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac)
{
    int buff_index = 0; // write position in buff
    int str_length = 0;
    const char *key = NULL;

    JsonObjPtr cur = obj, next = NULL, parent = NULL;
    int st_cap = 4; // allocated size of the container stack
    int st_index = 0; // number of open containers
    JsonObjPtr *st = malloc(sizeof(JsonObjPtr) * st_cap); // container stack
    JsonObjPtr *grown = NULL;

    int newl_len = (int)strlen(NULL == newl ? (newl = "") : newl);
    int newc_len = (int)strlen(NULL == newc ? (newc = "") : newc);
    int aoc_len = (int)strlen(NULL == aoc ? (aoc = "") : aoc);
    int aac_len = (int)strlen(NULL == aac ? (aac = "") : aac);

    if(NULL == st)
    {
        buff[0] = '\0';
        return 0;
    }

    while(NULL != cur)
    {
        if(NULL != parent)
        {
            // a value inside `parent` was just written: move to its sibling
            next = NextElement(parent);
            if(NULL == next)
            {
                // no sibling left: close the container and go up one level
                buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index - 1);

                buff[buff_index++] = parent->type == Array ? ']' : '}';
                if(--st_index == 0)
                {
                    break;
                }
                parent = st[st_index - 1];
                continue;
            }
            buff[buff_index++] = ',';
            if(IsObject(parent))
            {
                buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
            }
            else
            {
                buff_index += WriteBuffer(buff + buff_index, aac, aac_len, 1);
            }
            cur = next;
        }
        else if(st_index > 0)
        {
            // a container was just opened: start with its first child
            parent = st[st_index - 1];
            cur = FirstElement(parent);
            if(NULL == cur)
            {
                // only non-empty containers are pushed; bail out defensively
                break;
            }
        }

        if(NULL != parent && IsObject(parent))
        {
            key = NULL == cur->name ? "" : cur->name;
            buff[buff_index++] = '"';
            buff_index += WriteBuffer(buff + buff_index, key, (int)strlen(key), 1);
            buff[buff_index++] = '"';
            buff[buff_index++] = ':';
            buff_index += WriteBuffer(buff + buff_index, aoc, aoc_len, 1);
        }

        switch (cur->type)
        {
        case Null:
            memcpy(buff + buff_index, "null", 4);
            buff_index += 4;
            break;
        case Array:
        case Object:
            if(cur->length <= 0)
            {
                // an empty container is written like a scalar, never pushed
                buff[buff_index++] = IsArray(cur) ? '[' : '{';
                buff[buff_index++] = IsArray(cur) ? ']' : '}';
                break;
            }
            if(st_index >= st_cap)
            {
                grown = realloc(st, sizeof(JsonObjPtr) * (size_t)st_cap * 2);
                if(NULL == grown)
                {
                    goto done;
                }
                st = grown;
                st_cap *= 2;
            }
            buff[buff_index++] = IsArray(cur) ? '[' : '{';
            st[st_index++] = cur;
            parent = NULL; // makes the next iteration descend into cur

            buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
            buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
            break;
        case Bool:
            if(GetBool(cur))
            {
                memcpy(buff + buff_index, "true", 4);
                buff_index += 4;
            }
            else 
            {
                memcpy(buff + buff_index, "false", 5);
                buff_index += 5;
            }
            break;
        case Number:
            buff_index += NumberToString(GetNumber(cur), buff + buff_index);
            break;
        case String:
            buff[buff_index++] = '"';
            str_length = (int)strlen(GetString(cur));
            memcpy(buff + buff_index, GetString(cur), str_length);
            buff_index += str_length;
            buff[buff_index++] = '"';
            break;
        default:
            break;
        }

        if(st_index == 0)
        {
            // the root was a scalar or an empty container: nothing is left
            break;
        }
    }

done:
    free(st);
    buff[buff_index] = '\0';
    return buff_index;
}

/* Serialize obj with line breaks, four-space indentation and a space after colons and array commas. */
int ToFormatString(JsonObjPtr obj, char *buff)
{
    const int written = ToString(obj, buff, "\n", "    ", " ", " ");
    return written;
}

/* Serialize obj compactly, without any optional whitespace. */
int ToUnformatString(JsonObjPtr obj, char *buff)
{
    const int written = ToString(obj, buff, NULL, NULL, NULL, NULL);
    return written;
}

3. 测试

测试代码

#include <stdio.h>  // printf (previously missing, so printf was undeclared)
#include <stdlib.h>
#include <string.h>
#include "libjson.h"

// Sample document used by the demo below.
static const char *input_json_str =
"                       \
{                       \
    \"aaaa\": 1000,     \
    \"bbbb\": \"value\",\
    \"cccc\": [         \
        null, true, false \
    ],                  \
    \"dddd\": \"wocao\" \
}                       \
";

/*
 * Demo: parse the sample document, print it, append one string member and
 * print it again in compact and formatted form.
 */
static void test(void)
{
    char buff[1024];
    JsonObjPtr result = NULL, new_obj = NULL;
    struct ParseResult pr = Parse(input_json_str, (int)strlen(input_json_str));
    if(!ParseSuccess(&pr))
    {
        printf("parse error!!! at row(%d) col(%d)\n", 
            ParseErrorRow(&pr), ParseErrorCol(&pr));
        return;
    }

    printf("parse success!!!\n");
    result = GetJsonObjPtr(&pr);
    if(NULL == result)
    {
        // nothing to print or modify; the code below would dereference NULL
        return;
    }

    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);

    printf("-----------add node-------------\n");
    new_obj = CreateString();
    if(NULL != new_obj)
    {
        SetString(new_obj, "yohohoho");
        if(!AppendObjectElement(result, "new node", new_obj))
        {
            // the node was not attached, so it is still ours to release
            Free(new_obj);
        }
    }
    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);
    printf("==========================\n");
    ToFormatString(result, buff);
    printf("format string: \n%s\n", buff);

    Free(result);
}

int main(void)
{
    test();
    // system("pause");
    return 0;
}

测试结果

①文法分析过程 ​

②测试输出结果: ​

四、结语

这个json解析器是我在系统地复习了编译原理之后的实验品,如有问题,欢迎指出!