JSON是Javascript的子集,尽管可以用eval来将JSON字符串转换为Javascript对象,但这会带来安全问题
如果要实现一个JSON.parse
,我们首先要能理解JSON的结构。通过简单的查找资料,我们能知道一个JSON可以是以下内容
- 字面量, 包括
true
,false
,null
- 数字
- 字符串
- 对象,对象的键为字符串,值可以为一个JSON值
- 数组,数组的内容可以包含任意个JSON值
因此,我们用Typescript表示,可以将JSON定义如下
/** The JSON keyword values: `true`, `false` and `null`. */
type LiteralValue = null | boolean;
/** JSON values that cannot contain other JSON values. */
type PrimitiveValue = LiteralValue | number | string;
/** Every value a JSON document can describe. */
type JSONValue = PrimitiveValue | JSONArray | JSONObject;
/** A JSON array: an ordered list of JSON values. */
type JSONArray = JSONValue[];
/** A JSON object: string keys mapped to JSON values. */
type JSONObject = { [key: string]: JSONValue };
其中,JSONValue代表了JSON.parse可能返回的所有类型
那么,非常自然地,对于三种基本类型,我们需要提供这样三个方法
名称 | 作用 |
---|---|
parseLiteral | 转换true , false , null |
parseNumber | 转换数字 |
parseString | 转换字符串 |
然后,对于能形成套娃结构的数组和对象,我们需要另外两个方法,parseObject
和parseArray
。
很显然,它们会间接地递归调用自身,以及转换数字、字符串、字面量的方法。我们定义一个parseJSON
方法,它会猜测接下来的值的类型,来调用合适的parse xxx
方法。
另外,我们还需要一个skipWhitespace
方法,来跳过空白字符。它们对于JSON转换的结果是没有影响的
这样,我们的代码就有了一个大概的框架
// Outline of the parser: signatures only, the bodies are filled in below.
class JSONParser {
  // The JSON text being parsed.
  private input: string;
  constructor(input: string) {
    this.input = input;
  }
  // Every helper takes `cur`, the index in `input` at which to start working.
  private skipWhitespace(cur?: number): number
  private guessNextValueType(cur?: number): MaybeJSONValue
  private parseLiteral(cur?: number): ParseResult<LiteralValue>
  private parseNumber(cur?: number): ParseResult<number>
  private parseString(cur?: number): ParseResult<string>
  private parseObject(cur?: number): ParseResult<JSONObject>
  private parseArray(cur?: number): ParseResult<JSONArray>
  private parseJSON(cur?: number): ParseResult<JSONValue>
  // Entry point for external callers: returns the parsed value, not a string.
  parse(): JSONValue;
}
还需要一些类型和枚举的定义
/** Outcome of one parsing step. */
interface ParseResult<T extends JSONValue> {
  /** Whether the step succeeded. */
  success: boolean;
  /**
   * On success: index, within the whole JSON string, of the last character of the parsed value.
   * On failure: index at which parsing failed.
   */
  position: number;
  /** The parsed value. */
  value?: T;
}
/** Kind of JSON value guessed from the first character of the upcoming text. */
enum MaybeJSONValue {
  LITERAL = 0,
  NUMBER = 1,
  STRING = 2,
  ARRAY = 3,
  OBJECT = 4,
  /** The leading character cannot start any JSON value. */
  UNKNOWN = 5,
}
实现
我们约定,所有的parse方法中的cur
参数,代表了从输入字符串的何处开始往后进行转换操作。这些函数返回值中的position
字段,代表了当前解析成功或者失败时的位置。随着我们转换进度的推进,cur
的值会越来越大直到转换完成或转换出错。
parseLiteral
字面量的值是固定的。只需要用等号判断字面量的字符串,就能完成对字面量的转换。
/**
 * Parses one of the keyword literals `true`, `false` or `null` starting at `cur`.
 *
 * @param cur Index in the input at which the literal is expected to start.
 * @returns On success, the literal's value and the index of its last character;
 *          on failure, `success: false` with `position` equal to `cur`.
 */
private parseLiteral(cur = 0): ParseResult<LiteralValue> {
  const keywords: ReadonlyArray<readonly [string, LiteralValue]> = [
    ["true", true],
    ["false", false],
    ["null", null],
  ];
  for (const [text, literal] of keywords) {
    const matches =
      this.input[cur] === text[0] &&
      this.input.substring(cur, cur + text.length) === text;
    if (matches) {
      return { success: true, position: cur + text.length - 1, value: literal };
    }
  }
  return { success: false, position: cur };
}
parseString
有一点关于字符串长度的细节需要注意,'\n'.length === 1
,执行JSON.stringify('\n')
的结果是'"\\n"'(包含首尾的双引号)
。显然,我们需要一个字典,将转换前的多个以'\\'
开头的字符转化为单个如\n
这样的字符。
/**
 * Maps each two-character JSON escape sequence (backslash + one character)
 * to the single character it stands for. `\uXXXX` is not listed because it
 * is six characters long and needs its own handling.
 *
 * The explicit `Record<string, string>` type lets the table be indexed with
 * an arbitrary string without tripping `noImplicitAny`.
 */
const ESCAPE_CHAR_MAP: Readonly<Record<string, string>> = {
  "\\\\": "\\",
  '\\"': '"',
  "\\/": "/",
  "\\b": "\b",
  "\\f": "\f",
  "\\n": "\n",
  "\\r": "\r",
  "\\t": "\t",
};
/**
 * Parses a double-quoted JSON string starting at `cur`.
 *
 * Two-character escapes are looked up in `ESCAPE_CHAR_MAP`; `\/`, `\t` and
 * `\uXXXX` are additionally handled here so that everything `JSON.stringify`
 * can emit is accepted.
 *
 * @param cur Index of the opening `"`.
 * @returns On success, the decoded text and the index of the closing `"`.
 *          On failure, the index of the offending character: a missing opening
 *          quote, an unknown escape, a raw control character, or the end of the
 *          input when the string is never closed.
 */
private parseString(cur = 0): ParseResult<string> {
  if (this.input[cur] !== '"') {
    return { success: false, position: cur };
  }
  const simpleEscapes: Record<string, string | undefined> = ESCAPE_CHAR_MAP;
  const hexQuad = /^[0-9a-fA-F]{4}$/;
  let value = "";
  cur++;
  // Bounding the loop by the input length keeps an unterminated string from looping forever.
  while (cur < this.input.length) {
    const ch = this.input[cur];
    if (ch === '"') {
      return { success: true, position: cur, value };
    }
    if (ch < "\u0020") {
      // JSON requires control characters to be escaped.
      return { success: false, position: cur };
    }
    if (ch !== "\\") {
      value += ch;
      cur++;
      continue;
    }
    const escape = this.input.slice(cur, cur + 2);
    const mapped = simpleEscapes[escape];
    if (mapped !== undefined) {
      value += mapped;
      cur += 2;
      continue;
    }
    if (escape === "\\/") {
      value += "/";
      cur += 2;
      continue;
    }
    if (escape === "\\t") {
      value += "\t";
      cur += 2;
      continue;
    }
    if (escape === "\\u") {
      const hex = this.input.slice(cur + 2, cur + 6);
      if (hexQuad.test(hex)) {
        // One UTF-16 code unit per escape; surrogate pairs arrive as two escapes.
        value += String.fromCharCode(parseInt(hex, 16));
        cur += 6;
        continue;
      }
    }
    return { success: false, position: cur };
  }
  // Ran off the end of the input without finding the closing quote.
  return { success: false, position: cur };
}
parseNumber
数字的转化其实比想象中要复杂,JSON中的数字,有整数,普通的浮点数,还可以用如2e-6
这样科学计数法来表示的浮点数
但官方的文档中已经将各种情形都用图片表示出来了,尽管它的代码实现略长,我画了一个流程图帮助大家理解。
graph TD
A[parseNumber被调用] --> B{是否有负号}
B --> |是|C(读入负号)
B --> |否|D{是否以0开头}
C --> D
D --> |是|E[读入0]
D --> |否|F[读入非0数字]
F --> G{是否有小数点}
E --> G
G --> |是|H[读入小数点]
H --> I[读入小数点后数字]
I --> J
G --> |否|J{是否有字符e或E}
J --> |是|K[读入e]
K --> L{是否有正号或负号}
L --> |是|M[读入指数的正号或负号]
L --> |否|O
M --> O[读入科学计数法的指数]
J --> |否|P
O --> P[调用parseInt或者parseFloat将读入的字符串转化成数字]
/**
 * Parses a JSON number starting at `cur`: optional `-`, an integer part
 * (`0` or a run of digits not starting with `0`), an optional fraction and an
 * optional exponent. Exponent digits may start with `0` (`1e05`), as the JSON
 * grammar allows.
 *
 * @param cur Index at which the number is expected to start.
 * @returns On success, the numeric value and the index of the number's last
 *          character. On failure, the index where a required digit was missing
 *          (for a fraction or exponent, the index of the `.`, `e`/`E` or sign
 *          that was not followed by a digit).
 */
private parseNumber(cur = 0): ParseResult<number> {
  // Returns the index just past the run of ASCII digits beginning at `from`.
  const readDigits = (from: number): number => {
    let end = from;
    while (
      end < this.input.length &&
      this.input[end] >= "0" &&
      this.input[end] <= "9"
    ) {
      end++;
    }
    return end;
  };

  const start = cur;
  // Minus sign
  if (this.input[cur] === "-") {
    cur++;
  }
  // Integer part: a lone "0", or digits with a non-zero first digit
  if (this.input[cur] === "0") {
    cur++;
  } else {
    const end = readDigits(cur);
    if (end === cur) {
      return { success: false, position: cur };
    }
    cur = end;
  }
  // Fraction: "." must be followed by at least one digit
  if (this.input[cur] === ".") {
    const end = readDigits(cur + 1);
    if (end === cur + 1) {
      return { success: false, position: cur };
    }
    cur = end;
  }
  // Exponent: "e"/"E", optional sign, then at least one digit
  if (this.input[cur] === "e" || this.input[cur] === "E") {
    let digitsStart = cur + 1;
    if (this.input[digitsStart] === "+" || this.input[digitsStart] === "-") {
      digitsStart++;
    }
    const end = readDigits(digitsStart);
    if (end === digitsStart) {
      return { success: false, position: digitsStart - 1 };
    }
    cur = end;
  }
  return {
    success: true,
    value: Number(this.input.slice(start, cur)),
    // `cur` is one past the number; results report the last consumed index.
    position: cur - 1,
  };
}
parseJSON
和guessNextValueType
我们需要通过guessNextValueType
来对后面出现的JSONValue的具体类型做一个猜测,这个猜测的逻辑非常简单,如果是以'['
开头便是数组,以'"'
开头便是字符串。其它类型同理。当我们猜不到的时候,说明传入的JSON字符串是不合法的。
/**
 * Guesses which kind of JSON value starts at `cur` by looking only at the
 * character at that index.
 *
 * @param cur Index of the character to inspect.
 * @returns The guessed kind, or `MaybeJSONValue.UNKNOWN` when the character
 *          cannot begin any JSON value.
 */
private guessNextValueType(cur = 0): MaybeJSONValue {
  const head = this.input[cur];
  const startsNumber = head === "-" || (head >= "0" && head <= "9");
  if (startsNumber) {
    return MaybeJSONValue.NUMBER;
  }
  switch (head) {
    case "[":
      return MaybeJSONValue.ARRAY;
    case "{":
      return MaybeJSONValue.OBJECT;
    case '"':
      return MaybeJSONValue.STRING;
    case "n":
    case "t":
    case "f":
      return MaybeJSONValue.LITERAL;
  }
  return MaybeJSONValue.UNKNOWN;
}
/**
 * Parses whichever JSON value starts at `cur`, dispatching on the kind
 * guessed from its first character. Whitespace before the value is skipped
 * first, so values that follow `, ` or `: ` are accepted.
 *
 * @param cur Index at which the value (or whitespace before it) starts.
 * @returns The result of the specific parser that was chosen, or a failure at
 *          the first non-whitespace index when no value can start there.
 */
private parseJSON(cur = 0): ParseResult<JSONValue> {
  const start = this.skipWhitespace(cur);
  switch (this.guessNextValueType(start)) {
    case MaybeJSONValue.NUMBER:
      return this.parseNumber(start);
    case MaybeJSONValue.ARRAY:
      return this.parseArray(start);
    case MaybeJSONValue.OBJECT:
      return this.parseObject(start);
    case MaybeJSONValue.STRING:
      return this.parseString(start);
    case MaybeJSONValue.LITERAL:
      return this.parseLiteral(start);
    default:
      return { success: false, position: start };
  }
}
parseArray
当我们有上面的parseJSON
之后,parseArray
的实现也变得比较简单,我们要做的就是读取左方括号[
后不断地调用parseJSON
和处理分割元素的逗号,将转换完成的元素置入一个数组之中。
/**
 * Parses a JSON array starting at `cur`. Whitespace is allowed after `[`,
 * around every element and before `]`; a trailing comma is rejected.
 *
 * @param cur Index of the opening `[`.
 * @returns On success, the parsed elements and the index of the closing `]`.
 *          On failure, the index where an element, `,` or `]` was expected,
 *          or the failure position reported by the element parser.
 */
private parseArray(cur = 0): ParseResult<JSONArray> {
  if (this.input[cur] !== "[") {
    return { success: false, position: cur };
  }
  const result: JSONArray = [];
  cur = this.skipWhitespace(cur + 1);
  if (this.input[cur] === "]") {
    return { success: true, position: cur, value: result };
  }
  for (;;) {
    cur = this.skipWhitespace(cur);
    const item = this.parseJSON(cur);
    if (!item.success || item.value === undefined) {
      return { success: false, position: item.position };
    }
    result.push(item.value);
    // After an element only "," (more elements) or "]" (end) may follow.
    cur = this.skipWhitespace(item.position + 1);
    if (this.input[cur] === "]") {
      return { success: true, position: cur, value: result };
    }
    if (this.input[cur] !== ",") {
      return { success: false, position: cur };
    }
    cur++;
  }
}
parseObject
同理,我们要做的事情,和上面的parseArray
大同小异,只不过object是有键的,我们每一轮循环还需要先调用parseString
取到对象的键,接着调用parseJSON
取得值,最后将键和它的值设置到结果中。
/**
 * Parses a JSON object starting at `cur`. Whitespace is allowed after `{`,
 * around keys, `:`, values and `,`, and before `}`.
 *
 * @param cur Index of the opening `{`.
 * @returns On success, the parsed object and the index of the closing `}`.
 *          On failure, the index where a key, `:`, `,` or `}` was expected, or
 *          the failure position reported by the key or value parser.
 */
private parseObject(cur = 0): ParseResult<JSONObject> {
  if (this.input[cur] !== "{") {
    return { success: false, position: cur };
  }
  const result: JSONObject = {};
  cur = this.skipWhitespace(cur + 1);
  if (this.input[cur] === "}") {
    return { success: true, position: cur, value: result };
  }
  for (;;) {
    cur = this.skipWhitespace(cur);
    const key = this.parseString(cur);
    if (!key.success || key.value === undefined) {
      return { success: false, position: key.position };
    }
    // `key.position` is the closing quote; the colon comes after optional whitespace.
    cur = this.skipWhitespace(key.position + 1);
    if (this.input[cur] !== ":") {
      return { success: false, position: cur };
    }
    cur = this.skipWhitespace(cur + 1);
    const member = this.parseJSON(cur);
    // A value that failed to parse must fail the whole object.
    if (!member.success || member.value === undefined) {
      return { success: false, position: member.position };
    }
    // defineProperty makes "__proto__" an ordinary own key instead of replacing the prototype.
    Object.defineProperty(result, key.value, {
      value: member.value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    cur = this.skipWhitespace(member.position + 1);
    if (this.input[cur] === "}") {
      return { success: true, position: cur, value: result };
    }
    if (this.input[cur] !== ",") {
      return { success: false, position: cur };
    }
    cur++;
  }
}
parse
方法和用来测试的test
方法
/**
 * Parses the whole input as a single JSON value. Whitespace around the
 * value is allowed; anything else after it is an error.
 *
 * @returns The parsed value.
 * @throws Error `parse error at <index>` when the input is not valid JSON or
 *         has extra characters after the value.
 */
public parse(): JSONValue {
  const result = this.parseJSON(this.skipWhitespace(0));
  if (!result.success || result.value === undefined) {
    throw new Error(`parse error at ${result.position}`);
  }
  // `position` is the value's last character, so the remainder starts one past it.
  const end = this.skipWhitespace(result.position + 1);
  if (end !== this.input.length) {
    throw new Error(`parse error at ${end}`);
  }
  return result.value;
}
/**
 * Round-trip check: serializes `input` with `JSON.stringify`, parses the text
 * with `JSONParser`, and compares the re-serialized result with the original text.
 *
 * @param input Value to round-trip.
 * @throws Error `<actual> !== <expected>` when the two serializations differ.
 */
function test(input: JSONValue) {
  const expected = JSON.stringify(input);
  const actual = JSON.stringify(new JSONParser(expected).parse());
  if (actual !== expected) {
    throw new Error(`${actual} !== ${expected}`);
  }
}
最后我们来进行简单的测试
// Numbers
for (const sample of [0.1, 1.1, 0, -1, +2, +1e2, +1e-2, 123456, 1.23456e2]) {
  test(sample);
}
// Strings
for (const sample of ["", "Hello, world", "\n", "\b", "\f", "\r", "\\\\\\", '"', '\\\\"']) {
  test(sample);
}
// Literals
for (const sample of [null, true, false]) {
  test(sample);
}
// Arrays
test([]);
test([0, null, undefined, true, false, "", [], [[], []], {}, { value: {} }]);
// Objects
test({
  number: 1,
  string: "",
  array: [],
  object: {},
  null: null,
  boolean: true,
  nested: {
    number: 1,
    string: "",
    array: [123],
    object: {},
    null: null,
    boolean: true,
  },
});
我使用了Visual Studio Code的Quokka扩展,它能实时响应代码的变化,并会在正确运行的代码左侧做出标记。我们的测试是没有问题的
完整代码
/** The JSON keyword values: `true`, `false` and `null`. */
type LiteralValue = null | boolean;
/** JSON values that cannot contain other JSON values. */
type PrimitiveValue = LiteralValue | number | string;
/** Every value a JSON document can describe. */
type JSONValue = PrimitiveValue | JSONArray | JSONObject;
/** A JSON array: an ordered list of JSON values. */
type JSONArray = JSONValue[];
/** A JSON object: string keys mapped to JSON values. */
type JSONObject = { [key: string]: JSONValue };
/** Outcome of one parsing step. */
interface ParseResult<T extends JSONValue> {
  /** Whether the step succeeded. */
  success: boolean;
  /**
   * On success: index, within the whole JSON string, of the last character of the parsed value.
   * On failure: index at which parsing failed.
   */
  position: number;
  /** The parsed value. */
  value?: T;
}
/** Kind of JSON value guessed from the first character of the upcoming text. */
enum MaybeJSONValue {
  LITERAL = 0,
  NUMBER = 1,
  STRING = 2,
  ARRAY = 3,
  OBJECT = 4,
  /** The leading character cannot start any JSON value. */
  UNKNOWN = 5,
}
/**
 * Maps each two-character JSON escape sequence (backslash + one character)
 * to the single character it stands for. `\uXXXX` is not listed because it
 * is six characters long and needs its own handling.
 *
 * The explicit `Record<string, string>` type lets the table be indexed with
 * an arbitrary string without tripping `noImplicitAny`.
 */
const ESCAPE_CHAR_MAP: Readonly<Record<string, string>> = {
  "\\\\": "\\",
  '\\"': '"',
  "\\/": "/",
  "\\b": "\b",
  "\\f": "\f",
  "\\n": "\n",
  "\\r": "\r",
  "\\t": "\t",
};
/**
 * A small recursive-descent JSON parser.
 *
 * Every private `parseXxx(cur)` method starts reading at index `cur` and
 * returns a `ParseResult` whose `position` is the index of the last character
 * of the parsed value on success, or the index where parsing failed.
 */
class JSONParser {
  /** The JSON text being parsed. */
  private readonly input: string;

  constructor(input: string) {
    this.input = input;
  }

  /**
   * Parses one of the keyword literals `true`, `false` or `null` starting at `cur`.
   *
   * @param cur Index at which the literal is expected to start.
   * @returns On success, the literal's value and the index of its last character;
   *          on failure, `success: false` with `position` equal to `cur`.
   */
  private parseLiteral(cur = 0): ParseResult<LiteralValue> {
    const keywords: ReadonlyArray<readonly [string, LiteralValue]> = [
      ["true", true],
      ["false", false],
      ["null", null],
    ];
    for (const [text, literal] of keywords) {
      const matches =
        this.input[cur] === text[0] &&
        this.input.substring(cur, cur + text.length) === text;
      if (matches) {
        return { success: true, position: cur + text.length - 1, value: literal };
      }
    }
    return { success: false, position: cur };
  }

  /**
   * Parses a JSON number starting at `cur`: optional `-`, an integer part
   * (`0` or a run of digits not starting with `0`), an optional fraction and
   * an optional exponent. Exponent digits may start with `0` (`1e05`), as the
   * JSON grammar allows.
   *
   * @param cur Index at which the number is expected to start.
   * @returns On success, the numeric value and the index of the number's last
   *          character. On failure, the index where a required digit was
   *          missing (for a fraction or exponent, the index of the `.`,
   *          `e`/`E` or sign that was not followed by a digit).
   */
  private parseNumber(cur = 0): ParseResult<number> {
    // Returns the index just past the run of ASCII digits beginning at `from`.
    const readDigits = (from: number): number => {
      let end = from;
      while (
        end < this.input.length &&
        this.input[end] >= "0" &&
        this.input[end] <= "9"
      ) {
        end++;
      }
      return end;
    };

    const start = cur;
    // Minus sign
    if (this.input[cur] === "-") {
      cur++;
    }
    // Integer part: a lone "0", or digits with a non-zero first digit
    if (this.input[cur] === "0") {
      cur++;
    } else {
      const end = readDigits(cur);
      if (end === cur) {
        return { success: false, position: cur };
      }
      cur = end;
    }
    // Fraction: "." must be followed by at least one digit
    if (this.input[cur] === ".") {
      const end = readDigits(cur + 1);
      if (end === cur + 1) {
        return { success: false, position: cur };
      }
      cur = end;
    }
    // Exponent: "e"/"E", optional sign, then at least one digit
    if (this.input[cur] === "e" || this.input[cur] === "E") {
      let digitsStart = cur + 1;
      if (this.input[digitsStart] === "+" || this.input[digitsStart] === "-") {
        digitsStart++;
      }
      const end = readDigits(digitsStart);
      if (end === digitsStart) {
        return { success: false, position: digitsStart - 1 };
      }
      cur = end;
    }
    return {
      success: true,
      value: Number(this.input.slice(start, cur)),
      // `cur` is one past the number; results report the last consumed index.
      position: cur - 1,
    };
  }

  /**
   * Parses a double-quoted JSON string starting at `cur`.
   *
   * Two-character escapes are looked up in `ESCAPE_CHAR_MAP`; `\/`, `\t` and
   * `\uXXXX` are additionally handled here so that everything
   * `JSON.stringify` can emit is accepted.
   *
   * @param cur Index of the opening `"`.
   * @returns On success, the decoded text and the index of the closing `"`.
   *          On failure, the index of the offending character: a missing
   *          opening quote, an unknown escape, a raw control character, or the
   *          end of the input when the string is never closed.
   */
  private parseString(cur = 0): ParseResult<string> {
    if (this.input[cur] !== '"') {
      return { success: false, position: cur };
    }
    const simpleEscapes: Record<string, string | undefined> = ESCAPE_CHAR_MAP;
    const hexQuad = /^[0-9a-fA-F]{4}$/;
    let value = "";
    cur++;
    // Bounding the loop by the input length keeps an unterminated string from looping forever.
    while (cur < this.input.length) {
      const ch = this.input[cur];
      if (ch === '"') {
        return { success: true, position: cur, value };
      }
      if (ch < "\u0020") {
        // JSON requires control characters to be escaped.
        return { success: false, position: cur };
      }
      if (ch !== "\\") {
        value += ch;
        cur++;
        continue;
      }
      const escape = this.input.slice(cur, cur + 2);
      const mapped = simpleEscapes[escape];
      if (mapped !== undefined) {
        value += mapped;
        cur += 2;
        continue;
      }
      if (escape === "\\/") {
        value += "/";
        cur += 2;
        continue;
      }
      if (escape === "\\t") {
        value += "\t";
        cur += 2;
        continue;
      }
      if (escape === "\\u") {
        const hex = this.input.slice(cur + 2, cur + 6);
        if (hexQuad.test(hex)) {
          // One UTF-16 code unit per escape; surrogate pairs arrive as two escapes.
          value += String.fromCharCode(parseInt(hex, 16));
          cur += 6;
          continue;
        }
      }
      return { success: false, position: cur };
    }
    // Ran off the end of the input without finding the closing quote.
    return { success: false, position: cur };
  }

  /**
   * Skips JSON whitespace (tab, line feed, carriage return, space).
   *
   * @param cur Index at which to start skipping.
   * @returns Index of the first non-whitespace character at or after `cur`
   *          (the input length if only whitespace remains).
   */
  private skipWhitespace(cur = 0): number {
    while (cur < this.input.length && "\t\n\r ".includes(this.input[cur])) {
      cur++;
    }
    return cur;
  }

  /**
   * Parses a JSON array starting at `cur`. Whitespace is allowed after `[`,
   * around every element and before `]`; a trailing comma is rejected.
   *
   * @param cur Index of the opening `[`.
   * @returns On success, the parsed elements and the index of the closing `]`.
   *          On failure, the index where an element, `,` or `]` was expected,
   *          or the failure position reported by the element parser.
   */
  private parseArray(cur = 0): ParseResult<JSONArray> {
    if (this.input[cur] !== "[") {
      return { success: false, position: cur };
    }
    const result: JSONArray = [];
    cur = this.skipWhitespace(cur + 1);
    if (this.input[cur] === "]") {
      return { success: true, position: cur, value: result };
    }
    for (;;) {
      cur = this.skipWhitespace(cur);
      const item = this.parseJSON(cur);
      if (!item.success || item.value === undefined) {
        return { success: false, position: item.position };
      }
      result.push(item.value);
      // After an element only "," (more elements) or "]" (end) may follow.
      cur = this.skipWhitespace(item.position + 1);
      if (this.input[cur] === "]") {
        return { success: true, position: cur, value: result };
      }
      if (this.input[cur] !== ",") {
        return { success: false, position: cur };
      }
      cur++;
    }
  }

  /**
   * Parses a JSON object starting at `cur`. Whitespace is allowed after `{`,
   * around keys, `:`, values and `,`, and before `}`.
   *
   * @param cur Index of the opening `{`.
   * @returns On success, the parsed object and the index of the closing `}`.
   *          On failure, the index where a key, `:`, `,` or `}` was expected,
   *          or the failure position reported by the key or value parser.
   */
  private parseObject(cur = 0): ParseResult<JSONObject> {
    if (this.input[cur] !== "{") {
      return { success: false, position: cur };
    }
    const result: JSONObject = {};
    cur = this.skipWhitespace(cur + 1);
    if (this.input[cur] === "}") {
      return { success: true, position: cur, value: result };
    }
    for (;;) {
      cur = this.skipWhitespace(cur);
      const key = this.parseString(cur);
      if (!key.success || key.value === undefined) {
        return { success: false, position: key.position };
      }
      // `key.position` is the closing quote; the colon comes after optional whitespace.
      cur = this.skipWhitespace(key.position + 1);
      if (this.input[cur] !== ":") {
        return { success: false, position: cur };
      }
      cur = this.skipWhitespace(cur + 1);
      const member = this.parseJSON(cur);
      // A value that failed to parse must fail the whole object.
      if (!member.success || member.value === undefined) {
        return { success: false, position: member.position };
      }
      // defineProperty makes "__proto__" an ordinary own key instead of replacing the prototype.
      Object.defineProperty(result, key.value, {
        value: member.value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      cur = this.skipWhitespace(member.position + 1);
      if (this.input[cur] === "}") {
        return { success: true, position: cur, value: result };
      }
      if (this.input[cur] !== ",") {
        return { success: false, position: cur };
      }
      cur++;
    }
  }

  /**
   * Guesses which kind of JSON value starts at `cur` by looking only at the
   * character at that index.
   *
   * @param cur Index of the character to inspect.
   * @returns The guessed kind, or `MaybeJSONValue.UNKNOWN` when the character
   *          cannot begin any JSON value.
   */
  private guessNextValueType(cur = 0): MaybeJSONValue {
    const head = this.input[cur];
    const startsNumber = head === "-" || (head >= "0" && head <= "9");
    if (startsNumber) {
      return MaybeJSONValue.NUMBER;
    }
    switch (head) {
      case "[":
        return MaybeJSONValue.ARRAY;
      case "{":
        return MaybeJSONValue.OBJECT;
      case '"':
        return MaybeJSONValue.STRING;
      case "n":
      case "t":
      case "f":
        return MaybeJSONValue.LITERAL;
    }
    return MaybeJSONValue.UNKNOWN;
  }

  /**
   * Parses whichever JSON value starts at `cur`, dispatching on the kind
   * guessed from its first character. Whitespace before the value is skipped first.
   *
   * @param cur Index at which the value (or whitespace before it) starts.
   * @returns The result of the specific parser that was chosen, or a failure
   *          at the first non-whitespace index when no value can start there.
   */
  private parseJSON(cur = 0): ParseResult<JSONValue> {
    const start = this.skipWhitespace(cur);
    switch (this.guessNextValueType(start)) {
      case MaybeJSONValue.NUMBER:
        return this.parseNumber(start);
      case MaybeJSONValue.ARRAY:
        return this.parseArray(start);
      case MaybeJSONValue.OBJECT:
        return this.parseObject(start);
      case MaybeJSONValue.STRING:
        return this.parseString(start);
      case MaybeJSONValue.LITERAL:
        return this.parseLiteral(start);
      default:
        return { success: false, position: start };
    }
  }

  /**
   * Parses the whole input as a single JSON value. Whitespace around the
   * value is allowed; anything else after it is an error.
   *
   * @returns The parsed value.
   * @throws Error `parse error at <index>` when the input is not valid JSON
   *         or has extra characters after the value.
   */
  public parse(): JSONValue {
    const result = this.parseJSON(this.skipWhitespace(0));
    if (!result.success || result.value === undefined) {
      throw new Error(`parse error at ${result.position}`);
    }
    // `position` is the value's last character, so the remainder starts one past it.
    const end = this.skipWhitespace(result.position + 1);
    if (end !== this.input.length) {
      throw new Error(`parse error at ${end}`);
    }
    return result.value;
  }
}
/**
 * Round-trip check: serializes `input` with `JSON.stringify`, parses the text
 * with `JSONParser`, and compares the re-serialized result with the original text.
 *
 * @param input Value to round-trip.
 * @throws Error `<actual> !== <expected>` when the two serializations differ.
 */
function test(input: JSONValue) {
  const expected = JSON.stringify(input);
  const actual = JSON.stringify(new JSONParser(expected).parse());
  if (actual !== expected) {
    throw new Error(`${actual} !== ${expected}`);
  }
}
// Numbers
for (const sample of [0.1, 1.1, 0, -1, +2, +1e2, +1e-2, 123456, 1.23456e2]) {
  test(sample);
}
// Strings
for (const sample of ["", "Hello, world", "\n", "\b", "\f", "\r", "\\\\\\", '"', '\\\\"']) {
  test(sample);
}
// Literals
for (const sample of [null, true, false]) {
  test(sample);
}
// Arrays
test([]);
test([0, null, undefined, true, false, "", [], [[], []], {}, { value: {} }]);
// Objects
test({
  number: 1,
  string: "",
  array: [],
  object: {},
  null: null,
  boolean: true,
  nested: {
    number: 1,
    string: "",
    array: [123],
    object: {},
    null: null,
    boolean: true,
  },
});