使用流式批量处理超大JSON数据

1,189 阅读1分钟

业务场景

公司从第三方下载的数据报告是一个 JSON 数组格式的压缩文本文件,文件大小不固定,单个文件可能超过几百兆。如果直接把整个文件转成字符串再解析成对象,会占用大量内存,容易出现 OOM。

数据格式:
[
    {
        "key1":"value1",
        "key2":"value2",
        "key3":"value3",
        "key4":"value4",
        "key5":"value5"
    },
    {
        "key1":"value1",
        "key2":"value2",
        "key3":"value3",
        "key4":"value4",
        "key5":"value5"
    },
    {
        "key1":"value1",
        "key2":"value2",
        "key3":"value3",
        "key4":"value4",
        "key5":"value5"
    },
    {
        "key1":"value1",
        "key2":"value2",
        "key3":"value3",
        "key4":"value4",
        "key5":"value5"
    },
    {
        "key1":"value1",
        "key2":"value2",
        "key3":"value3",
        "key4":"value4",
        "key5":"value5"
    }
]

解决方案

采用 fastjson 提供的 JSONReader,以流式方式逐个读取数组中的对象,避免一次性把全部数据加载到内存

/**
 * Streams a gzip-compressed JSON array and handles its elements one at a time.
 *
 * <p>The bytes are decompressed and parsed incrementally through fastjson's
 * {@code JSONReader}, so the decompressed text is never built as a single string and
 * only one {@code Entity} is populated per loop iteration.
 *
 * @param content gzip-compressed bytes of a JSON array of objects
 */
public void processReport(byte[] content) {
    // Decode explicitly as UTF-8 instead of relying on the platform default charset.
    // NOTE(review): assumes the third-party report is UTF-8 encoded — confirm with the provider.
    try (InputStreamReader inputStreamReader = new InputStreamReader(
                 new GZIPInputStream(new ByteArrayInputStream(content)),
                 java.nio.charset.StandardCharsets.UTF_8);
         JSONReader jsonReader = new JSONReader(inputStreamReader)) {
        jsonReader.startArray();
        while (jsonReader.hasNext()) {
            jsonReader.startObject();
            // Populate the entity field by field from the stream. The original author
            // advises against jsonReader.readObject(Entity.class) here because it relies
            // on reflection, which they found to be memory-hungry.
            Entity entity = new Entity().readJson(jsonReader);
            jsonReader.endObject();

            // Business handling of `entity` goes here, one object at a time.
        }
        jsonReader.endArray();
    } catch (Exception e) {
        // Placeholder: handle the failure here (log it, or rethrow wrapped with `e` as
        // the cause). Left empty, I/O and parse errors are silently dropped.
    }
}

上面用到的实体对象

import com.alibaba.fastjson.JSONReader;
import lombok.Data;

/**
 * One element of the report array, filled in by hand from a streaming {@link JSONReader}
 * rather than through reflective deserialization.
 */
@Data
public class Entity {
    private String key1;
    private String key2;
    private String key3;
    private String key4;
    private String key5;

    /**
     * Populates this entity from the JSON object the reader is currently inside.
     *
     * <p>This method only consumes key/value pairs; it does not call
     * {@code startObject()} or {@code endObject()} itself, so the caller must wrap the
     * call with them.
     *
     * @param jsonReader reader positioned just after the opening of a JSON object
     * @return this entity, to allow {@code new Entity().readJson(reader)}
     */
    public Entity readJson(JSONReader jsonReader) {
        while (jsonReader.hasNext()) {
            String key = jsonReader.readString();
            switch (key) {
                case "key1":
                    this.key1 = jsonReader.readString();
                    break;
                case "key2":
                    this.key2 = jsonReader.readString();
                    break;
                case "key3":
                    this.key3 = jsonReader.readString();
                    break;
                case "key4":
                    this.key4 = jsonReader.readString();
                    break;
                case "key5":
                    this.key5 = jsonReader.readString();
                    break;
                default:
                    // Consume and discard the value of an unrecognised key. Without this
                    // the next iteration would read that value as if it were a key, and
                    // a value such as "key1" would be mistaken for a field name.
                    jsonReader.readObject();
                    break;
            }
        }
        return this;
    }
}