用Golang以流的方式读取和解码大型JSON文件的方法

1,052 阅读2分钟

在这个例子中,我们将读取一个JSON文件并对其进行解码,不过是以流的方式逐个元素地读取和解码。这样做的原因是,我们事先不知道这个文件会有多大:它可能小到约一千字节(1 KB),也可能大到几百MB甚至几GB。如果必须把整个文件一次性读入内存,应用程序可能会因为内存占用过高而出现很多问题。这就是我们在这里使用JSON流式解码的原因。下面是示例文件 users.json 的内容:

[  {    "id": 1,    "name": "user-1"  },  {    "id": 2,    "name": "user-2"  },  {    "id": 3,    "name": "user-3"  },  {    "id": 4,    "name": "user-4"  },  {    "id": 5,    "name": "user-5"  },  ...,  {    "id": 1000,    "name": "user-1000"  }]

用户

user.go

package user

// User is a single element of the streamed JSON array. Its fields are
// populated from the "id" and "name" keys of each JSON object.
type User struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

stream.go

package user

import (
	"encoding/json"
	"fmt"
	"os"
)

// Entry is one message delivered over a Stream's channel. Exactly one of its
// fields is meaningful: Error is set when streaming failed, otherwise User
// holds the decoded element.
type Entry struct {
	// Error is non-nil when opening the file or decoding its content failed.
	Error error
	// User is the decoded element; it is the zero value when Error is non-nil.
	User User
}

// Stream transmits decoded entries to a consumer over a channel.
type Stream struct {
	// stream carries one Entry per decoded element, or an Entry holding an
	// error; Start closes it when streaming ends.
	stream chan Entry
}

// NewJSONStream builds a Stream backed by a fresh unbuffered channel, so every
// entry sent by Start is handed directly to a receiver reading from Watch.
func NewJSONStream() Stream {
	entries := make(chan Entry)

	return Stream{stream: entries}
}

// Watch exposes the receive-only side of the stream's channel. Every received
// Entry carries either an error or a decoded User. Consumers can simply range
// over the returned channel: `Start` closes it when streaming ends, including
// after an error, so no explicit exit is needed.
func (s Stream) Watch() <-chan Entry {
	var entries <-chan Entry = s.stream

	return entries
}

// Start opens the JSON file at path and streams the elements of its top-level
// array over the channel returned by Watch. Elements are decoded one at a time,
// so the whole file is never held in memory.
//
// Start blocks until the array has been fully consumed or an error occurs;
// because the sends block until received, a consumer must be reading from
// Watch concurrently. On failure a single Entry carrying the error is sent and
// streaming stops. The channel is closed when Start returns in either case, so
// Start must be called at most once per Stream.
func (s Stream) Start(path string) {
	// Closing the channel ends any `range` loop over Watch, on success or error.
	defer close(s.stream)

	file, err := os.Open(path)
	if err != nil {
		s.stream <- Entry{Error: fmt.Errorf("open file: %w", err)}
		return
	}
	defer file.Close()

	decoder := json.NewDecoder(file)

	// Consume the opening delimiter and insist on an array. Without this check
	// a top-level object or scalar would fall through to the element loop and
	// surface as a misleading element-decoding error.
	token, err := decoder.Token()
	if err != nil {
		s.stream <- Entry{Error: fmt.Errorf("decode opening delimiter: %w", err)}
		return
	}
	if delim, ok := token.(json.Delim); !ok || delim != '[' {
		s.stream <- Entry{Error: fmt.Errorf("decode opening delimiter: expected '[', got %v", token)}
		return
	}

	// Decode array elements one by one; i is the 1-based position of the
	// element, used only for error reporting.
	i := 1
	for decoder.More() {
		var user User
		if err := decoder.Decode(&user); err != nil {
			s.stream <- Entry{Error: fmt.Errorf("decode entry %d: %w", i, err)}
			return
		}
		s.stream <- Entry{User: user}

		i++
	}

	// Consume the closing `]` so a truncated or malformed array end is reported.
	if _, err := decoder.Token(); err != nil {
		s.stream <- Entry{Error: fmt.Errorf("decode closing delimiter: %w", err)}
		return
	}
}

main.go

package main

import (
	"log"

	"github.com/you/internal/user"
)

// main streams users.json and logs every decoded user, or the error that
// stopped the stream.
func main() {
	stream := user.NewJSONStream()

	// Produce in the background and consume on the main goroutine. Ranging here
	// keeps the program alive until Start closes the channel, so the final
	// entry is always logged before exit.
	go stream.Start("users.json")

	for data := range stream.Watch() {
		if data.Error != nil {
			log.Println(data.Error)
			// An error entry carries no user; skip the user log line. Start
			// closes the channel after an error, which ends this loop.
			continue
		}
		log.Println(data.User.ID, ":", data.User.Name)
	}
}

测试

正如你在下面看到的,每个JSON块都被逐一打印出来:

$ go run -race .
2021/04/15 22:45:23 1 : user-1
2021/04/15 22:45:23 2 : user-2
2021/04/15 22:45:23 3 : user-3
2021/04/15 22:45:23 4 : user-4
2021/04/15 22:45:23 5 : user-5
...
2021/04/15 22:45:24 1000 : user-1000