golang数据压缩选型

629 阅读3分钟

背景

当数据量过大的时候,压缩是一种很好的解决方式。在实际项目中,选择压缩算法需要考虑压缩的速度、压缩的比率、资源的消耗程度等等。综合考虑上述几点,选出了gzip和snappy两种压缩算法,进行实际的测试。

压缩原理介绍

gzip压缩原理:www.jianshu.com/p/4033028e5… snappy压缩原理:zzjw.cc/post/snappy…

测试结果

image.png image.png 可以看出,snappy是非常优秀的压缩算法

测试代码

package main

import (
	"bytes"
	"compress/gzip"
	"compress/zlib"
	"fmt"
	"io/ioutil"
	"os"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/golang/snappy"
)

// Fixtures shared by the tests and benchmarks in this file; TestMain assigns
// both before the suite runs.
var (
	// marshal holds the protobuf-encoded sample message.
	marshal []byte
	// fr is the open handle to the sample file. It is shared, so its read
	// offset carries over from one reader to the next.
	fr      *os.File
)

// TestMain prepares the fixtures shared by every test and benchmark in this
// file: the protobuf-encoded sample message (marshal) and an open handle to
// the sample file (fr). It then runs the suite and exits with its status.
//
// A setup failure terminates the process with a non-zero status so that a
// broken fixture is never reported as a passing run.
func TestMain(m *testing.M) {
	// NOTE(review): the tests below name this type chat.PMsgBody while it is
	// unqualified here; confirm which spelling matches the package layout.
	msgbody := &PMsgBody{
		MsgId:       1231233,
		ConvType:    1,
		ClientMsgId: "w2eqqweqewqweeqw",
		InnerBody:   []byte("一二三四五六一二三四五六一二三四五六一二三四五六一二三四五六一二三四五六俄期间2克里斯蒂2框架离开家啊啥的空间"),
		InnerType:   123,
	}
	b, err := proto.Marshal(msgbody)
	if err != nil {
		fmt.Println("marshal sample message:", err)
		os.Exit(1)
	}
	marshal = b

	// Open the local sample file used by the file-based tests and benchmarks.
	f, err := os.Open("/Users/xxxxx/Downloads/bookmarks.html")
	if err != nil {
		fmt.Println("open sample file:", err)
		os.Exit(1)
	}
	fr = f

	code := m.Run()
	// os.Exit skips deferred calls, so release the handle explicitly first.
	_ = f.Close()
	os.Exit(code)
}

// TestSnapyStruct round-trips the protobuf-encoded sample message through
// snappy. It prints the payload size before and after compression together
// with the encode and decode durations, and fails if decoding errors, if the
// decoded bytes differ from the input, or if the result cannot be unmarshalled.
func TestSnapyStruct(t *testing.T) {
	fmt.Println("before snapy:", len(marshal))

	start := time.Now()
	encode := snappy.Encode(nil, marshal)
	fmt.Println("snapy struct encode cost:", time.Since(start))
	fmt.Println("after snapy:", len(encode))

	start = time.Now()
	decode, err := snappy.Decode(nil, encode)
	fmt.Println("snappy struct decode cost:", time.Since(start))
	if err != nil {
		t.Fatalf("snappy decode: %v", err)
	}
	if !bytes.Equal(decode, marshal) {
		t.Fatalf("snappy round trip changed the payload: got %d bytes, want %d", len(decode), len(marshal))
	}

	u := &chat.PMsgBody{}
	if err := proto.Unmarshal(decode, u); err != nil {
		t.Fatalf("unmarshal decoded message: %v", err)
	}
}

// TestGzipStruct round-trips the protobuf-encoded sample message through
// gzip. It prints the payload size before and after compression together
// with the encode and decode durations, and fails on any error or if the
// decompressed bytes differ from the input.
func TestGzipStruct(t *testing.T) {
	fmt.Println("before gzip", len(marshal))

	var buf bytes.Buffer
	start := time.Now()
	writer := gzip.NewWriter(&buf)
	if _, err := writer.Write(marshal); err != nil {
		t.Fatalf("gzip write: %v", err)
	}
	// Close alone flushes pending data and writes the footer. Calling Flush
	// first would add a sync marker and inflate the reported size.
	if err := writer.Close(); err != nil {
		t.Fatalf("gzip close: %v", err)
	}
	fmt.Println("gzip struct encode cost:", time.Since(start))
	b := buf.Bytes()
	fmt.Println("after gzip:", len(b))

	// NewReader only parses the header; the decompression work happens while
	// reading, so the timed region has to include ReadAll.
	start = time.Now()
	gzipReader, err := gzip.NewReader(bytes.NewReader(b))
	if err != nil {
		t.Fatalf("gzip new reader: %v", err)
	}
	defer func() {
		_ = gzipReader.Close()
	}()
	all, err := ioutil.ReadAll(gzipReader)
	if err != nil {
		t.Fatalf("gzip read: %v", err)
	}
	fmt.Println("gzip struct decode cost:", time.Since(start))
	if !bytes.Equal(all, marshal) {
		t.Fatalf("gzip round trip changed the payload: got %d bytes, want %d", len(all), len(marshal))
	}

	u := &chat.PMsgBody{}
	if err := proto.Unmarshal(all, u); err != nil {
		t.Fatalf("unmarshal decoded message: %v", err)
	}
}

// TestZlibStruct round-trips the protobuf-encoded sample message through
// zlib. It prints the payload size before and after compression together
// with the encode and decode durations, and fails on any error or if the
// decompressed bytes differ from the input.
func TestZlibStruct(t *testing.T) {
	fmt.Println("before zlib", len(marshal))

	var buf bytes.Buffer
	start := time.Now()
	writer := zlib.NewWriter(&buf)
	if _, err := writer.Write(marshal); err != nil {
		t.Fatalf("zlib write: %v", err)
	}
	// Close alone flushes pending data and writes the checksum. Calling Flush
	// first would add a sync marker and inflate the reported size.
	if err := writer.Close(); err != nil {
		t.Fatalf("zlib close: %v", err)
	}
	fmt.Println("zlib struct encode cost:", time.Since(start))
	b := buf.Bytes()
	fmt.Println("after zlib:", len(b))

	// NewReader only parses the header; the decompression work happens while
	// reading, so the timed region has to include ReadAll.
	start = time.Now()
	zlibReader, err := zlib.NewReader(bytes.NewReader(b))
	if err != nil {
		t.Fatalf("zlib new reader: %v", err)
	}
	defer func() {
		_ = zlibReader.Close()
	}()
	all, err := ioutil.ReadAll(zlibReader)
	if err != nil {
		t.Fatalf("zlib read: %v", err)
	}
	fmt.Println("zlib struct decode cost:", time.Since(start))
	if !bytes.Equal(all, marshal) {
		t.Fatalf("zlib round trip changed the payload: got %d bytes, want %d", len(all), len(marshal))
	}

	u := &chat.PMsgBody{}
	if err := proto.Unmarshal(all, u); err != nil {
		t.Fatalf("unmarshal decoded message: %v", err)
	}
}

// TestGzipFile round-trips the contents of the shared sample file through
// gzip. It prints the size before and after compression together with the
// encode and decode durations, and fails on any error or if the decompressed
// bytes differ from the file contents.
func TestGzipFile(t *testing.T) {
	// fr is shared with the other file tests and benchmarks, so rewind it
	// first; otherwise a previous reader leaves it at EOF and this test would
	// compress zero bytes. Whence 0 means "relative to the start of the file".
	if _, err := fr.Seek(0, 0); err != nil {
		t.Fatalf("rewind sample file: %v", err)
	}
	all, err := ioutil.ReadAll(fr)
	if err != nil {
		t.Fatalf("read sample file: %v", err)
	}
	fmt.Println("before gzip file len:", len(all))

	var buf bytes.Buffer
	start := time.Now()
	writer := gzip.NewWriter(&buf)
	if _, err := writer.Write(all); err != nil {
		t.Fatalf("gzip write: %v", err)
	}
	// Close emits the remaining compressed data and the footer, so it belongs
	// inside the timed region. A Flush before it would only add a sync marker.
	if err := writer.Close(); err != nil {
		t.Fatalf("gzip close: %v", err)
	}
	fmt.Println("gzip file encode cost:", time.Since(start))
	b := buf.Bytes()
	fmt.Println("after gzip file len:", len(b))

	// NewReader only parses the header; the decompression work happens while
	// reading, so the timed region has to include ReadAll.
	start = time.Now()
	gzipReader, err := gzip.NewReader(bytes.NewReader(b))
	if err != nil {
		t.Fatalf("gzip new reader: %v", err)
	}
	defer func() {
		_ = gzipReader.Close()
	}()
	decoded, err := ioutil.ReadAll(gzipReader)
	if err != nil {
		t.Fatalf("gzip read: %v", err)
	}
	fmt.Println("gzip file decode cost:", time.Since(start))
	if !bytes.Equal(decoded, all) {
		t.Fatalf("gzip round trip changed the payload: got %d bytes, want %d", len(decoded), len(all))
	}
}

// TestSnapyFile round-trips the contents of the shared sample file through
// snappy. It prints the size before and after compression together with the
// encode and decode durations, and fails on any error or if the decoded bytes
// differ from the file contents.
func TestSnapyFile(t *testing.T) {
	// fr is shared with the other file tests and benchmarks, so rewind it
	// first; otherwise a previous reader leaves it at EOF and this test would
	// compress zero bytes. Whence 0 means "relative to the start of the file".
	if _, err := fr.Seek(0, 0); err != nil {
		t.Fatalf("rewind sample file: %v", err)
	}
	all, err := ioutil.ReadAll(fr)
	if err != nil {
		t.Fatalf("read sample file: %v", err)
	}
	fmt.Println("before Snapy file len:", len(all))

	start := time.Now()
	encode := snappy.Encode(nil, all)
	fmt.Println("snapy file encode cost:", time.Since(start))
	fmt.Println("after Snapy file len:", len(encode))

	start = time.Now()
	decoded, err := snappy.Decode(nil, encode)
	// The label says "file" so this line is not confused with the struct test.
	fmt.Println("snappy file decode cost:", time.Since(start))
	if err != nil {
		t.Fatalf("snappy decode: %v", err)
	}
	if !bytes.Equal(decoded, all) {
		t.Fatalf("snappy round trip changed the payload: got %d bytes, want %d", len(decoded), len(all))
	}
}

// TestZlibFile round-trips the contents of the shared sample file through
// zlib. It prints the size before and after compression together with the
// encode and decode durations, and fails on any error or if the decompressed
// bytes differ from the file contents.
func TestZlibFile(t *testing.T) {
	// fr is shared with the other file tests and benchmarks, so rewind it
	// first; otherwise a previous reader leaves it at EOF and this test would
	// compress zero bytes. Whence 0 means "relative to the start of the file".
	if _, err := fr.Seek(0, 0); err != nil {
		t.Fatalf("rewind sample file: %v", err)
	}
	all, err := ioutil.ReadAll(fr)
	if err != nil {
		t.Fatalf("read sample file: %v", err)
	}
	fmt.Println("before zlib file len:", len(all))

	var buf bytes.Buffer
	start := time.Now()
	writer := zlib.NewWriter(&buf)
	if _, err := writer.Write(all); err != nil {
		t.Fatalf("zlib write: %v", err)
	}
	// Close emits the remaining compressed data and the checksum, so it
	// belongs inside the timed region. A Flush before it would only add a
	// sync marker.
	if err := writer.Close(); err != nil {
		t.Fatalf("zlib close: %v", err)
	}
	fmt.Println("zlib file encode cost:", time.Since(start))
	b := buf.Bytes()
	fmt.Println("after zlib file len:", len(b))

	// NewReader only parses the header; the decompression work happens while
	// reading, so the timed region has to include ReadAll.
	start = time.Now()
	zlibReader, err := zlib.NewReader(bytes.NewReader(b))
	if err != nil {
		t.Fatalf("zlib new reader: %v", err)
	}
	defer func() {
		_ = zlibReader.Close()
	}()
	decoded, err := ioutil.ReadAll(zlibReader)
	if err != nil {
		t.Fatalf("zlib read: %v", err)
	}
	fmt.Println("zlib file decode cost:", time.Since(start))
	if !bytes.Equal(decoded, all) {
		t.Fatalf("zlib round trip changed the payload: got %d bytes, want %d", len(decoded), len(all))
	}
}

// BenchmarkGzipFile measures one full gzip compress + decompress cycle over
// the contents of the shared sample file per iteration.
func BenchmarkGzipFile(b *testing.B) {
	// Load the input once, outside the timed region. Reading fr inside the
	// loop would hit EOF after the first iteration, so every later iteration
	// would compress an empty slice. Whence 0 means "relative to the start
	// of the file".
	if _, err := fr.Seek(0, 0); err != nil {
		b.Fatalf("rewind sample file: %v", err)
	}
	all, err := ioutil.ReadAll(fr)
	if err != nil {
		b.Fatalf("read sample file: %v", err)
	}
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		var buf bytes.Buffer
		writer := gzip.NewWriter(&buf)
		if _, err := writer.Write(all); err != nil {
			b.Fatalf("gzip write: %v", err)
		}
		if err := writer.Close(); err != nil {
			b.Fatalf("gzip close: %v", err)
		}

		gzipReader, err := gzip.NewReader(bytes.NewReader(buf.Bytes()))
		if err != nil {
			b.Fatalf("gzip new reader: %v", err)
		}
		// NewReader only parses the header; drain the reader so the
		// decompression itself is part of the measurement.
		if _, err := ioutil.ReadAll(gzipReader); err != nil {
			b.Fatalf("gzip read: %v", err)
		}
		if err := gzipReader.Close(); err != nil {
			b.Fatalf("gzip reader close: %v", err)
		}
	}
}

// BenchmarkSnapyFile measures one full snappy encode + decode cycle over the
// contents of the shared sample file per iteration.
func BenchmarkSnapyFile(b *testing.B) {
	// Load the input once, outside the timed region. Reading fr inside the
	// loop would hit EOF after the first iteration, so every later iteration
	// would compress an empty slice. Whence 0 means "relative to the start
	// of the file".
	if _, err := fr.Seek(0, 0); err != nil {
		b.Fatalf("rewind sample file: %v", err)
	}
	all, err := ioutil.ReadAll(fr)
	if err != nil {
		b.Fatalf("read sample file: %v", err)
	}
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		encode := snappy.Encode(nil, all)
		if _, err := snappy.Decode(nil, encode); err != nil {
			b.Fatalf("snappy decode: %v", err)
		}
	}
}
// BenchmarkGzipStruct measures one full gzip compress + decompress cycle over
// the protobuf-encoded sample message per iteration.
func BenchmarkGzipStruct(b *testing.B) {
	for n := 0; n < b.N; n++ {
		var buf bytes.Buffer
		writer := gzip.NewWriter(&buf)
		if _, err := writer.Write(marshal); err != nil {
			b.Fatalf("gzip write: %v", err)
		}
		if err := writer.Close(); err != nil {
			b.Fatalf("gzip close: %v", err)
		}

		gzipReader, err := gzip.NewReader(bytes.NewReader(buf.Bytes()))
		if err != nil {
			b.Fatalf("gzip new reader: %v", err)
		}
		// NewReader only parses the header; drain the reader so the
		// decompression itself is part of the measurement, as it is in the
		// snappy benchmark.
		if _, err := ioutil.ReadAll(gzipReader); err != nil {
			b.Fatalf("gzip read: %v", err)
		}
		if err := gzipReader.Close(); err != nil {
			b.Fatalf("gzip reader close: %v", err)
		}
	}
}

// BenchmarkSnapyStruct measures one full snappy encode + decode cycle over
// the protobuf-encoded sample message per iteration.
func BenchmarkSnapyStruct(b *testing.B) {
	for i := 0; i < b.N; i++ {
		encode := snappy.Encode(nil, marshal)
		// A decode failure would make the timing meaningless, so stop on it
		// instead of dropping the error.
		if _, err := snappy.Decode(nil, encode); err != nil {
			b.Fatalf("snappy decode: %v", err)
		}
	}
}