本文已参与「新人创作礼」活动,一起开启掘金创作之路。
C#重写Lz4 JavaScript文本压缩算法(一) - 掘金 (juejin.cn)
C#重写Lz4 JavaScript文本压缩算法(二) - 掘金 (juejin.cn)
C#重写Lz4 JavaScript文本压缩算法(三) - 掘金 (juejin.cn)
util.js -> Util.cs
using System;
namespace Lz4CSharp
{
/// <summary>
/// Bit-twiddling helpers ported from the <c>util.js</c> module of the lz4js library.
/// Byte buffers are represented as <see cref="UInt32"/> arrays holding one byte per element.
/// </summary>
class Util
{
    /// <summary>
    /// Logical (zero-fill) right shift, the counterpart of the JavaScript <c>&gt;&gt;&gt;</c> operator.
    /// </summary>
    /// <param name="x">Value to shift.</param>
    /// <param name="y">Number of bit positions to shift by.</param>
    /// <returns>
    /// <paramref name="x"/> shifted right by <paramref name="y"/> bits with zeros shifted in.
    /// A count of zero or less returns <paramref name="x"/> unchanged and a count of 32 or more
    /// returns 0, exactly as the previous bit-by-bit loop did.
    /// </returns>
    public static UInt32 UInt32MoveRight(UInt32 x, int y)
    {
        if (y <= 0)
        {
            return x;
        }

        if (y >= 32)
        {
            // C# would mask the count to 5 bits here; keep the "everything shifted out" result.
            return 0;
        }

        // Shifting an unsigned operand is already a logical shift, so no sign mask is needed.
        return x >> y;
    }

    /// <summary>
    /// Simple integer hash from http://burtleburtle.net/bob/hash/integer.html.
    /// Chosen because it doesn't use multiply and achieves full avalanche.
    /// </summary>
    /// <param name="a">Value to hash.</param>
    /// <returns>The 32-bit hash of <paramref name="a"/>.</returns>
    public static UInt32 hashU32(UInt32 a)
    {
        // The hash depends on 32-bit wrap-around, so it must never trap on overflow,
        // even when the project is compiled with arithmetic overflow checking enabled.
        unchecked
        {
            a = a + 0x7ed55d16u + (a << 12);
            a = a ^ 0xc761c23cu ^ (a >> 19);
            a = a + 0x165667b1u + (a << 5);
            a = (a + 0xd3a2646cu) ^ (a << 9);
            a = a + 0xfd7046c5u + (a << 3);
            return a ^ 0xb55a4f09u ^ (a >> 16);
        }
    }

    /// <summary>
    /// Reads a 64-bit little-endian integer from an array.
    /// </summary>
    /// <param name="b">Buffer holding one byte per element.</param>
    /// <param name="n">Index of the least significant byte.</param>
    /// <returns>The value assembled from the eight elements starting at <paramref name="n"/>.</returns>
    public static UInt64 readU64(UInt32[] b, int n)
    {
        UInt64 value = 0;
        for (int i = 0; i < 8; i++)
        {
            // Widen BEFORE shifting: a 32-bit shift only uses the low five bits of the
            // count, so "<< 32" on a UInt32 would silently behave like "<< 0".
            value |= (UInt64)b[n + i] << (8 * i);
        }

        return value;
    }

    /// <summary>
    /// Reads a 32-bit little-endian integer from an array.
    /// </summary>
    /// <param name="b">Buffer holding one byte per element.</param>
    /// <param name="n">Index of the least significant byte.</param>
    /// <returns>The value assembled from the four elements starting at <paramref name="n"/>.</returns>
    public static UInt32 readU32(UInt32[] b, int n)
    {
        return b[n]
            | (b[n + 1] << 8)
            | (b[n + 2] << 16)
            | (b[n + 3] << 24);
    }

    /// <summary>
    /// Writes a 32-bit little-endian integer into an array.
    /// </summary>
    /// <param name="b">Destination buffer; receives one byte per element.</param>
    /// <param name="n">Index that receives the least significant byte.</param>
    /// <param name="x">Value to write.</param>
    public static void writeU32(UInt32[] b, int n, UInt32 x)
    {
        b[n] = x & 0xff;
        b[n + 1] = (x >> 8) & 0xff;
        b[n + 2] = (x >> 16) & 0xff;
        b[n + 3] = (x >> 24) & 0xff;
    }

    /// <summary>
    /// Multiplies two numbers using 32-bit integer multiplication (the low 32 bits of the product).
    /// </summary>
    /// <param name="a">First factor.</param>
    /// <param name="b">Second factor.</param>
    /// <returns>The product modulo 2^32.</returns>
    public static UInt32 imul(UInt32 a, UInt32 b)
    {
        // The Emscripten 16-bit split exists only because JavaScript numbers are doubles;
        // an unchecked UInt32 multiply yields the same low 32 bits directly.
        return unchecked(a * b);
    }
}
}
测试
C#端
/// <summary>
/// Round-trip demo: compresses a sample text with Lz4, prints the compressed bytes as Base64,
/// then decodes, decompresses and prints the text again.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Lz4.Init();

    string str1 = "Hello World! 你好 世界!";
    string str2 = "A painter hangs his or her finished picture on a wall, and everyone can see it. A composer writes a work, but no one can hear it until it is performed. Professional singers and players have great responsibilities, for the composer is utterly dependent on them. A student of music needs as long and as arduous a training to become a performer as a medical student needs to become a doctor. Most training is concerned with technique, for musicians have to have the muscular proficiency of an athlete or a ballet dancer. Singers practice breathing every day, as their vocal chords would be inadequate without controlled muscular support. String players practice moving the fingers of the left hand up and down, while drawing the bow to and fro with the right arm -- two entirely different movements.画家将已完成的作品挂在墙上,每个人都可以观赏到。 作曲家写完了一部作品,得由 演奏者将其演奏出来,其他人才能得以欣赏。因为作曲家是如此完全地依赖于职业歌手和职 业演奏者,所以职业歌手和职业演奏者肩上的担子可谓不轻。 一名学音乐的学生要想成为 一名演奏者,需要经受长期的、严格的训练,就象一名医科的学生要成为一名医生一样。 绝 大多数的训练是技巧性的。 音乐家们控制肌肉的熟练程度,必须达到与运动员或巴蕾舞演 员相当的水平。 歌手们每天都练习吊嗓子,因为如果不能有效地控制肌肉的话,他们的声 带将不能满足演唱的要求。 弦乐器的演奏者练习的则是在左手的手指上下滑动的同时,用 右手前后拉动琴弓--两个截然不同的动作。";

    // The Lz4 port works on UInt32 arrays holding one byte per element,
    // so every buffer is widened on the way in and narrowed on the way out.
    Converter<byte, UInt32> widen = value => (UInt32)value;
    Converter<UInt32, byte> narrow = value => (byte)value;

    // Compress the UTF-8 bytes of the sample text and show them as Base64.
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(str2);
    var packed = Lz4.compress(Array.ConvertAll<byte, UInt32>(utf8Bytes, widen));
    byte[] packedBytes = Array.ConvertAll<UInt32, byte>(packed, narrow);
    string base64Str = Convert.ToBase64String(packedBytes);
    Console.WriteLine(base64Str);

    // Reverse the steps: Base64 -> compressed bytes -> original bytes -> text.
    byte[] receivedBytes = Convert.FromBase64String(base64Str);
    var unpacked = Lz4.decompress(Array.ConvertAll<byte, UInt32>(receivedBytes, widen));
    byte[] unpackedBytes = Array.ConvertAll<UInt32, byte>(unpacked, narrow);
    Console.WriteLine(Encoding.UTF8.GetString(unpackedBytes));

    Console.ReadKey();
}
测试结果
JavaScript端
<script>
export default {
mounted() {
var lz4 = require("lz4js");
var str1 = "Hello World! 你好 世界!"
var str2 = 'A painter hangs his or her finished picture on a wall, and everyone can see it. A composer writes a work, but no one can hear it until it is performed. Professional singers and players have great responsibilities, for the composer is utterly dependent on them. A student of music needs as long and as arduous a training to become a performer as a medical student needs to become a doctor. Most training is concerned with technique, for musicians have to have the muscular proficiency of an athlete or a ballet dancer. Singers practice breathing every day, as their vocal chords would be inadequate without controlled muscular support. String players practice moving the fingers of the left hand up and down, while drawing the bow to and fro with the right arm -- two entirely different movements.画家将已完成的作品挂在墙上,每个人都可以观赏到。 作曲家写完了一部作品,得由 演奏者将其演奏出来,其他人才能得以欣赏。因为作曲家是如此完全地依赖于职业歌手和职 业演奏者,所以职业歌手和职业演奏者肩上的担子可谓不轻。 一名学音乐的学生要想成为 一名演奏者,需要经受长期的、严格的训练,就象一名医科的学生要成为一名医生一样。 绝 大多数的训练是技巧性的。 音乐家们控制肌肉的熟练程度,必须达到与运动员或巴蕾舞演 员相当的水平。 歌手们每天都练习吊嗓子,因为如果不能有效地控制肌肉的话,他们的声 带将不能满足演唱的要求。 弦乐器的演奏者练习的则是在左手的手指上下滑动的同时,用 右手前后拉动琴弓--两个截然不同的动作。'
var u8a = Buffer.from(str2);
var compressed = lz4.compress(u8a);
var base64Str = this.uint8arrayToBase64(compressed);
console.log(base64Str);
var decompressU8A = Uint8Array.from(atob(base64Str), c => c.charCodeAt(0));
var decompressed = lz4.decompress(decompressU8A);
// 由于部分浏览器对String栈有大小限制,下面这行解析字符串,对于本文的短字符串没有问题,但是对于长字符串就会导致栈溢出(可自行测试几兆的字符串,及不同的浏览器)
// var encodedString = String.fromCharCode.apply(null, decompressed);
// 使用分段解析可以避免出现栈溢出的问题
var encodedString = this.sliceReadArray(decompressed);
var decodedString = decodeURIComponent(escape(encodedString));
console.log(decodedString);
},
methods:{
uintToString(uintArray) {
var encodedString = String.fromCharCode.apply(null, uintArray),
decodedString = decodeURIComponent(escape(encodedString));
return decodedString;
},
uint8arrayToBase64(u8Arr) {
let CHUNK_SIZE = 0x8000; //arbitrary number
let index = 0;
let length = u8Arr.length;
let result = '';
let slice;
while (index < length) {
slice = u8Arr.subarray(index, Math.min(index + CHUNK_SIZE, length));
result += String.fromCharCode.apply(null, slice);
index += CHUNK_SIZE;
}
return btoa(result);
},
sliceReadArray(array){
var res = '';
var chunk = 8 * 1024;
var i;
for (i = 0; i < array.length / chunk; i++) {
res += String.fromCharCode.apply(null, array.slice(i * chunk, (i + 1) * chunk));
}
res += String.fromCharCode.apply(null, array.slice(i * chunk));
return res;
}
}
};
</script>
测试结果
不足之处
由于一开始的误判,将本应使用的byte类型写成了UInt32,导致C#端在压缩及解压时,每个字节的数据都要占用4个字节来存储,内存占用约为实际所需的4倍。