字符串遍历

byte 是 uint8 的别名

rune 是 int32 的别名，相当于 Go 里面的 char

如果包含汉字，以下遍历方式会出现乱码：

str := "你好世界！"

for i := 0; i < len(str); i++ {
  fmt.Printf("%c", str[i])
}
// ä½ å¥½ä¸çï¼%

解决方案1：转成 rune 切片再遍历

str := "你好世界！"
newStr := []rune(str)
for i := 0; i < len(newStr); i++ {
  fmt.Printf("%c", newStr[i])
}
// 你好世界！

解决方案2：使用 range 来遍历

range 按照字符遍历，前面的 for 按照字节遍历

str := "你好世界123"
for index, value := range str {
  fmt.Printf("index = %d value = %c\n", index, value)
}

/*
index = 0 value = 你
index = 3 value = 好
index = 6 value = 世
index = 9 value = 界
index = 12 value = 1
index = 13 value = 2
index = 14 value = 3
*/

strings 包

字符串比较：使用 strings.Compare 比较两个字符串的字典序

strings.Compare("aaa", "bbb") // -1
strings.Compare("baa", "abb") // 1
strings.Compare("aaa", "aaa") // 0

查找函数：使用 strings.Index 查找字符串中子串的位置（第 1 个），不存在返回 -1

strings.Index("hello world", "o") // 4

类似的，使用 strings.LastIndex 查找字符串子串出现的最后一个位置，不存在返回 -1

strings.LastIndex("hello world", "o") // 7

Count、Repeat：

使用 strings.Count 统计子串在整体中出现的次数：

strings.Count("abc abc abab abc", "abc") // 3

使用 strings.Repeat 将字符串重复指定次数：

strings.Repeat("abc", 3) // abcabcabc

Replace、Split、Join：

strings.Replace 实现字符串替换

str := "acaacccc"

// 局部替换 param3: 替换次数，< 0 则全部替换
strings.Replace(str, "a", "b", 2)  // bcbacccc
strings.Replace(str, "a", "b", -1) // bcbbcccc

// 全部替换
strings.ReplaceAll(str, "a", "b")  // bcbbcccc

strings.Split 实现字符串切割

str := "abc,bbc,bbd"

slice := strings.Split(str, ",")
fmt.Println(slice) // [abc bbc bbd]

strings.Join 实现字符串拼接

slice := []string{"aab", "aba", "baa"}

str := strings.Join(slice, ",")
fmt.Println(str) // aab,aba,baa

bytes 包

Buffer 是 bytes 包中定义的 type Buffer struct {...}，Buffer 是一个变长的可读可写的缓冲区。

创建缓冲器：bytes.NewBufferString、bytes.NewBuffer

// main builds bytes.Buffer values with both constructors and prints their
// contents, first formatted as text and then as raw byte slices.
func main() {
	// Three equivalent ways to seed a buffer with the bytes of "hello".
	fromString := bytes.NewBufferString("hello")
	fromConverted := bytes.NewBuffer([]byte("hello"))
	fromLiteral := bytes.NewBuffer([]byte{'h', 'e', 'l', 'l', 'o'})

	// The same three-column layout is used for the text and the byte views.
	const layout = "%v,%v,%v\n"
	fmt.Printf(layout, fromString, fromConverted, fromLiteral)
	fmt.Printf(layout, fromString.Bytes(), fromConverted.Bytes(), fromLiteral.Bytes())

	// Buffers seeded with empty input hold no bytes.
	emptyFromString := bytes.NewBufferString("")
	emptyFromBytes := bytes.NewBuffer([]byte{})
	fmt.Println(emptyFromString.Bytes(), emptyFromBytes.Bytes())
}


/*
hello,hello,hello
[104 101 108 108 111],[104 101 108 108 111],[104 101 108 108 111]
[] []
*/

写入缓冲器：Write、WriteString、WriteByte、WriteRune、WriteTo（注意：WriteTo 是把缓冲区中未读的数据写出到一个 io.Writer，而不是向缓冲区写入）

// main appends to a buffer with each of the Write* methods and prints the
// buffer's text and raw bytes before and after the appends.
func main() {
	out := bytes.NewBufferString("a")

	// dump prints the buffer as text followed by its raw bytes.
	dump := func() {
		fmt.Printf("%v, %v\n", out.String(), out.Bytes())
	}

	dump() // a, [97]

	// Return values are discarded: per the bytes.Buffer documentation these
	// methods always report a nil error.
	out.Write([]byte("b")) // append a byte slice
	out.WriteString("c")   // append a string
	out.WriteByte('d')     // append a single byte
	out.WriteRune('e')     // append the UTF-8 encoding of a rune

	dump() // abcde, [97 98 99 100 101]
}

缓冲区原理介绍：Go 字节缓冲区底层以字节切片做存储，切片存在长度 len 与容量 cap

缓冲区从长度 len 的位置开始写，当 len > cap 时，会自动扩容
缓冲区从内置标记 off 位置开始读（off 始终记录读的起始位置）
当 off == len 时，表明缓冲区已读完，读完就重置缓冲区 len = off = 0

// main walks a bytes.Buffer through reads, a reset, a write, a truncate and
// further reads, printing the unread length and selected bytes along the way.
// In the comments below, "length" means the length of the underlying slice and
// "off" the read offset; Len() reports the unread byte count (length - off).
// Errors from ReadByte are discarded because every read here happens while
// unread bytes remain.
func main() {
	byteSlice := make([]byte, 20)
	byteSlice[0] = 1                                  // set the first byte of the backing slice to 1
	byteBuffer := bytes.NewBuffer(byteSlice)          // wrap the 20-byte slice: length = 20, off = 0
	c, _ := byteBuffer.ReadByte()                     // consume one byte: off = 1
	fmt.Printf("len:%d, c=%d\n", byteBuffer.Len(), c) // prints len:19, c=1 (20 bytes held, 1 already read)
	byteBuffer.Reset()                                // empty the buffer: length = 0, off = 0
	fmt.Printf("len:%d\n", byteBuffer.Len())          // prints len:0
	byteBuffer.Write([]byte("hello byte buffer"))     // append 17 bytes: length = 17
	fmt.Printf("len:%d\n", byteBuffer.Len())          // prints len:17
	byteBuffer.Next(4)                                // skip 4 bytes: off = 4
	c, _ = byteBuffer.ReadByte()                      // read the 5th byte: off = 5
	fmt.Printf("第5个字节:%d\n", c)                    // prints 111 (the letter 'o'); length = 17, off = 5
	byteBuffer.Truncate(3)                            // keep only the first 3 unread bytes (" by"): length = off+3 = 8, off = 5
	fmt.Printf("len:%d\n", byteBuffer.Len())          // prints len:3, the unread count; the 8 above is the underlying slice length
	byteBuffer.WriteByte(96)                          // append byte 96 ('`') at index 8: length = 9, unread bytes are now " by`"
	byteBuffer.Next(3)                                // skip 3 bytes: length = 9, off = 8
	c, _ = byteBuffer.ReadByte()                      // read the 9th byte: off = 9, c = 96
	fmt.Printf("第9个字节:%d\n", c)                    // prints 96
}

缓冲区：

// main writes a string into a buffer, reads it back up to and including the
// '?' delimiter, and prints both the extracted part and what is left unread.
func main() {
	var buffer bytes.Buffer

	// Fill the buffer.
	buffer.WriteString("abc?def")

	// Read through the first '?'. The error is ignored because the delimiter
	// is known to be present in the data written above.
	head, _ := buffer.ReadString('?')

	fmt.Println("str = ", head)
	fmt.Println("buff = ", buffer.String())
}


/*
str =  abc?
buff =  def
*/

缓冲区读数据：Read、ReadByte、ReadBytes、ReadString、ReadRune、ReadFrom（注意：ReadFrom 是从一个 io.Reader 读取数据并追加到缓冲区）

strconv 包

字符串转 [ ]byte：

sum := []byte("hello")

字符串 ——> 整数：使用 strconv.Atoi 或 strconv.ParseInt

// 按照 10进制 转换，返回 int 类型
i, _ := strconv.Atoi("33234")
fmt.Printf("%T\n", i) // int

// param1：要转化的字符串
// param2：转换的进制，取值 2 到 36；传 0 时根据字符串前缀（0b、0o、0x）自动判断
// param3：结果必须能放入的整数位数，0、8、16、32、64 分别对应 int、int8、int16、int32、int64（注意：返回值的类型始终是 int64）
i2, _ := strconv.ParseInt("33234", 10, 0)
fmt.Printf("%T\n", i2) // int64

字符串 ——> 浮点数：使用 strconv.ParseFloat

// param2：精度位数，32 或 64；无论取哪个值，返回值的类型始终是 float64
val, _ := strconv.ParseFloat("33.33", 32)
fmt.Printf("type: %T\n", val) // type: float64

val2, _ := strconv.ParseFloat("33.33", 64)
fmt.Printf("type: %T\n", val2) // type: float64

整数 —> 字符串：使用 strconv.Itoa 或 strconv.FormatInt

num := 180

// 默认按照10进制转换
f1 := strconv.Itoa(num)

// param1: 要转换的数字(必须是int64类型)
// param2: 转换的进制
f2 := strconv.FormatInt(int64(num), 10)

浮点数 —> 字符串：使用 strconv.FormatFloat

num := 23423134.323422
fmt.Println(strconv.FormatFloat(float64(num), 'f', -1, 64)) // 普通模式
fmt.Println(strconv.FormatFloat(float64(num), 'b', -1, 64)) // 二进制模式
fmt.Println(strconv.FormatFloat(float64(num), 'e', -1, 64)) // 科学记数法
fmt.Println(strconv.FormatFloat(float64(num), 'E', -1, 64)) // 同上，显示为E
fmt.Println(strconv.FormatFloat(float64(num), 'g', -1, 64)) // 指数大时用科学记数，否则普通模式
fmt.Println(strconv.FormatFloat(float64(num), 'G', -1, 64)) // 同上，显示为E


/*
23423134.323422
6287599743057036p-28
2.3423134323422e+07
2.3423134323422E+07
2.3423134323422e+07
2.3423134323422E+07
*/

字符串和 bool 类型转换：

// string --> bool
flagBool, _ := strconv.ParseBool("true")
// It accepts 1, t, T, TRUE, true, True, 0, f, F, FALSE, false, False.
// Any other value returns an error.

// bool --> string
flagStr := strconv.FormatBool(true)

unicode 包

/src/unicode/letter.go：

// 判断字符 r 是否为大写格式
func IsUpper(r rune) bool

// 判断字符 r 是否为小写格式
func IsLower(r rune) bool

// 判断字符 r 是否为 Unicode 规定的 Title 字符
// 大部分字符的 Title 格式就是其大写格式
// 只有少数字符的 Title 格式是特殊字符
// 这里判断的就是特殊字符
func IsTitle(r rune) bool

// ToUpper 将字符 r 转换为大写格式
func ToUpper(r rune) rune

// ToLower 将字符 r 转换为小写格式
func ToLower(r rune) rune

// ToTitle 将字符 r 转换为 Title 格式
// 大部分字符的 Title 格式就是其大写格式
// 只有少数字符的 Title 格式是特殊字符
func ToTitle(r rune) rune

// To 将字符 r 转换为指定的格式
// _case 取值：UpperCase、LowerCase、TitleCase
func To(_case int, r rune) rune

/src/unicode/digit.go：

// IsDigit 判断 r 是否为一个十进制的数字字符
func IsDigit(r rune) bool

/src/unicode/graphic.go：

// IsNumber 判断 r 是否为一个数字字符 (类别 N)
func IsNumber(r rune) bool

// IsLetter 判断 r 是否为一个字母字符 (类别 L)
// 汉字也是一个字母字符
func IsLetter(r rune) bool

// IsSpace 判断 r 是否为一个空白字符
// 在 Latin-1 字符集中，空白字符为：\t, \n, \v, \f, \r,
// 空格, U+0085 (NEL), U+00A0 (NBSP)
// 其它空白字符的定义有“类别 Z”和“Pattern_White_Space 属性”
func IsSpace(r rune) bool

// IsControl 判断 r 是否为一个控制字符
// Unicode 类别 C 包含更多字符，比如代理字符
// 使用 Is(C, r) 来测试它们
func IsControl(r rune) bool

// IsGraphic 判断字符 r 是否为一个“图形字符”
// “图形字符”包括字母、标记、数字、标点、符号、空格
// 他们分别对应于 L、M、N、P、S、Zs 类别
// 这些类别是 RangeTable 类型，存储了相应类别的字符范围
func IsGraphic(r rune) bool

// IsPrint 判断字符 r 是否为 Go 所定义的“可打印字符”
// “可打印字符”包括字母、标记、数字、标点、符号和 ASCII 空格
// 他们分别对应于 L, M, N, P, S 类别和 ASCII 空格
// “可打印字符”和“图形字符”基本是相同的，不同之处在于
// “可打印字符”只包含 Zs 类别中的 ASCII 空格（U+0020）
func IsPrint(r rune) bool

// IsPunct 判断 r 是否为一个标点字符 (类别 P)
func IsPunct(r rune) bool

// IsSymbol 判断 r 是否为一个符号字符
func IsSymbol(r rune) bool

// IsMark 判断 r 是否为一个 mark 字符 (类别 M)
func IsMark(r rune) bool

// IsOneOf 判断 r 是否在 set 范围内
func IsOneOf(set []*RangeTable, r rune) bool

Golang语言基础第二部分 | 青训营

字符串遍历

strings 包

bytes 包

strconv 包

unicode 包