1.创建假数据:我们将在 C:\Users\admim\logs\ 这个目录下循环创建5个文件夹,每个文件夹里面创建100个后缀名为.txt的文件,并向每个文件写入10000行"test (2) tees 4"数据。
代码
import (
"fmt"
"os"
)
// main generates the fixture data used by the summing programs: 5 folders
// under the logs directory, each holding 100 .txt files that contain 10000
// copies of the line "test (2) tees 4". It panics on the first directory or
// file error and prints "Done." when everything has been written.
func main() {
	const (
		folderCount    = 5
		filesPerFolder = 100
		linesPerFile   = 10000
		line           = "test (2) tees 4\n"
	)
	dir := "C:\\Users\\admin\\logs\\"

	// Every file has identical content, so build it once instead of issuing
	// linesPerFile small unbuffered writes for each of the files.
	content := make([]byte, 0, len(line)*linesPerFile)
	for k := 0; k < linesPerFile; k++ {
		content = append(content, line...)
	}

	for i := 1; i <= folderCount; i++ {
		subDir := fmt.Sprintf("%s\\folder%d", dir, i)
		if err := os.MkdirAll(subDir, os.ModePerm); err != nil {
			panic(err)
		}
		for j := 1; j <= filesPerFolder; j++ {
			fileName := fmt.Sprintf("%s\\file%d.txt", subDir, j)
			// os.WriteFile creates or truncates the file and reports write
			// and close errors; the close error used to be dropped.
			if err := os.WriteFile(fileName, content, 0666); err != nil {
				panic(err)
			}
		}
	}
	fmt.Println("Done.")
}
2.需求:读取文件里面的每一行 "test (2) tees 4",把每一行中(2)里的数值全部加起来
3.不使用并发
import (
"bufio"
"fmt"
"io"
"io/fs"
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)
// Package-level state shared by the functions in this program.
var (
	// sum is the running total of the numbers extracted from the processed
	// lines; main prints it once processing has finished.
	sum int32

	// compile matches a run of one or more decimal digits. It is compiled
	// once here so that it is not rebuilt for every line.
	compile = regexp.MustCompile(`\d+`)
)
// main sums the numbers found under the logs directory sequentially, then
// prints the total followed by the elapsed wall-clock time.
func main() {
	const root = "C:\\Users\\admin\\logs"

	start := time.Now()
	ProcessDir(root)

	total := sum
	fmt.Println(total)

	elapsed := time.Since(start)
	fmt.Println("耗时 :", elapsed)
}
// ExtractNumber returns the first run of decimal digits found in log as an
// int32. It returns -1 when log contains no digits or when the digits do not
// fit in an int32, so callers must treat a negative result as "no number".
func ExtractNumber(log string) int32 {
	digits := compile.FindString(log)
	if digits == "" {
		return -1
	}
	// ParseInt with bitSize 32 rejects out-of-range values. The previous
	// Atoi followed by an int32 conversion silently wrapped them instead.
	n, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		return -1
	}
	return int32(n)
}
// ProcessDir handles a directory: it calls ProcessFile on every path that
// ListDir returns for file_path, one file after another.
func ProcessDir(file_path string) {
	files := ListDir(file_path)
	for idx := 0; idx < len(files); idx++ {
		ProcessFile(files[idx])
	}
}
// ProcessFile reads file line by line and adds the number extracted from each
// line to the package-level sum. Lines for which ExtractNumber reports no
// number (a negative result) are skipped rather than subtracted from the
// total. The program exits via log.Fatalf when the file cannot be opened or a
// read fails with anything other than io.EOF.
func ProcessFile(file string) {
	f, err := os.Open(file)
	if err != nil {
		log.Fatalf("文件打开失败 %v", err)
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("读取文件异常 %v", err)
		}
		// On io.EOF, ReadString still returns whatever it read, which is the
		// final line when the file does not end with a newline.
		if len(line) > 0 {
			if n := ExtractNumber(strings.TrimRight(line, "\r\n")); n >= 0 {
				sum += n
			}
		}
		if err == io.EOF {
			return
		}
	}
}
// ListDir returns the paths of all regular files under path, including those
// in nested subdirectories, in the lexical order produced by the walk.
// Directories and other non-regular entries are not included. If the walk
// fails (for example because path does not exist), the error is logged and the
// files collected up to that point are returned.
func ListDir(path string) []string {
	files := make([]string, 0, 32)
	// WalkDir already descends into subdirectories, so no manual recursion is
	// needed. The old `path != path` branch could never run, and would have
	// listed files twice if it had.
	err := filepath.WalkDir(path, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			files = append(files, p)
		}
		return nil
	})
	if err != nil {
		log.Printf("遍历目录失败 %v", err)
	}
	return files
}
4.使用并发读取:主要修改了处理目录这一部分,并把对sum的累加改为并发安全的atomic操作。可以对比一下,速度提升了不少
import (
"bufio"
"fmt"
"io"
"io/fs"
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
)
// Package-level state shared by the functions in this program.
var (
	// sum is the running total of the numbers extracted from the processed
	// lines. ProcessFile updates it through atomic.AddInt32 and main prints
	// it once ProcessDir has returned.
	sum int32

	// compile matches a run of one or more decimal digits. It is compiled
	// once here so that it is not rebuilt for every line.
	compile = regexp.MustCompile(`\d+`)
)
// main sums the numbers found under the logs directory using the concurrent
// ProcessDir, then prints the total followed by the elapsed wall-clock time.
func main() {
	const root = "C:\\Users\\admin\\logs"

	start := time.Now()
	ProcessDir(root)

	total := sum
	fmt.Println(total)

	elapsed := time.Since(start)
	fmt.Println("耗时 :", elapsed)
}
// ExtractNumber returns the first run of decimal digits found in log as an
// int32. It returns -1 when log contains no digits or when the digits do not
// fit in an int32, so callers must treat a negative result as "no number".
func ExtractNumber(log string) int32 {
	digits := compile.FindString(log)
	if digits == "" {
		return -1
	}
	// ParseInt with bitSize 32 rejects out-of-range values. The previous
	// Atoi followed by an int32 conversion silently wrapped them instead.
	n, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		return -1
	}
	return int32(n)
}
// ProcessDir handles a directory: it runs ProcessFile concurrently on every
// path that ListDir returns for file_path and returns once all of them have
// been processed.
func ProcessDir(file_path string) {
	// maxWorkers bounds how many files are open and being read at the same
	// time. Starting one goroutine per file opens every file at once, which
	// can exhaust file handles on large directory trees.
	const maxWorkers = 32

	files := ListDir(file_path)
	workers := maxWorkers
	if len(files) < workers {
		workers = len(files)
	}

	jobs := make(chan string)
	var wg sync.WaitGroup
	wg.Add(workers)
	for w := 0; w < workers; w++ {
		go func() {
			defer wg.Done()
			for name := range jobs {
				ProcessFile(name)
			}
		}()
	}

	for _, name := range files {
		jobs <- name
	}
	// Closing the channel ends each worker's range loop so Wait can return.
	close(jobs)
	wg.Wait()
}
// ProcessFile reads file line by line and adds the numbers extracted from its
// lines to the package-level sum. Lines for which ExtractNumber reports no
// number (a negative result) are skipped rather than subtracted from the
// total. The program exits via log.Fatalf when the file cannot be opened or a
// read fails with anything other than io.EOF.
//
// The per-file subtotal is accumulated locally and published with a single
// atomic add, so concurrent callers contend on sum once per file instead of
// once per line.
func ProcessFile(file string) {
	f, err := os.Open(file)
	if err != nil {
		log.Fatalf("文件打开失败 %v", err)
	}
	defer f.Close()

	var subtotal int32
	reader := bufio.NewReader(f)
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("读取文件异常 %v", err)
		}
		// On io.EOF, ReadString still returns whatever it read, which is the
		// final line when the file does not end with a newline.
		if len(line) > 0 {
			if n := ExtractNumber(strings.TrimRight(line, "\r\n")); n >= 0 {
				subtotal += n
			}
		}
		if err == io.EOF {
			break
		}
	}
	atomic.AddInt32(&sum, subtotal)
}
// ListDir returns the paths of all regular files under path, including those
// in nested subdirectories, in the lexical order produced by the walk.
// Directories and other non-regular entries are not included. If the walk
// fails (for example because path does not exist), the error is logged and the
// files collected up to that point are returned.
func ListDir(path string) []string {
	files := make([]string, 0, 32)
	// WalkDir already descends into subdirectories, so no manual recursion is
	// needed. The old `path != path` branch could never run, and would have
	// listed files twice if it had.
	err := filepath.WalkDir(path, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			files = append(files, p)
		}
		return nil
	})
	if err != nil {
		log.Printf("遍历目录失败 %v", err)
	}
	return files
}
打印:C:\Users\admim\AppData\Local\JetBrains\GoLand2023.1\tmp\GoLand___go_build_gotest_test_file.exe 10000000 耗时 : 465.4588ms
Process finished with the exit code 0