Go并行处理文件

56 阅读2分钟

1. 创建假数据：在 C:\Users\admin\logs\ 目录下循环创建 5 个文件夹，每个文件夹里创建 100 个后缀名为 .txt 的文件，并向每个文件写入 10000 行 "test (2) tees 4" 数据。

代码

  
import (  
"fmt"  
"os"  
)  
  
// main generates the fixture tree: five folders under the log directory,
// each containing 100 .txt files, each file holding 10000 identical lines.
// Any failure to create a directory or write a file panics.
func main() {
	dir := "C:\\Users\\admin\\logs\\"

	// Every file gets the same content, so build it once instead of issuing
	// 10000 small unbuffered writes per file.
	const line = "test (2) tees 4\n"
	content := make([]byte, 0, 10000*len(line))
	for k := 1; k <= 10000; k++ {
		content = append(content, line...)
	}

	for i := 1; i <= 5; i++ {
		subDir := fmt.Sprintf("%s\\folder%d", dir, i)
		if err := os.MkdirAll(subDir, os.ModePerm); err != nil {
			panic(err)
		}
		for j := 1; j <= 100; j++ {
			fileName := fmt.Sprintf("%s\\file%d.txt", subDir, j)
			// os.WriteFile creates or truncates the file (mode 0666, as
			// os.Create does) and also returns an error from closing it,
			// which the original code ignored.
			if err := os.WriteFile(fileName, content, 0666); err != nil {
				panic(err)
			}
		}
	}
	fmt.Println("Done.")
}

2. 需求：读取每个文件里的每一行 "test (2) tees 4"，提取括号中的数字（即 2），并把所有行的这个数值累加起来。按上面的假数据，期望结果为 5 × 100 × 10000 × 2 = 10000000。

3.不使用并发

  
import (  
"bufio"  
"fmt"  
"io"  
"io/fs"  
"log"  
"os"  
"path/filepath"  
"regexp"  
"strconv"  
"strings"  
"time"  
)  
  
// compile matches a run of one or more decimal digits.
var compile = regexp.MustCompile(`\d+`)

// sum is the running total of the numbers extracted from processed lines.
var sum int32
  
// main processes the log directory sequentially, then prints the accumulated
// sum followed by the elapsed wall-clock time.
func main() {
	start := time.Now()

	ProcessDir("C:\\Users\\admin\\logs")

	fmt.Println(sum)
	elapsed := time.Since(start)
	fmt.Println("耗时 :", elapsed)
}
  
// ExtractNumber returns the leftmost run of decimal digits in log as an int32.
// It returns -1 when log contains no digits or when the digits do not fit in
// an int32.
func ExtractNumber(log string) int32 {
	digits := compile.FindString(log)
	if digits == "" {
		return -1
	}
	// Parse with an explicit 32-bit size so an out-of-range value is reported
	// as an error instead of being silently wrapped by the int32 conversion.
	n, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		return -1
	}
	return int32(n)
}
  
// ProcessDir processes every file that ListDir reports for file_path, one
// file after another.
func ProcessDir(file_path string) {
	files := ListDir(file_path)
	for i := 0; i < len(files); i++ {
		ProcessFile(files[i])
	}
}
  
// ProcessFile reads file line by line and adds the number extracted from each
// line to the package-level sum. Lines for which ExtractNumber reports its -1
// "no number" sentinel are skipped rather than subtracted from the total.
// Failure to open or read the file terminates the program via log.Fatalf.
func ProcessFile(file string) {
	f, err := os.Open(file)
	if err != nil {
		log.Fatalf("文件打开失败 %v", err)
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("读取文件异常 %v", err)
		}
		// At EOF ReadString may still return a final line that has no
		// trailing newline; it is handled the same way as every other line.
		if len(line) > 0 {
			if n := ExtractNumber(strings.TrimRight(line, "\n")); n >= 0 {
				sum += n
			}
		}
		if err == io.EOF {
			break
		}
	}
}
  
// ListDir returns the paths of all regular files under path, including those
// in nested subdirectories. filepath.WalkDir performs the recursion, so each
// file is reported exactly once.
//
// If the walk fails (for example because path does not exist), the error is
// logged and the files collected up to that point are returned.
func ListDir(path string) []string {
	files := make([]string, 0, 32)

	err := filepath.WalkDir(path, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			files = append(files, p)
		}
		return nil
	})
	if err != nil {
		log.Printf("遍历目录失败 %v", err)
	}
	return files
}

4. 使用并发读取：主要修改了处理目录（ProcessDir）这部分，改为用 goroutine 并发处理各个文件，并用 sync.WaitGroup 等待全部完成；同时对 sum 的累加改用并发安全的 atomic 操作。可以对比一下两种实现的耗时，速度提升了不少。

  
import (
	"bufio"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
)
  
// compile matches a run of one or more decimal digits.
var compile = regexp.MustCompile(`\d+`)

// sum is the running total of the numbers extracted from processed lines.
var sum int32
  
// main processes the log directory concurrently, then prints the accumulated
// sum followed by the elapsed wall-clock time.
func main() {
	start := time.Now()

	ProcessDir("C:\\Users\\admin\\logs")

	fmt.Println(sum)
	elapsed := time.Since(start)
	fmt.Println("耗时 :", elapsed)
}
  
// ExtractNumber returns the leftmost run of decimal digits in log as an int32.
// It returns -1 when log contains no digits or when the digits do not fit in
// an int32.
func ExtractNumber(log string) int32 {
	digits := compile.FindString(log)
	if digits == "" {
		return -1
	}
	// Parse with an explicit 32-bit size so an out-of-range value is reported
	// as an error instead of being silently wrapped by the int32 conversion.
	n, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		return -1
	}
	return int32(n)
}
  
// ProcessDir processes every file that ListDir reports for file_path
// concurrently and returns once all of them have been handled.
//
// Work is spread over a fixed pool of workers instead of one goroutine per
// file, so the number of files open at the same time stays bounded however
// many files the directory contains.
func ProcessDir(file_path string) {
	files := ListDir(file_path)

	workers := runtime.GOMAXPROCS(0)
	if workers > len(files) {
		workers = len(files)
	}

	jobs := make(chan string)
	var wg sync.WaitGroup
	wg.Add(workers)
	for w := 0; w < workers; w++ {
		go func() {
			defer wg.Done()
			for name := range jobs {
				ProcessFile(name)
			}
		}()
	}

	for _, name := range files {
		jobs <- name
	}
	// Closing the channel ends each worker's range loop.
	close(jobs)
	wg.Wait()
}
  
// ProcessFile reads file line by line, totals the number extracted from each
// line, and adds that total to the package-level sum with a single atomic
// operation. Lines for which ExtractNumber reports its -1 "no number"
// sentinel are skipped rather than subtracted from the total.
// Failure to open or read the file terminates the program via log.Fatalf.
func ProcessFile(file string) {
	f, err := os.Open(file)
	if err != nil {
		log.Fatalf("文件打开失败 %v", err)
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	// Accumulate locally and publish once, so concurrent callers do not
	// contend on the shared counter for every line.
	var fileSum int32
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("读取文件异常 %v", err)
		}
		// At EOF ReadString may still return a final line that has no
		// trailing newline; it is handled the same way as every other line.
		if len(line) > 0 {
			if n := ExtractNumber(strings.TrimRight(line, "\n")); n >= 0 {
				fileSum += n
			}
		}
		if err == io.EOF {
			break
		}
	}
	atomic.AddInt32(&sum, fileSum)
}
  
// ListDir returns the paths of all regular files under path, including those
// in nested subdirectories. filepath.WalkDir performs the recursion, so each
// file is reported exactly once.
//
// If the walk fails (for example because path does not exist), the error is
// logged and the files collected up to that point are returned.
func ListDir(path string) []string {
	files := make([]string, 0, 32)

	err := filepath.WalkDir(path, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			files = append(files, p)
		}
		return nil
	})
	if err != nil {
		log.Printf("遍历目录失败 %v", err)
	}
	return files
}

打印：

C:\Users\admim\AppData\Local\JetBrains\GoLand2023.1\tmp\GoLand___go_build_gotest_test_file.exe
10000000
耗时 : 465.4588ms

Process finished with the exit code 0