记一次SQL迁移的脚本编写以及.csv文件的坑

268 阅读2分钟

背景

  • 旧库数据迁移到新库,部分字段不再需要

思路

  1. 导出数据库.csv文件
  2. 读取原本列名(.csv文件第一行)[]string
  3. 读取原本数据(.csv文件剩余数据)[]map[string]string:key-列名, val-数据,一个map对应一行数据
  4. 设定好新库中列名,去上述每个map中取值,拼接成SQL INSERT语句

.csv文件的一个坑

  • 列名的第一个值永远拿不到,row[0]内的值和想象中不一样
  • eg:第一列列名为id,应该row[0] = "id",但是并不相等
  • 最终排查结果:
len(row[0])
  • row[0]并不等于2,而是5,.csv文件开头有\ufeff的隐藏字符
//处理BOW头,去掉/ufeff
row[0] = row[0][3:]

示例代码

package main

import (
   "encoding/csv"
   "fmt"
   "io"
   "log"
   "os"
   "unicode"
)

func ReadCsv(fileName string) (datas []map[string]string) {
   fs, err := os.Open(fileName)
   if err != nil {
      return
   }
   defer fs.Close()
   r := csv.NewReader(fs)
   row, err := r.Read()
   if err != nil && err != io.EOF {
      log.Fatalf("%+v", err)
      return
   }

   //处理BOW头,去掉/ufeff
   row[0] = row[0][3:]
   rowName := row

   datas = []map[string]string{}
   for {
      row, err = r.Read()
      if err != nil && err != io.EOF {
         log.Fatalf("%+v", err)
         return
      }
      if err == io.EOF {
         break
      }
      data := make(map[string]string)
      loc := 0
      for i := range row {
         data[rowName[loc]] = row[i]
         loc++
      }
      datas = append(datas, data)
   }
   return datas
}

func insertSQL(tableName string, lists []string, datas []map[string]string) (ans []string) {
   for _, data := range datas {
      rowName := ""
      rowData := ""

      for i, list := range lists {
         rowName += list
         if data[list] == "" {
            rowData += "'" + "', "
            continue
         }
         isDigitFlag := true
         for i, x := range data[list] {
            if i == 0 {
               if !(x == '-' || unicode.IsDigit(x)) {
                  isDigitFlag = true
               }
            } else {
               if !(unicode.IsDigit(x)) {
                  isDigitFlag = false
               }
            }
         }
         if isDigitFlag {
            rowData += data[list]
         } else {
            rowData += "'" + data[list] + "'"
         }

         //TODO:2和TODO3一起修改,结尾,修改
         if i != len(lists)-1 {
            rowName += "'"
            rowData += "'"
         }
         rowName += "'"
         rowData += "'"
      }
      //TODO:3看是否需要写死时间,csv文件解析出来的时间格式不对
      rowName += "create_time, update_time"
      rowData += "'2023-02-24T15:00:00', '2023-02-24T15:00:00'"
      tempAns := "insert into" + tableName + "(" + rowName + ")" + "values" + "(" + rowData + ");"
      ans = append(ans, tempAns)
   }
   return ans
}
func main() {
   //TODO:1修改文件参数
   CSVFileName := ""
   TableNAme := ""
   RowName := []string{}
   datas := ReadCsv(CSVFileName)
   sqls := insertSQL(TableNAme, RowName, datas)
   for _, sql := range sqls {
      fmt.Println(sql)
   }
}