Nim编程早茶
这一节,我们介绍如何使用 NimData 对简单的 csv 文件进行数据处理。
读取 csv 文件
使用 DF.fromFile 来读取 csv 文件,take 表示取前几行数据,show 用来打印数据。
import nimdata
import re, sequtils, tables, json
# Read city_code.csv into a DataFrame, then print its first 5 rows.
# `let` is sufficient: the binding is never reassigned.
let csv = DF.fromFile("city_code.csv")
csv.take(5).show()
输出:
CN101010100,beijing,北京,CN,China,中国,beijing,北京,beijing,北京,39.904987,116.40529,"110,100,110,000,100,000"
CN101010200,haidian,海淀,CN,China,中国,beijing,北京,beijing,北京,39.956074,116.31032,110108
CN101010300,chaoyang,朝阳,CN,China,中国,beijing,北京,beijing,北京,39.92149,116.48641,110105
CN101010400,shunyi,顺义,CN,China,中国,beijing,北京,beijing,北京,40.128937,116.65353,110113
CN101010500,huairou,怀柔,CN,China,中国,beijing,北京,beijing,北京,40.324272,116.63712,110116
创建列名
下面的代码说明了每一列的列名,以及其对应的类型(strCol, intCol, floatCol)。我们使用 pipeline 操作,map(schemaParser(schema, ',')) 创建列名,projectTo 将 dataFrame 映射到 city_ID 以及 city_EN 两列。
# Column layout of city_code.csv: one entry per column, in file order,
# each giving the column name and the column type used to parse it.
const schema = [
  strCol("city_ID"),
  strCol("city_EN"),
  strCol("city_CN"),
  strCol("country_code"),
  strCol("country_EN"),
  strCol("country_CN"),
  strCol("province_EN"),
  strCol("province_CN"),
  strCol("admin_district_EN"),
  strCol("admin_district_CN"),
  floatCol("latitude"),
  floatCol("longitude"),
  # NOTE(review): the first sample row shows ad_code as a quoted value
  # containing commas -- confirm schemaParser copes with that row.
  intCol("ad_code"),
]

# Parse every comma-separated line according to `schema`, then keep only
# the city_ID and city_EN columns.
let df = DF.fromFile("city_code.csv")
  .map(schemaParser(schema, ','))
  .map(row => row.projectTo(city_ID, city_EN))
获取每一列对应序列
我们还可以使用 collect 函数将单列 dataFrame 转换为对应类型的 seq。map(convertId) 对该列每一个元素施加 convertId 操作。convertId 也就是把 CN101010100 转换为 101010100。
proc convertId(x: string): string =
  ## Returns `x` with every run of non-digit characters removed,
  ## e.g. "CN101010100" becomes "101010100".
  result = replace(x, re"\D+", "")
# Pull each projected column out of the DataFrame as a plain seq;
# city IDs are reduced to their digits by convertId on the way.
let id: seq[string] = df.map(row => row.city_ID).map(convertId).collect()
let city: seq[string] = df.map(row => row.city_EN).collect()
创建哈希表并序列化
%* 把哈希表转换为 JsonNode。parseJson 将字符串解析为 JsonNode 对象,to 将 JsonNode 转换为相应的对象。
# Build a lookup table from English city name to digits-only city ID.
var city_id: Table[string, string]
for pair in zip(id, city):
  let (cityCode, cityName) = pair
  city_id[cityName] = cityCode

# Serialize the table to JSON and write it to disk.
let city_id_json = %* city_id
writeFile("city_id.txt", $city_id_json)

# Read the file back and rebuild the table. readFile opens and closes the
# file itself, so no handle stays open if parsing or the check below fails.
let jsonNode = parseJson(readFile("city_id.txt"))
let t = to(jsonNode, Table[string, string])
# doAssert (unlike assert) is kept in -d:danger builds.
doAssert t == city_id
完整代码
import nimdata
import re, sequtils, tables, json
proc convertId(x: string): string =
  ## Drops all non-digit characters from `x`, so that a city code such as
  ## "CN101010100" is reduced to "101010100".
  result = replace(x, re"\D+", "")
# Preview: read city_code.csv into a DataFrame and print its first 5 rows.
# `let` is sufficient: the binding is never reassigned.
let csv = DF.fromFile("city_code.csv")
csv.take(5).show()
# Column layout of city_code.csv: one entry per column, in file order,
# each giving the column name and the column type used to parse it.
const schema = [
  strCol("city_ID"),
  strCol("city_EN"),
  strCol("city_CN"),
  strCol("country_code"),
  strCol("country_EN"),
  strCol("country_CN"),
  strCol("province_EN"),
  strCol("province_CN"),
  strCol("admin_district_EN"),
  strCol("admin_district_CN"),
  floatCol("latitude"),
  floatCol("longitude"),
  # NOTE(review): the sample data contains a quoted ad_code value with
  # embedded commas -- confirm schemaParser copes with that row.
  intCol("ad_code"),
]

# Parse every comma-separated line according to `schema`, then keep only
# the city_ID and city_EN columns.
let df = DF.fromFile("city_code.csv")
  .map(schemaParser(schema, ','))
  .map(row => row.projectTo(city_ID, city_EN))
# Materialize each projected column as a seq; city IDs are reduced to
# their digits by convertId on the way.
let id = df.map(row => row.city_ID).map(convertId).collect()
let city = df.map(row => row.city_EN).collect()
# Build a lookup table from English city name to digits-only city ID.
var city_id: Table[string, string]
for pair in zip(id, city):
  let (cityCode, cityName) = pair
  city_id[cityName] = cityCode

# Serialize the table to JSON and write it to disk.
let city_id_json = %* city_id
writeFile("city_id.txt", $city_id_json)

# Read the file back and rebuild the table. readFile opens and closes the
# file itself, so no handle stays open if parsing fails.
let jsonNode = parseJson(readFile("city_id.txt"))
let t = to(jsonNode, Table[string, string])
# Prints whether the round trip reproduced the original table.
echo t == city_id