数据库表输出转sql

49 阅读2分钟

leetcode的SQL试题都是直接用的数据库的输出,不提供sql语句,如果想要进行本地测试就很难受了。

image.png 然后就手撸了一个解析然后输出sql的python脚本。具体如下:

"""
Activity

+--------------+---------+
| Column Name  | Type    |
+--------------+---------+
| player_id    | int     |
| device_id    | int     |
| event_date   | date    |
| games_played | int     |
+--------------+---------+
+-----------+-----------+------------+--------------+
| player_id | device_id | event_date | games_played |
+-----------+-----------+------------+--------------+
| 1         | 2         | 2016-03-01 | 5            |
| 1         | 2         | 2016-03-02 | 6            |
| 2         | 3         | 2017-06-25 | 1            |
| 3         | 1         | 2016-03-02 | 0            |
| 3         | 4         | 2018-07-03 | 5            |
+-----------+-----------+------------+--------------+
"""
from pathlib import Path


def read_data(str_var):
    path = Path(str_var)
    if path.exists():
        with open(path, "r") as f:
            data = f.read()
    else:
        data = str_var
    return data


def make_sql(tb_struct, data):
    tb_name = tb_struct.pop("tb_name")
    data_values = []
    create_tb_cols = [f"{c} {ct}" for c,ct in tb_struct.items()]
    col_types = list()
    cols = []
    for col in data[0]:
        coltype = tb_struct.get(col)
        if coltype is None:
            continue
        cols.append(col)
        col_types.append(coltype)
    for row in data[1:]:
        row_value = []
        for index, value in enumerate(row):
            if col_types[index] in ("date", "datetime"):
                value = f"'{value}'"
            elif value in ("null", "NULL", "Null"):
                value = ""
            row_value.append(value)
        
        data_values.append(",".join(row_value))

    drop_tb_sql = f"drop table if exists {tb_name};"
    insert_sql = f"""insert into {tb_name}({",".join(cols)}) values({"),(".join(data_values)});"""
    create_tb_sql = f"create table {tb_name}({','.join(create_tb_cols)});"
    return drop_tb_sql, insert_sql, create_tb_sql


def main():
    tb_schema = """Activity

    +--------------+---------+
    | Column Name  | Type    |
    +--------------+---------+
    | player_id    | int     |
    | device_id    | int     |
    | event_date   | date    |
    | games_played | int     |
    +--------------+---------+
    """
    # tb_schema 是表结构信息
    data_schema = r"D:\project\tmp\new_file.txt"   # 表数据信息
    
    tb_struct = dict()  # 存放表结构
    data = list()  # 存放表数据

    tb_schema_str = read_data(tb_schema)
    data_schema_str = read_data(data_schema)

    for index, line in enumerate(tb_schema_str.split("\n")):
        line2 = line.strip()
        if not line2:
            continue
        elif ("ColumnName" in line2) or ("Column Name" in line2):
            continue
        if (index == 0) and (not line2.startswith("+-")):
            tb_struct['tb_name'] = line2
            continue
        if line2.startswith("+-"):
            continue

        line3 = line2.replace(" ", "")[1:-1]
        col_name, col_type = line3.split("|")
        tb_struct[col_name] = col_type

    for index, line in enumerate(data_schema_str.split("\n")):
        line2 = line.strip()
        if not line2:
            continue
        elif "ColumnName" in line2:
            continue
        elif line2.startswith("+-"):
            continue
        elif "---" in line2:  # | --------- | --------- | ---------- | ------------ |
            continue
        line3 = line2[1:-1].replace(" ", "")
        data.append(line3.split("|"))

    drop_tb_sql, insert_sql, create_tb_sql = make_sql(tb_struct, data)
    # print(insert_sql)
    # print(create_tb_sql)
    with open("drop.sql", "w", encoding="utf-8") as f:
        f.write(drop_tb_sql)
    with open("insert.sql", "w", encoding="utf-8") as f:
        f.write(insert_sql)
    with open("create.sql", "w", encoding="utf-8") as f:
        f.write(create_tb_sql)