实习所做SQL

135 阅读4分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路。

需求:

将历史场所码数据,加工成应用所需要的表。

开发工具:

DataWork

难点:

原表中的数据结构不一致,需要根据不同的情况设计不同的SQL进行查询。

SQL:

CREATE TABLE csm_tmp_rxd_713

AS

SELECT DISTINCT *

FROM (

SELECT id,

       name AS place_name,

       CAST(place_code as BIGINT) AS place_code,

       '' AS place_type_id,

       area_code,

       region_name AS area_name,

       street_id AS street_code,

       street AS street_name,

       community_id AS community_code,

       community AS community_name,

       code_address AS addr,

       code_latitude AS lat,

       code_longitude AS lon,

       '' AS social_uni_code,

       manager_name,

       manager_contract,

       '' AS state,

       code,

       '' AS audit_status,

       CAST('' as datetime)  AS audit_at,

       '' AS is_deleted,

       CAST(gmt_create as datetime) AS create_at,

       creator AS create_by,

       CAST(gmt_modified as datetime) AS update_at,

       modifier AS update_by

FROM(SELECT (CASE WHEN code_address LIKE '%柯城%' THEN '330802000000' --提取村代码或者街道乡镇代码前6位,直接拼接成区县码,之后和区县码表join获得区县的名称。

                  WHEN street_id is null THEN CONCAT(SUBSTR(community_id,1,6),'000000' )

             ELSE CONCAT(SUBSTR(street_id,1,6),'000000' ) END)as area_code,

       id,

       name,

       code,

       place_code,

       code_address,

       code_latitude,

       code_longitude,

       gmt_modified,

       gmt_create,

       creator,

       modifier,

       street,

       street_id,

       community,

       community_id,

       manager_name,

       manager_contract

FROM(

    SELECT

       id,

       name,

       code,

       place_code,

       code_address,

       code_latitude,

       code_longitude,

       gmt_modified,

       gmt_create,

       creator,

       modifier,

       street,

       street_id,

       community,

       community_id,

       manager_name,

       manager_contract

FROM(

       (SELECT id,

               name,

               code,

               (scene_id * 100000000L) AS place_code,

               code_address,

               code_latitude,

               code_longitude,

               gmt_modified,

               gmt_create,

               creator,

               modifier,

               (CASE WHEN code_address LIKE '%.%.%.%' THEN SPLIT_PART(code_address,'.',3)

                     WHEN code_address LIKE '%街道%' THEN (CASE WHEN code_address LIKE '%区%街道%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'街道',1),'街道')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'街道',1),'街道') END)

                     WHEN code_address LIKE '%镇%' THEN (CASE WHEN code_address LIKE '%村%镇%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'村',2),'镇',1),'镇')

                                                              WHEN code_address LIKE '%区%镇%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'镇',1),'镇')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'镇',1),'镇') END)

                     WHEN code_address LIKE '%乡%' THEN (CASE WHEN code_address LIKE '%区%乡%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'乡',1),'乡')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'乡',1),'乡') END)

                     END) AS street,

               (CASE WHEN code_address LIKE '%.%.%.%村%' THEN (CASE WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%乡%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'乡',2),'村',1),'村')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%镇%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'镇',2),'村',1),'村')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%街道%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'街道',2),'村',1),'村')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%市%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'市',2),'村',1),'村')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%省%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'省',2),'村',1),'村')

                                                              ELSE CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') END)

                     WHEN code_address LIKE '%.%.%.%社区%' THEN (CASE WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%号%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'号',2),'社区',1),'社区')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%路%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'路',2),'社区',1),'社区')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%幢%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'幢',2),'社区',1),'社区')

                                                              WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%栋%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'栋',2),'社区',1),'社区')

                                                              ELSE CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') END) --上方SQL使用的CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区')为所得到的社区字符串 如"*****社区"

                     WHEN code_address LIKE '%.%.%.%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'委员会',1),'委员会')

                     WHEN code_address LIKE '%村%' THEN (CASE WHEN code_address LIKE '%社区%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'社区',2),'村',1),'村')

                                                              WHEN code_address LIKE '%乡%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'村',1),'村')

                                                              WHEN code_address LIKE '%镇%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'村',1),'村')

                                                              WHEN code_address LIKE '%街道%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'村',1),'村')

                                                              WHEN code_address LIKE '%区%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'村',1),'村')

                                                              WHEN code_address LIKE '%市%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'市',2),'村',1),'村')

                                                              WHEN code_address LIKE '%省%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'省',2),'村',1),'村')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'村',1),'村') END)

                     WHEN code_address LIKE '%社区%' THEN (CASE

                                                              WHEN code_address LIKE '%乡%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'社区',1),'社区')

                                                              WHEN code_address LIKE '%镇%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'社区',1),'社区')

                                                              WHEN code_address LIKE '%街道%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'社区',1),'社区')

                                                              WHEN code_address LIKE '%区%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'社区',1),'社区')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'社区',1),'社区') END)

                     WHEN code_address LIKE '%委员会%' THEN (CASE

                                                              WHEN code_address LIKE '%乡%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'委员会',1),'委员会')

                                                              WHEN code_address LIKE '%镇%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'委员会',1),'委员会')

                                                              WHEN code_address LIKE '%街道%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'委员会',1),'委员会')

                                                              WHEN code_address LIKE '%区%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'委员会',1),'委员会')

                                                              ELSE CONCAT(SPLIT_PART(code_address,'委员会',1),'委员会') END)

                     END) AS community,

               (CASE

                     WHEN extra_data REGEXP '^[1][35678][0-9]{9}$' THEN ''

                     WHEN extra_data LIKE '%,%,%,%' THEN SPLIT_PART(extra_data,',',3)

                     WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',1)

                     WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',1)

                     WHEN extra_data LIKE '%-%' THEN SPLIT_PART(extra_data,'-',1)

                     WHEN LENGTH(extra_data) = 15 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,4) END ) --这里使用正则表达式先匹配,如果字符串不满足以手机号结尾但长度匹配,将字符串直接写入名字。

                     WHEN LENGTH(extra_data) = 14 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,3) END )

                     WHEN LENGTH(extra_data) = 13 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,2) END )

                     WHEN LENGTH(extra_data) = 12 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,1) END )

                     WHEN LENGTH(extra_data) = 11 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,4) END )

                     WHEN LENGTH(extra_data) = 10 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,3) END )

                     WHEN LENGTH(extra_data) = 9 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,2) END )

                     ELSE extra_data

        END) AS manager_name,

       (CASE

                     WHEN extra_data REGEXP '^[1][35678][0-9]{9}$' THEN extra_data

                     WHEN extra_data LIKE '%,%,%,%' THEN SPLIT_PART(extra_data,',',4)

                     WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',2)

                     WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',2)

                     WHEN extra_data LIKE '%-%' THEN SPLIT_PART(extra_data,'-',2)

                     WHEN LENGTH(extra_data) = 15 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,5,11) END )

                     WHEN LENGTH(extra_data) = 14 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,4,11) END )

                     WHEN LENGTH(extra_data) = 13 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,3,11) END )

                     WHEN LENGTH(extra_data) = 12 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,2,11) END )

                     WHEN LENGTH(extra_data) = 11 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,5,7) END )

                     WHEN LENGTH(extra_data) = 10 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,4,7) END )

                     WHEN LENGTH(extra_data) = 9 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,3,7) END )

        END) AS manager_contract

        FROM mqcp_active_code_5

        WHERE dt = '20220708'

          AND name NOT LIKE '%浙H%'

          AND name NOT LIKE '%浙A%')r1

     LEFT JOIN hz_query_street_code_tmp sc ON sc.street_name = r1.street

     LEFT JOIN hz_query_tmp_country_code cc ON cc.community_name = r1.community)

))r2

LEFT JOIN hz_query_region_code_tmp rc on r2.area_code = rc.region_id);

部分结果:

image.png