本文已参与「新人创作礼」活动,一起开启掘金创作之路。
需求:
将历史场所码数据,加工成应用所需要的表。
开发工具:
DataWork
难点:
原表中的数据结构不一致,需要根据不同的情况设计不同的SQL进行查询。
SQL:
CREATE TABLE csm_tmp_rxd_713
AS
SELECT DISTINCT *
FROM (
SELECT id,
name AS place_name,
CAST(place_code as BIGINT) AS place_code,
'' AS place_type_id,
area_code,
region_name AS area_name,
street_id AS street_code,
street AS street_name,
community_id AS community_code,
community AS community_name,
code_address AS addr,
code_latitude AS lat,
code_longitude AS lon,
'' AS social_uni_code,
manager_name,
manager_contract,
'' AS state,
code,
'' AS audit_status,
CAST('' as datetime) AS audit_at,
'' AS is_deleted,
CAST(gmt_create as datetime) AS create_at,
creator AS create_by,
CAST(gmt_modified as datetime) AS update_at,
modifier AS update_by
FROM(SELECT (CASE WHEN code_address LIKE '%柯城%' THEN '330802000000' --提取村代码或者街道乡镇代码前6位,直接拼接成区县码,之后和区县码表join获得区县的名称。
WHEN street_id is null THEN CONCAT(SUBSTR(community_id,1,6),'000000' )
ELSE CONCAT(SUBSTR(street_id,1,6),'000000' ) END)as area_code,
id,
name,
code,
place_code,
code_address,
code_latitude,
code_longitude,
gmt_modified,
gmt_create,
creator,
modifier,
street,
street_id,
community,
community_id,
manager_name,
manager_contract
FROM(
SELECT
id,
name,
code,
place_code,
code_address,
code_latitude,
code_longitude,
gmt_modified,
gmt_create,
creator,
modifier,
street,
street_id,
community,
community_id,
manager_name,
manager_contract
FROM(
(SELECT id,
name,
code,
(scene_id * 100000000L) AS place_code,
code_address,
code_latitude,
code_longitude,
gmt_modified,
gmt_create,
creator,
modifier,
(CASE WHEN code_address LIKE '%.%.%.%' THEN SPLIT_PART(code_address,'.',3)
WHEN code_address LIKE '%街道%' THEN (CASE WHEN code_address LIKE '%区%街道%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'街道',1),'街道')
ELSE CONCAT(SPLIT_PART(code_address,'街道',1),'街道') END)
WHEN code_address LIKE '%镇%' THEN (CASE WHEN code_address LIKE '%村%镇%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'村',2),'镇',1),'镇')
WHEN code_address LIKE '%区%镇%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'镇',1),'镇')
ELSE CONCAT(SPLIT_PART(code_address,'镇',1),'镇') END)
WHEN code_address LIKE '%乡%' THEN (CASE WHEN code_address LIKE '%区%乡%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'乡',1),'乡')
ELSE CONCAT(SPLIT_PART(code_address,'乡',1),'乡') END)
END) AS street,
(CASE WHEN code_address LIKE '%.%.%.%村%' THEN (CASE WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%乡%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'乡',2),'村',1),'村')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%镇%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'镇',2),'村',1),'村')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%街道%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'街道',2),'村',1),'村')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%市%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'市',2),'村',1),'村')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') LIKE '%省%村' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村'),'省',2),'村',1),'村')
ELSE CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'村',1),'村') END)
WHEN code_address LIKE '%.%.%.%社区%' THEN (CASE WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%号%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'号',2),'社区',1),'社区')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%路%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'路',2),'社区',1),'社区')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%幢%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'幢',2),'社区',1),'社区')
WHEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') LIKE '%栋%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区'),'栋',2),'社区',1),'社区')
ELSE CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区') END) --上方SQL使用的CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'社区',1),'社区')为所得到的社区字符串 如"*****社区"
WHEN code_address LIKE '%.%.%.%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'.',4),'委员会',1),'委员会')
WHEN code_address LIKE '%村%' THEN (CASE WHEN code_address LIKE '%社区%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'社区',2),'村',1),'村')
WHEN code_address LIKE '%乡%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'村',1),'村')
WHEN code_address LIKE '%镇%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'村',1),'村')
WHEN code_address LIKE '%街道%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'村',1),'村')
WHEN code_address LIKE '%区%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'村',1),'村')
WHEN code_address LIKE '%市%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'市',2),'村',1),'村')
WHEN code_address LIKE '%省%村%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'省',2),'村',1),'村')
ELSE CONCAT(SPLIT_PART(code_address,'村',1),'村') END)
WHEN code_address LIKE '%社区%' THEN (CASE
WHEN code_address LIKE '%乡%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'社区',1),'社区')
WHEN code_address LIKE '%镇%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'社区',1),'社区')
WHEN code_address LIKE '%街道%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'社区',1),'社区')
WHEN code_address LIKE '%区%社区%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'社区',1),'社区')
ELSE CONCAT(SPLIT_PART(code_address,'社区',1),'社区') END)
WHEN code_address LIKE '%委员会%' THEN (CASE
WHEN code_address LIKE '%乡%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'乡',2),'委员会',1),'委员会')
WHEN code_address LIKE '%镇%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'镇',2),'委员会',1),'委员会')
WHEN code_address LIKE '%街道%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'街道',2),'委员会',1),'委员会')
WHEN code_address LIKE '%区%委员会%' THEN CONCAT(SPLIT_PART(SPLIT_PART(code_address,'区',2),'委员会',1),'委员会')
ELSE CONCAT(SPLIT_PART(code_address,'委员会',1),'委员会') END)
END) AS community,
(CASE
WHEN extra_data REGEXP '^[1][35678][0-9]{9}$' THEN ''
WHEN extra_data LIKE '%,%,%,%' THEN SPLIT_PART(extra_data,',',3)
WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',1)
WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',1)
WHEN extra_data LIKE '%-%' THEN SPLIT_PART(extra_data,'-',1)
WHEN LENGTH(extra_data) = 15 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,4) END ) --这里使用正则表达式先匹配,如果字符串不满足以手机号结尾但长度匹配,将字符串直接写入名字。
WHEN LENGTH(extra_data) = 14 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,3) END )
WHEN LENGTH(extra_data) = 13 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,2) END )
WHEN LENGTH(extra_data) = 12 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN extra_data ELSE SUBSTR(extra_data,1,1) END )
WHEN LENGTH(extra_data) = 11 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,4) END )
WHEN LENGTH(extra_data) = 10 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,3) END )
WHEN LENGTH(extra_data) = 9 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN extra_data ELSE SUBSTR(extra_data,1,2) END )
ELSE extra_data
END) AS manager_name,
(CASE
WHEN extra_data REGEXP '^[1][35678][0-9]{9}$' THEN extra_data
WHEN extra_data LIKE '%,%,%,%' THEN SPLIT_PART(extra_data,',',4)
WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',2)
WHEN extra_data LIKE '%,%' THEN SPLIT_PART(extra_data,',',2)
WHEN extra_data LIKE '%-%' THEN SPLIT_PART(extra_data,'-',2)
WHEN LENGTH(extra_data) = 15 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,5,11) END )
WHEN LENGTH(extra_data) = 14 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,4,11) END )
WHEN LENGTH(extra_data) = 13 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,3,11) END )
WHEN LENGTH(extra_data) = 12 THEN (CASE WHEN extra_data NOT REGEXP '[1][35678][0-9]{9}$' THEN '' ELSE SUBSTR(extra_data,2,11) END )
WHEN LENGTH(extra_data) = 11 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,5,7) END )
WHEN LENGTH(extra_data) = 10 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,4,7) END )
WHEN LENGTH(extra_data) = 9 THEN (CASE WHEN extra_data NOT REGEXP '[2378][0-9]{6}$' THEN '' ELSE SUBSTR(extra_data,3,7) END )
END) AS manager_contract
FROM mqcp_active_code_5
WHERE dt = '20220708'
AND name NOT LIKE '%浙H%'
AND name NOT LIKE '%浙A%')r1
LEFT JOIN hz_query_street_code_tmp sc ON sc.street_name = r1.street
LEFT JOIN hz_query_tmp_country_code cc ON cc.community_name = r1.community)
))r2
LEFT JOIN hz_query_region_code_tmp rc on r2.area_code = rc.region_id);