Building a Real-Time Data Warehouse with Flink SQL (Part 2)


4.2 DIM Layer Processing

  • Writing dimension data back into the MySQL dimension tables: the ODS changelog streams are inserted into the dim_* JDBC tables, and because each table declares a primary key, the JDBC sink writes in upsert mode.
-- User dimension table
create table if not exists dim_user_profile(
 userId string primary key not enforced comment 'user ID',
 province string comment 'province',
 city string comment 'city',
 age int comment 'age',
 education string comment 'education level',
 jobType string comment 'job type',
 marriage string comment 'marital status',
 sex string comment 'sex',
 interest string comment 'interests'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://你猜:3306/userAnalysis?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false',
    'table-name' = 'user_profile',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = '你猜',
    'password' = '你猜',
    'sink.buffer-flush.interval' = '1s'
)


-- User table
insert into dim_user_profile
select * from ods_users


-- Action type dimension table
create table if not exists dim_action_type(
id int PRIMARY KEY NOT ENFORCED comment 'action type ID',
type string comment 'action type'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://你猜:3306/userAnalysis?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false',
    'table-name' = 'action_type',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = '你猜',
    'password' = '你猜',
    'sink.buffer-flush.interval' = '1s'
)

-- Action type
insert into dim_action_type
select * from ods_action_type

-- Commodity category dimension table
create table if not exists dim_category(
id int PRIMARY KEY NOT ENFORCED comment 'category ID',
category string comment 'category name'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://你猜:3306/userAnalysis?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false',
    'table-name' = 'category',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = '你猜',
    'password' = '你猜',
    'sink.buffer-flush.interval' = '1s'
)

-- Commodity category
insert into dim_category
select * from ods_category

-- Commodity dimension table
create table if not exists dim_commodity(
id int PRIMARY KEY NOT ENFORCED comment 'commodity ID',
category int comment 'category ID',
commodity string comment 'commodity name'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://你猜:3306/userAnalysis?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false',
    'table-name' = 'commodity',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = '你猜',
    'password' = '你猜',
    'sink.buffer-flush.interval' = '1s'
)

-- Commodity table
insert into dim_commodity
select * from ods_commodity

  • Create the commodity detail view
-- Commodity detail view
-- Executed on the MySQL side
CREATE VIEW dim_commodity_info AS
SELECT
	c.id AS categoryId,
	com.id AS commodityId,
	c.category,
	com.commodity
FROM
	categorys c
INNER JOIN commoditys com ON c.id = com.category;

  • Load the dimension data into Flink (a lookup-join usage sketch follows the DDL below)
-- Commodity detail (used as a lookup table)

create table if not exists dim_commodity_info(
commodityId int PRIMARY KEY NOT ENFORCED comment 'commodity ID',
categoryId int comment 'category ID',
commodity string comment 'commodity name',
category string comment 'category name'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://你猜:3306/userAnalysis?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false',
    'table-name' = 'dim_commodity_info',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = '你猜',
    'password' = '你猜',
    'scan.fetch-size' = '200',
    'lookup.cache.max-rows' = '1000',
    'lookup.cache.ttl' = '60000'
)
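
The lookup.cache options above only take effect when dim_commodity_info is used in a lookup (temporal) join. A minimal sketch of such a join, under the assumption that the fact table's DDL is extended with a processing-time column proc_time AS PROCTIME() (not present in the original definition):

-- Sketch of a lookup join against the JDBC dimension table.
-- Assumption: dwd_paid_order_detail_tmp additionally declares proc_time AS PROCTIME().
select
  o.orderInformationId,
  d.commodity,
  d.category,
  o.priceSum
from dwd_paid_order_detail_tmp as o
join dim_commodity_info for system_time as of o.proc_time as d
  on o.productId = d.commodityId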

4.3 DWD Layer Processing

-- Order + evaluation wide table
create table if not exists dwd_paid_order_detail(
id int PRIMARY KEY NOT ENFORCED comment 'ID',
userId string comment 'user ID',
described int comment 'rating',
evaluate string comment 'evaluation text',
logisticsAttitude int comment 'logistics rating',
serviceAttitude int comment 'service rating',
merchantId string comment 'merchant ID',
timestamps bigint comment 'order creation time',
orderInformationId string comment 'order information ID',
categoryId int comment 'category ID',
productId int comment 'commodity ID',
price decimal(10,2) comment 'unit price',
productCount int comment 'purchase quantity',
priceSum decimal(10,2) comment 'order total',
shipAddress string comment 'merchant address',
receiverAddress string comment 'receiver address'
)
with(
'connector' = 'kafka',
'format' = 'debezium-avro-confluent', -- alternatively: 'avro-confluent'
'debezium-avro-confluent.schema-registry.url' = 'http://你猜:8081', -- alternatively: 'avro-confluent.schema-registry.url'
'debezium-avro-confluent.schema-registry.subject' = 'dwd_paid_order_detail',
'topic' = 'dwd_paid_order_detail',
'properties.bootstrap.servers' = '你猜:9092',
'properties.group.id' = 'flink-analysis',
'sink.partitioner' = 'fixed'
)

-- Populate the wide table
insert into dwd_paid_order_detail
select
o.id
,e.userId
,e.described
,e.`comment` as evaluate
,e.logisticsAttitude
,e.serviceAttitude
,e.merchantId
,o.timestamps
,o.orderInformationId
,o.categoryId
,o.productId
,o.price
,o.productCount
,o.priceSum
,o.shipAddress
,o.receiverAddress
from
ods_order as o inner join ods_evaluate e
on o.orderInformationId=e.orderInformationId
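
This is a regular (non-windowed) stream-stream join, so Flink keeps the state of both ods_order and ods_evaluate indefinitely. One common mitigation, sketched here as an assumption rather than something the original setup configures, is to set a state TTL in the SQL Client before submitting the job:

-- Sketch (assumption): bound the state kept by the regular join above.
-- The quoted SET syntax is for the SQL Client on Flink 1.13+; records idle
-- longer than the TTL are dropped from join state and can no longer match.
SET 'table.exec.state.ttl' = '1d';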

-- Create a temporary source table over the same wide-table topic
create table if not exists dwd_paid_order_detail_tmp(
id int PRIMARY KEY NOT ENFORCED comment 'ID',
userId string comment 'user ID',
described int comment 'rating',
evaluate string comment 'evaluation text',
logisticsAttitude int comment 'logistics rating',
serviceAttitude int comment 'service rating',
merchantId string comment 'merchant ID',
timestamps bigint comment 'order creation time',
orderInformationId string comment 'order information ID',
categoryId int comment 'category ID',
productId int comment 'commodity ID',
price decimal(10,2) comment 'unit price',
productCount int comment 'purchase quantity',
priceSum decimal(10,2) comment 'order total',
shipAddress string comment 'merchant address',
receiverAddress string comment 'receiver address'
)
with(
'connector' = 'kafka',
'format' = 'debezium-avro-confluent', -- alternatively: 'avro-confluent'
'debezium-avro-confluent.schema-registry.url' = 'http://你猜:8081', -- alternatively: 'avro-confluent.schema-registry.url'
'topic' = 'dwd_paid_order_detail',
'properties.bootstrap.servers' = '你猜:9092',
'properties.group.id' = 'flink-analysis',
'scan.startup.mode' = 'earliest-offset' -- read the topic from the beginning
)
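
Before building the ADS layer it is worth confirming that the wide-table topic actually carries data. A quick check from the SQL Client (not part of the original pipeline):

-- Sketch: peek at the wide table. This is a continuous query,
-- so cancel it once a few rows have been displayed.
select orderInformationId, userId, priceSum, timestamps
from dwd_paid_order_detail_tmp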

4.4 ADS Layer Processing

-- Spending power by region
create table if not exists ads_power_consumption_index(
province string primary key not enforced,
totalPrice decimal(20,2)
)
with(
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'area_cost'
)

-- Spending power by region
insert into ads_power_consumption_index
select u.province as province,sum(o.priceSum) as totalPrice
from dwd_paid_order_detail_tmp o join ods_users u on o.userId = u.userId
group by province

-- Total consumption amount
create table if not exists ads_total_consumption_index(
endTime timestamp(3),
money decimal(20,2)
)
with(
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'total_money'
)
-- Total consumption amount
insert into ads_total_consumption_index
select cast(max(timestamps)/1000 as timestamp(3)) as endTime,
sum(priceSum) as money from dwd_paid_order_detail_tmp
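
Newer Flink releases reject CAST from BIGINT to TIMESTAMP. A more portable variant of the statement above, assuming timestamps holds epoch milliseconds, would be:

-- Sketch (assumption: 'timestamps' is epoch milliseconds).
-- FROM_UNIXTIME expects seconds and returns a formatted string,
-- which TO_TIMESTAMP then parses into a TIMESTAMP(3).
insert into ads_total_consumption_index
select to_timestamp(from_unixtime(max(timestamps) / 1000)) as endTime,
       sum(priceSum) as money
from dwd_paid_order_detail_tmp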

-- Shopping preference by education level
create table if not exists ads_edu_shopping_hobby_index(
education string primary key not enforced,
money decimal(20,2)
)
with(
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'edu_cost'
)
       
-- Shopping preference by education level
insert into ads_edu_shopping_hobby_index
select
u.education as education,
sum(o.priceSum) as money
from
ods_users u join dwd_paid_order_detail_tmp o
on u.userId = o.userId
group by u.education    

-- Popular commodities
create table if not exists ads_commodity_index(
commodity string primary key not enforced,
heat int
)
with(
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'commodity_index'
)
   
-- Popular commodities
insert into ads_commodity_index
select d.commodity,sum(o.productCount) heat
from dwd_paid_order_detail_tmp o join ods_commodity d
on o.productId = d.id
group by d.commodity    
    
-- Civilized city (average review score by city)
create table if not exists ads_civilized_city_index(
city string primary key not enforced,
score double
)
with(
'connector' = 'elasticsearch-7',
'hosts' = 'http://localhost:9200',
'index' = 'civilized_city_index'
)
    
-- Civilized city
insert into ads_civilized_city_index
select a.* from (
select u.city,avg(
(described*0.4+logisticsAttitude*0.3+serviceAttitude*0.3)/3
) score
from dwd_paid_order_detail_tmp o join ods_users u
on o.userId = u.userId
group by u.city
)as a
where a.score > 0.9

4.5 Running the Scripts in the Flink SQL Client

[Screenshots: the DDL and INSERT statements executing in the Flink SQL Client]
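
If the statements are executed interactively, a typical session looks roughly like the sketch below; the checkpoint interval and the quoted SET syntax (Flink 1.13+) are assumptions, not part of the original article:

-- Sketch of a SQL Client session (assumptions noted above).
SET 'execution.checkpointing.interval' = '10s'; -- enable checkpointing so the Kafka/JDBC/ES sinks can recover
-- 1. run the ODS and DIM DDL plus the DIM insert statements
-- 2. run the DWD DDL and the wide-table insert
-- 3. run the ADS DDL and inserts; each INSERT INTO is submitted as a separate Flink job
SHOW TABLES; -- confirm all tables are registered in the current catalog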

5. Metric Visualization

[Dashboard screenshot] Note: the figure is for reference only and does not represent any position.