MySQL表导入到Parquet格式HDFS文件,同步到Hive的Parquet表

118 阅读1分钟

步骤 step1:创建 MySQL 表并插入数据

-- MySQL source table: product category dimension (id -> category name).
-- id stays VARCHAR to line up with the STRING column of the Hive Parquet table.
-- PRIMARY KEY stops duplicate category ids on re-runs; IF NOT EXISTS makes
-- the tutorial script idempotent.
CREATE TABLE IF NOT EXISTS base_category (
    id   VARCHAR(10) NOT NULL,
    name VARCHAR(50) NOT NULL,
    PRIMARY KEY (id)
);

-- Seed data for the category dimension.
-- The explicit (id, name) column list keeps this INSERT valid even if the
-- table later gains extra columns (bare INSERT ... VALUES breaks silently).
INSERT INTO base_category (id, name) VALUES
    ('2', '手机'),
    ('3', '家用电器'),
    ('4', '数码'),
    ('5', '家居家装'),
    ('6', '电脑办公'),
    ('7', '厨具'),
    ('8', '个护化妆'),
    ('9', '服饰内衣'),
    ('10', '钟表'),
    ('11', '鞋靴'),
    ('12', '母婴'),
    ('13', '礼品箱包'),
    ('15', '珠宝'),
    ('16', '汽车用品'),
    ('17', '运动健康'),
    ('18', '玩具乐器');

step2:sqoop抽数

# Sqoop import: pull MySQL table base_category into HDFS as Parquet files.
# NOTE: every option line MUST end with a trailing backslash — the original
# listing had the continuations stripped, so each line would have been run
# as a separate shell command and the import could never work.
# SECURITY NOTE(review): --password on the command line leaks the secret via
# shell history and `ps`; prefer -P (interactive prompt) or --password-file.
sqoop import \
  --connect jdbc:mysql://IP:3306/test \
  --username 用户名 \
  --password 密码 \
  --table base_category \
  --target-dir '/hdfs上的路径/base_category' \
  --delete-target-dir \
  --num-mappers 1 \
  --fields-terminated-by "\t" \
  --as-parquetfile
# --num-mappers 1 is the documented spelling of the original "--m 1".
# NOTE: --fields-terminated-by only affects text output; Parquet is a binary
# columnar format, so this option is a no-op here and could be dropped.

step3:创建hive外表并查询

-- Hive external table mapped onto the Parquet files written by sqoop.
-- NOTE(review): LOCATION must point at the SAME directory used as sqoop's
-- --target-dir in step2, and the column names/types must match the Parquet
-- schema — a path mismatch silently yields an empty table. Confirm the
-- 'hdfs://bigdata01:8020/...' path against the actual --target-dir.
CREATE EXTERNAL TABLE IF NOT EXISTS base_category (
    id   STRING,
    name STRING
)
STORED AS PARQUET
LOCATION 'hdfs://bigdata01:8020/xxx/xxx/test/base_category';

-- Verification query: list the columns explicitly instead of SELECT *
-- (result-identical here since the table has exactly these two columns,
-- but robust against future schema changes).
SELECT
    id,
    name
FROM base_category;