1. 导入数据
1.1 RDBNS到HDFS
1.1.1 全量导入
#!/bin/bash
sqoop import \
# techadata:数据库名
--connect jdbc:mysql://192.168.3.30:3306/techdata \
# 用户名
--username root \
# 密码
--password 123456 \
# mysql表名
--table essential_information \
# hdfs路径
--target-dir /user/essential_information \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t"
1.1.2 查询导入
#!/bin/bash
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--target-dir /user/essential_information \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query 'select name,sex from essential_information where id <10 and $CONDITIONS;'
1.1.3 导入指定列
#!/bin/bash
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--target-dir /user/essential_information \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--columns id,age \
--table essential_information
columns 中如果涉及到多列,用逗号分隔,分隔时不要添加空格
1.1.4 条件导入数据
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--target-dir /user/essential_information \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--table essential_information \
--where "id=1"
1.2 RDBMS到hive
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--table essential_information \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "\t" \
--hive-overwrite \
--hive-table essential_information
1.3 RDBMS到hbase
1.3.1 全量导入
#!/bin/bash
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
# mysql表名
--table essential_information \
# 导入字段名
--columns "id, age, allowance, area, awards, bank_account, bank_name, birthdate, birthplace, cdc, degree, department" \
# hbase表名
--hbase-table "essential_information" \
# hbase列簇
--column-family "info" \
# hbase主键
--hbase-row-key id
1.3.2 增量导入
- 基于递增列的增量数据导入(Append方式)
#!/bin/bash
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--table essential_information \
--columns "id, age, allowance, area, awards, bank_account, bank_name, birthdate, birthplace, cdc, degree, departmen, last_mod" \
--hbase-table "essential_information" \
--column-family "info" \
--hbase-row-key id \
# 递增列
--check-column id \
--incremental append \
# 阈值
--last-value 0 \
- 基于时间列的数据增量导入(LastModified方式)
#!/bin/bash
sqoop import \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--table essential_information \
--columns "id, age, allowance, area, awards, bank_account, bank_name, birthdate, birthplace, cdc, degree, departmen, last_mod" \
--hbase-table "essential_information" \
--column-family "info" \
--hbase-row-key id \
# 检查的列
--check-column last_mod \
--incremental lastmodified \
# 阈值
--last-value "2022-05-19 22:00:00" \
--append
使用lastmodified方式导入数据要指定增量数据是要--append(追加)还是要 --merge-key(合并);last-value 指定的值是会包含于增量导入的数据中。
1.4 sqoop job增量导入
sqoop job每次会为我们维护last-value的值,达到自动增量导入的目的
- 查看已经存在的job
sqoop job --list
- 创建job
#!bin/bash
sqoop job \
--create essjob \
-- improt \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password 123456 \
--table essential_information \
--columns "id, age, allowance, area, awards, bank_account, bank_name, birthdate, birthplace, cdc, degree, department, last_mod" \
--hbase-table "essential_information" \
--column-family "info" \
--hbase-row-key id \
--check-column id \
--incremental append \
--last-value 0
- 删除job
sqoop job --delete essjob
- 执行job
sqoop job --exec essjob
- 检查job
sqoop job --show essjob
job任务每次执行需要手动输入密码,配置job免密输入,在sqoop-site.xml添加如下内容
<property>
<name>sqoop.metastore.client.record.password</name>
<value>true</value>
<description>if true, allow saved passwords in the metastore</description>
</property>
将密码配置在hdfs文件中,并修改权限为400
echo -n "nlpservicefz116" > mysql.pwd
hadoop fs -mkdir -p /sqoopmysql/pwd/
hadoop fs -put mysql.pwd /sqoopmysql/pwd/
hadoop fs -chmod 400 /sqoopmysql/pwd/mysql.pwd
修改--password参数为--password-file
#!bin/bash
sqoop job \
--create essjob \
-- improt \
--connect jdbc:mysql://192.168.3.30:3306/techdata \
--username root \
--password-file /sqoopmysql/pwd/mysql.pwd \
--table essential_information \
--columns "id, age, allowance, area, awards, bank_account, bank_name, birthdate, birthplace, cdc, degree, department, last_mod" \
--hbase-table "essential_information" \
--column-family "info" \
--hbase-row-key id \
--check-column id \
--incremental append \
--last-value 0