持续创作,加速成长!这是我参与「掘金日新计划 · 10 月更文挑战」的第13天,10月更文诚意加码,激发写作潜力|掘金·日新计划 - 掘金 (juejin.cn)点击查看活动详情
编程题
1. 写出 wordcount的核心代码(mapper和reducer)
/**
 * Mapper of the word-count job: emits {@code (word, 1)} for every word found on an input line.
 *
 * <p>The input key is not used; the input value is one line of text whose words are
 * separated by single spaces.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // The emitted count is always 1, so one shared instance is reused for every record.
    private final LongWritable outValue = new LongWritable(1);
    // Output key holder, re-set for each word before it is written.
    private final Text outKey = new Text();

    /**
     * Splits the line on spaces and writes {@code (word, 1)} for each non-empty token.
     *
     * @param key     input key (unused)
     * @param value   one line of input text
     * @param context task context used to emit the key/value pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(" ");
        for (String word : words) {
            // A blank line or a run of consecutive spaces makes split(" ") return empty
            // strings; they are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, outValue);
        }
    }
}
/**
 * Reducer of the word-count job: adds up every count received for a word and emits
 * {@code (word, total)}.
 */
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    // Holder for the total that is written out for the current key.
    private LongWritable result = new LongWritable();

    /**
     * Sums the values of one key and writes the key together with that sum.
     *
     * @param key     the word
     * @param values  the counts collected for this word
     * @param context task context used to emit the result
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable count : values) {
            total = total + count.get();
        }
        result.set(total);
        // Final output: the word and the number of times it occurred.
        context.write(key, result);
    }
}
2. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的Student代码)
id name height sex age
1-张三-170-男-18
2-李四-181-男-19
3-王五-165-女-17
4-赵六-185-男-20
5-田七-168-女-21
6-马八-168-女-18
7-陈九-170-男-16
/**
 * A student record usable as a MapReduce key.
 *
 * <p>Ordering: ascending by height; students with the same height are ordered by age,
 * descending. Two students with the same height and age compare as equal (0) regardless of
 * their other fields.
 *
 * <p>{@code name} and {@code height} must be non-null when {@link #write(DataOutput)} or
 * {@link #compareTo(Student)} is called; a null value fails with a
 * {@link NullPointerException}.
 */
public class Student implements WritableComparable<Student> {
    private int id;
    private String name;
    private Double height;
    private char sex;
    private int age;

    /** Creates an empty student; fields are filled through setters or {@link #readFields(DataInput)}. */
    public Student() {
    }

    /**
     * Creates a fully populated student.
     *
     * @param id     student id
     * @param name   student name
     * @param height student height
     * @param sex    student sex as a single character
     * @param age    student age
     */
    public Student(int id, String name, Double height, char sex, int age) {
        this.id = id;
        this.name = name;
        this.height = height;
        this.sex = sex;
        this.age = age;
    }

    /** Returns the fields joined by tab characters, in the order id, name, height, sex, age. */
    @Override
    public String toString() {
        return id + "\t" + name + "\t" + height + "\t" + sex + "\t" + age;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Double getHeight() {
        return height;
    }

    public void setHeight(Double height) {
        this.height = height;
    }

    public char getSex() {
        return sex;
    }

    public void setSex(char sex) {
        this.sex = sex;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    /**
     * Compares by height ascending, then by age descending.
     *
     * @param o the student to compare with
     * @return a negative value, zero, or a positive value as this student sorts before,
     *         equal to, or after {@code o}
     */
    @Override
    public int compareTo(Student o) {
        int byHeight = Double.compare(this.height, o.getHeight());
        if (byHeight != 0) {
            return byHeight;
        }
        // Arguments are swapped on purpose: a larger age must sort first. Returning 0 on a
        // full tie keeps the comparison symmetric, as the Comparable contract requires.
        return Integer.compare(o.getAge(), this.age);
    }

    /**
     * Serializes the fields in the order id, name, height, sex, age.
     *
     * @param out the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name);
        out.writeDouble(height);
        out.writeChar(sex);
        out.writeInt(age);
    }

    /**
     * Restores the fields; the read order must mirror {@link #write(DataOutput)} exactly.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.id = in.readInt();
        this.name = in.readUTF();
        this.height = in.readDouble();
        this.sex = in.readChar();
        this.age = in.readInt();
    }
}
3. 将首字母从a-p的单词分为一个区进行词频统计,将首字母从q-z的单词分为一个区进行词频统计(写出分区代码即可)
/**
 * Partitioner of the word-count job that routes words by their first character.
 */
public class WordCountPartitioner extends Partitioner<Text, LongWritable> {
    /**
     * Returns the partition number for a word based on its first character.
     *
     * <p>Words starting with a lowercase letter {@code 'a'}..{@code 'p'} go to partition 0.
     * Every other key goes to partition 1, including words starting with {@code 'q'}..{@code 'z'},
     * an uppercase letter, a digit or a symbol, as well as the empty key.
     *
     * @param text          the word (map output key)
     * @param longWritable  the count (map output value), not used
     * @param numPartitions number of partitions of the job, not used
     * @return 0 or 1
     */
    @Override
    public int getPartition(Text text, LongWritable longWritable, int numPartitions) {
        String word = text.toString();
        // An empty key has no first character; charAt(0) would throw
        // StringIndexOutOfBoundsException, so send it to the catch-all partition.
        if (word.isEmpty()) {
            return 1;
        }
        char firstChar = word.charAt(0);
        if (firstChar >= 'a' && firstChar <= 'p') {
            return 0;
        } else {
            return 1;
        }
    }
}
4. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的分区代码)
id name height sex age
1-张三-170-男-18
2-李四-181-男-19
3-王五-165-女-17
4-赵六-185-男-20
5-田七-168-女-21
6-马八-168-女-18
7-陈九-170-男-16
/**
 * Partitioner that separates student records by sex.
 */
public class StudentPartitioner extends Partitioner<Student, NullWritable> {
    /**
     * Returns 0 when the student's sex is {@code '男'}, otherwise 1.
     *
     * @param student       the student (map output key)
     * @param nullWritable  the map output value, not used
     * @param numPartitions number of partitions of the job, not used
     * @return 0 or 1
     */
    @Override
    public int getPartition(Student student, NullWritable nullWritable, int numPartitions) {
        boolean male = student.getSex() == '男';
        return male ? 0 : 1;
    }
}
5. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的mapper代码)
id name height sex age
1-张三-170-男-18
2-李四-181-男-19
3-王五-165-女-17
4-赵六-185-男-20
5-田七-168-女-21
6-马八-168-女-18
7-陈九-170-男-16
/**
 * Mapper that parses {@code id-name-height-sex-age} lines into {@link Student} keys.
 *
 * <p>The value emitted with every key is the {@link NullWritable} singleton.
 */
public class StudentMapper extends Mapper<LongWritable, Text, Student, NullWritable> {
    // Reused output key; all of its fields are overwritten for every record.
    private final Student student = new Student();

    /**
     * Parses one data line and emits it as a {@link Student}; the header line and blank
     * lines are skipped.
     *
     * @param key     input key; assumed to be the byte offset of the line within its file,
     *                as supplied by TextInputFormat — confirm against the job's input format
     * @param value   one line of input text
     * @param context task context used to emit the record
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The header is the line at offset 0 of a file. A per-mapper "first record" counter
        // would also drop a real data row in every mapper whose split starts mid-file.
        if (key.get() == 0L) {
            return;
        }
        String line = value.toString();
        // A blank line (for example a trailing one) carries no record.
        if (line.trim().isEmpty()) {
            return;
        }
        String[] fields = line.split("-");
        student.setId(Integer.parseInt(fields[0]));
        student.setName(fields[1]);
        student.setHeight(Double.parseDouble(fields[2]));
        student.setSex(fields[3].charAt(0));
        student.setAge(Integer.parseInt(fields[4]));
        context.write(student, NullWritable.get());
    }
}
6. 完成HDFS文件上传的代码,将D盘的name.txt文件进行上传到HDFS的test目录下
/**
 * Uploads the local file {@code D:/name.txt} to {@code /test} on HDFS.
 *
 * @throws URISyntaxException   if the HDFS URI is malformed
 * @throws IOException          if connecting to HDFS or copying the file fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
public void testUpload() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the file system even when the copy throws.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris")) {
        // First argument is delSrc: false keeps the local source file.
        // This overload overwrites an existing destination file of the same name.
        fileSystem.copyFromLocalFile(false, new Path("D:/name.txt"), new Path("/test"));
    }
}
7. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的reducer代码)
id name height sex age
1-张三-170-男-18
2-李四-181-男-19
3-王五-165-女-17
4-赵六-185-男-20
5-田七-168-女-21
6-马八-168-女-18
7-陈九-170-男-16
/**
 * Reducer that passes student records through unchanged.
 */
public class StudentReducer extends Reducer<Student, NullWritable, Student, NullWritable> {
    /**
     * Writes the key once for every value received with it.
     *
     * @param key     the student
     * @param values  the values received for this key
     * @param context task context used to emit the records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        java.util.Iterator<NullWritable> cursor = values.iterator();
        while (cursor.hasNext()) {
            NullWritable marker = cursor.next();
            context.write(key, marker);
        }
    }
}
8. 使用HDFS代码创建新目录test
/**
 * Creates the directory {@code /test} on HDFS.
 *
 * @param args command-line arguments, not used
 * @throws URISyntaxException   if the HDFS URI is malformed
 * @throws IOException          if connecting to HDFS or creating the directory fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the file system even when mkdirs throws.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris")) {
        fileSystem.mkdirs(new Path("/test"));
    }
}
9. 使用HDFS代码下载文件 /test/name.txt 到本地的D盘
/**
 * Downloads {@code /test/name.txt} from HDFS to the local file {@code D:/name.txt}.
 *
 * @throws URISyntaxException   if the HDFS URI is malformed
 * @throws IOException          if connecting to HDFS or copying the file fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
public void testDownload() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the file system even when the copy throws.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris")) {
        // First argument is delSrc: false keeps the file on HDFS.
        // Last argument is useRawLocalFileSystem: true writes through the raw local file
        // system, so no local .crc checksum file is produced next to the download.
        fileSystem.copyToLocalFile(false, new Path("/test/name.txt"), new Path("D:/name.txt"), true);
    }
}
10. 使用HDFS代码完成文件的重命名 /test/name.txt 改名为 /test/name2.txt
/**
 * Renames {@code /test/name.txt} to {@code /test/name2.txt} on HDFS.
 *
 * @throws URISyntaxException   if the HDFS URI is malformed
 * @throws IOException          if connecting to HDFS fails or the rename is not performed
 * @throws InterruptedException if obtaining the file system is interrupted
 */
public void testMoveFile() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    Path source = new Path("/test/name.txt");
    Path target = new Path("/test/name2.txt");
    // try-with-resources releases the file system handle, which was previously never closed.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris")) {
        // rename reports failure through its boolean result; surface it instead of
        // letting the method finish as if the file had been renamed.
        if (!fileSystem.rename(source, target)) {
            throw new IOException("Failed to rename " + source + " to " + target);
        }
    }
}