hadoop 典型编程题

244 阅读5分钟

持续创作,加速成长!这是我参与「掘金日新计划 · 10 月更文挑战」的第13天,10月更文诚意加码,激发写作潜力|掘金·日新计划 - 掘金 (juejin.cn)点击查看活动详情

编程题

 

1. 写出 wordcount的核心代码(mapper和reducer)

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {  
    //    定义往外写的value为全局变量,因为它的值恒等于1,不会改变  
    private LongWritable outValue = new LongWritable(1);  
    private Text outKey = new Text();  
  
    @Override  
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {  
//        把读取进来的每一行文本转换成字符串  
        String string = value.toString();  
  
//        根据空格切割每行文本,转换成字符串数组  
        String[] strings = string.split(" ");  
  
//      循环每个字符串数组    
        for (String word : strings) {  
//            设置 outKey的值,也就是每个单词  
            outKey.set(word);  
//            map阶段往reduce阶段输出的key-value键值对  
            context.write(outKey, outValue);  
        }  
    }  
}

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {  
  
//    往外输出的value,就是1累加后的结果  
    private LongWritable outValue = new LongWritable();  
  
    @Override  
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {  
//        定义的累加的合  
        long sum = 0;  
//        对相同单词的迭代器做循环  
        for (LongWritable value : values) {  
//            每次循环得到的1,进行累加  
            sum += value.get();  
        }  
//        设置累加后的和  
        outValue.set(sum);  
//        reduce阶段往外写最终的每个单词出现的次数  
        context.write(key, outValue);  
    }  
}

 

 

 

2. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的Student代码)

id name height sex age

1-张三-170-男-18

2-李四-181-男-19

3-王五-165-女-17

4-赵六-185-男-20

5-田七-168-女-21

6-马八-168-女-18

7-陈九-170-男-16

 

public class Student implements WritableComparable<Student> {  
    private int id;  
    private String name;  
    private Double height;  
    private char sex;  
    private int age;  
  
    @Override  
    public String toString() {  
        return id + "\t" + name + "\t" + height + "\t" + sex + "\t" + age;  
    }  
  
    public Student(int id, String name, Double height, char sex, int age) {  
        this.id = id;  
        this.name = name;  
        this.height = height;  
        this.sex = sex;  
        this.age = age;  
    }  
  
    public Student() {  
    }  
  
    public int getId() {  
        return id;  
    }  
  
    public void setId(int id) {  
        this.id = id;  
    }  
  
    public String getName() {  
        return name;  
    }  
  
    public void setName(String name) {  
        this.name = name;  
    }  
  
    public Double getHeight() {  
        return height;  
    }  
  
    public void setHeight(Double height) {  
        this.height = height;  
    }  
  
    public char getSex() {  
        return sex;  
    }  
  
    public void setSex(char sex) {  
        this.sex = sex;  
    }  
  
    public int getAge() {  
        return age;  
    }  
  
    public void setAge(int age) {  
        this.age = age;  
    }  
  
    @Override  
    public int compareTo(Student o) {  
//        根据身高升序排序  
        if (this.height > o.getHeight()) {  
            return 1;  
        } else if (this.height < o.getHeight()) {  
            return -1;  
        } else {  
//            根据年龄降序排序  
            if (this.age > o.getAge()) {  
                return -1;  
            } else {  
                return 1;  
            }  
        }  
  
    }  
  
    @Override  
    public void write(DataOutput out) throws IOException {  
        out.writeInt(id);  
        out.writeUTF(name);  
        out.writeDouble(height);  
        out.writeChar(sex);  
        out.writeInt(age);  
    }  
  
    @Override  
    public void readFields(DataInput in) throws IOException {  
        this.id = in.readInt();  
        this.name = in.readUTF();  
        this.height = in.readDouble();  
        this.sex = in.readChar();  
        this.age = in.readInt();  
    }  
}

 

3. 将首字母从a-p的单词分为一个区进行词频统计,将首字母从q-z的单词分为一个区进行词频统计(写出分区代码即可)

public class WordCountPartitioner extends Partitioner<Text, LongWritable> {  
    /**  
     * 自定义的分区器,根据单词的首字母返回对应的分区号  
     *  
     * @param text  
     * @param longWritable  
     * @param numPartitions  
     * @return  
     */  
    @Override  
    public int getPartition(Text text, LongWritable longWritable, int numPartitions) {  
        String word = text.toString();  
//        取出首字母  
        char firstChar = word.charAt(0);  
        if (firstChar >= 'a' && firstChar <= 'p') {  
            return 0;  
        }else{  
            return 1;  
        }  
    }  
}

 

4. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的分区代码)

id name height sex age

1-张三-170-男-18

2-李四-181-男-19

3-王五-165-女-17

4-赵六-185-男-20

5-田七-168-女-21

6-马八-168-女-18

7-陈九-170-男-16

public class StudentPartitioner extends Partitioner<Student, NullWritable> {  
    @Override  
    public int getPartition(Student student, NullWritable nullWritable, int numPartitions) {  
        if (student.getSex() == '男') {  
            return 0;  
        } else {  
            return 1;  
        }  
    }  
}

 

5. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的mapper代码)

id name height sex age

1-张三-170-男-18

2-李四-181-男-19

3-王五-165-女-17

4-赵六-185-男-20

5-田七-168-女-21

6-马八-168-女-18

7-陈九-170-男-16

   

public class StudentMapper extends Mapper<LongWritable, Text, Student, NullWritable> {  
    private int count = 0;  
    private Student student = new Student();  
  
    @Override  
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {  
        if (count == 0) {  
            count += 1;  
        } else {  
            String string = value.toString();  
            String[] strings = string.split("-");  
            student.setId(Integer.parseInt(strings[0]));  
            student.setName(strings[1]);  
            student.setHeight(Double.parseDouble(strings[2]));  
            student.setSex(strings[3].charAt(0));  
            student.setAge(Integer.parseInt(strings[4]));  
            context.write(student, NullWritable.get());  
        }  
    }  
}

 

6. 完成HDFS文件上传的代码:将D盘的name.txt文件上传到HDFS的/test目录下

    public void testUpload() throws URISyntaxException, IOException, InterruptedException {  
//        1. 获取HDFS文件系统对象  
        Configuration configuration = new Configuration();  
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris");  
  
//        2. 上传文件,默认可以覆盖上传文件;  
//        delSrc 表示是否删除源文件(默认是false),  
//        overwrite 表示是否覆盖同名文件(默认是true)  
//        src表示源文件路径,des表示上传路径  
        fileSystem.copyFromLocalFile(false, new Path("D:/name.txt"), new Path("/test"));  
  
//        3. 关闭资源  
        fileSystem.close();  
    }

   

7. 针对如下数据,将男生和女生的数据进行分区统计排序(按照身高进行升序排序,如果身高相同则根据年龄降序排序),最后输出为两个文件(请写出对应的reducer代码)

id name height sex age

1-张三-170-男-18

2-李四-181-男-19

3-王五-165-女-17

4-赵六-185-男-20

5-田七-168-女-21

6-马八-168-女-18

7-陈九-170-男-16

public class StudentReducer extends Reducer<Student, NullWritable, Student, NullWritable> {  
    @Override  
    protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {  
        for (NullWritable value : values) {  
            context.write(key, value);  
        }  
    }  
}

 

8. 使用HDFS代码创建新目录test

    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {  
//        1. 获取HDFS文件对象  
        Configuration configuration = new Configuration();  
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris");  
  
//        2. 创建新目录  
        fileSystem.mkdirs(new Path("/test"));  
  
//        3. 关闭文件系统  
        fileSystem.close();  
  
    }

 

9. 使用HDFS代码下载文件 /test/name.txt 到本地的D盘

    public void testDownload() throws URISyntaxException, IOException, InterruptedException {  
//       1. 创建HDFS文件对象  
        Configuration configuration = new Configuration();  
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris");  
  
//        2. 调用下载的方法:delSrc 表示是否删除源文件(默认是false),rawLocalFileSystem 表示是否启动本地文件校验机制(默认是false)  
//        默认如果本地存在同名文件,则覆盖  
        fileSystem.copyToLocalFile(false, new Path("/test/name.txt"), new Path("D:/name.txt"), true);  
  
//        3. 关闭HDFS文件对象  
        fileSystem.close();  
  
    }

 

10. 使用HDFS代码完成文件的重命名 /test/name.txt 改名为 /test/name2.txt

    public void testMoveFile() throws URISyntaxException, IOException, InterruptedException {  
//        创建文件系统对象  
      Configuration configuration = new Configuration();  
      FileSystem fileSystem = FileSystem.get(new URI("hdfs://bigdata102:8020"), configuration, "cris");  
  
//        执行文件移动或者更名的方法  
       fileSystem.rename(new Path("/test/name.txt"), new Path("/test/name2.txt"));  
  
  
//        关闭文件系统  
//        fileSystem.close();  
  
    }