Hadoop MapReduce Traffic Statistics Example | Weekend Study


This article is part of the Weekend Learning Plan; click the link for details: juejin.cn/post/696572…

Solving a traffic statistics case with Hadoop's MapReduce framework.
The text file to be processed has the following format:

1363157985066 	13726238888	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
1363157993055 	13560436666	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200

The first column is a timestamp and the second column is a phone number; the second-to-last column is the downlink traffic and the third-to-last column is the uplink traffic. Using the phone number as the key, we need to compute each phone's total uplink traffic, total downlink traffic, and overall total.
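For example, the first sample record belongs to phone 13726238888 and carries uplink traffic of 2481 and downlink traffic of 24681, for a total of 2481 + 24681 = 27162.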

The project structure is as follows:

-flowCount
 -FlowBean.java
 -FlowCountMapper.java
 -FlowCountReducer.java
 -JobSubmitter.java

FlowBean.java:
defines a bean with uplink-traffic, downlink-traffic, and total-traffic fields
FlowCountMapper.java:
processes each line of the text file, turning it into a record with the phone number as the key and a FlowBean as the value
FlowCountReducer.java:
totals the traffic usage for each phone number
JobSubmitter.java:
configures and submits the job

The complete code follows.

FlowBean.java:

package flowCount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {
	// uplink traffic, downlink traffic, and their sum for one phone number
	private int upFlow;
	private int dFlow;
	private int amountFlow;
	private String phone;

	// the no-arg constructor is required so Hadoop can instantiate the bean
	// before calling readFields()
	public FlowBean() {}
	public FlowBean(String phone, int upFlow, int dFlow) {
		this.phone = phone;
		this.upFlow = upFlow;
		this.dFlow = dFlow;
		this.amountFlow = upFlow + dFlow;
	}

	public int getUpFlow() {
		return upFlow;
	}

	public void setUpFlow(int upFlow) {
		this.upFlow = upFlow;
	}

	public int getdFlow() {
		return dFlow;
	}

	public void setdFlow(int dFlow) {
		this.dFlow = dFlow;
	}

	public int getAmountFlow() {
		return amountFlow;
	}

	public void setAmountFlow(int amountFlow) {
		this.amountFlow = amountFlow;
	}
	
	@Override
	public void write(DataOutput out) throws IOException {
		// serialize the fields; readFields() must read them back in the same order
		out.writeInt(upFlow);
		out.writeUTF(phone);
		out.writeInt(dFlow);
		out.writeInt(amountFlow);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// deserialize in exactly the order written by write()
		this.upFlow = in.readInt();
		this.phone = in.readUTF();
		this.dFlow = in.readInt();
		this.amountFlow = in.readInt();
	}

	public String getPhone() {
		return phone;
	}

	public void setPhone(String phone) {
		this.phone = phone;
	}

	@Override
	public String toString() {
		return this.upFlow + "," +  this.dFlow + "," + this.amountFlow;
	}

}
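
Since write() and readFields() must process the fields in exactly the same order, a quick local round-trip check can catch a mismatch before the job ever runs on a cluster. A minimal sketch (FlowBeanRoundTrip is a hypothetical helper, not part of the project):

package flowCount;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
	public static void main(String[] args) throws IOException {
		FlowBean original = new FlowBean("13726238888", 2481, 24681);

		// serialize into an in-memory buffer
		ByteArrayOutputStream buf = new ByteArrayOutputStream();
		original.write(new DataOutputStream(buf));

		// deserialize from the buffer and print; expected: 2481,24681,27162
		FlowBean copy = new FlowBean();
		copy.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
		System.out.println(copy);
	}
}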

FlowCountMapper.java:

package flowCount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		String line = value.toString();
		String[] fields = line.split("\t");

		// the phone number is always the second column
		String phone = fields[1];

		// index from the end of the record: optional middle fields can be
		// empty, but the trailing columns keep fixed positions
		int upFlow = Integer.parseInt(fields[fields.length - 3]);
		int dFlow = Integer.parseInt(fields[fields.length - 2]);

		context.write(new Text(phone), new FlowBean(phone, upFlow, dFlow));
	}

}
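
Indexing from the end of the split array guards against records whose middle fields vary (the second sample record has no URL); the phone number in the second column and the trailing traffic columns keep fixed positions. A standalone sketch to sanity-check the indices (FieldIndexCheck is a hypothetical helper):

package flowCount;

public class FieldIndexCheck {
	public static void main(String[] args) {
		// second sample record; note the consecutive tabs for missing fields
		String line = "1363157993055 \t13560436666\tC4-17-FE-BA-DE-D9:CMCC"
				+ "\t120.196.100.99\t\t\t18\t15\t1116\t954\t200";
		String[] fields = line.split("\t");

		System.out.println(fields[1]);                  // 13560436666 (phone)
		System.out.println(fields[fields.length - 3]);  // 1116 (uplink)
		System.out.println(fields[fields.length - 2]);  // 954  (downlink)
	}
}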

FlowCountReducer.java:

package flowCount;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Context context)
			throws IOException, InterruptedException {

		int upSum = 0;
		int dSum = 0;

		// add up the uplink and downlink traffic of every record for this phone
		for (FlowBean value : values) {
			upSum += value.getUpFlow();
			dSum += value.getdFlow();
		}

		context.write(key, new FlowBean(key.toString(), upSum, dSum));
	}

}
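
One subtlety: Hadoop reuses a single FlowBean instance across iterations of the values iterable, refilling it via readFields() each time. Summing the primitive counts immediately, as above, is safe; storing references to the FlowBean objects themselves (for example in a list) would leave every entry pointing at the last record.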

JobSubmitter.java:

package flowCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobSubmitter {

	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();

		Job job = Job.getInstance(conf);

		job.setJarByClass(JobSubmitter.class);

		job.setMapperClass(FlowCountMapper.class);
		job.setReducerClass(FlowCountReducer.class);
		
		// use three reduce tasks; keys are distributed among them by the
		// default HashPartitioner
		job.setNumReduceTasks(3);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		FileInputFormat.setInputPaths(job, new Path("/mrdata/flow/input"));
		FileOutputFormat.setOutputPath(job, new Path("/mrdata/flow/output"));

		job.waitForCompletion(true);

	}

}
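
Because the job runs with three reduce tasks, the results are written to three files under /mrdata/flow/output (part-r-00000 through part-r-00002); each phone number's totals land in exactly one of them.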

Steps to run

1. Upload the text file to be processed to HDFS:

hadoop fs -put <local file path> <HDFS absolute path>

2. Package the project as flowCount.jar.
3. Run the job:

hadoop jar flowCount.jar flowCount.JobSubmitter
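
For example, if the two sample records above are saved locally as flow.log (a hypothetical file name), the full sequence might look like this:

hadoop fs -mkdir -p /mrdata/flow/input
hadoop fs -put flow.log /mrdata/flow/input
hadoop jar flowCount.jar flowCount.JobSubmitter
hadoop fs -cat /mrdata/flow/output/part-r-*

The expected output is one line per phone number, with key and value separated by a tab (the default separator) and the value formatted by FlowBean's toString() as upFlow,dFlow,amountFlow:

13560436666	1116,954,2070
13726238888	2481,24681,27162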