Core classes
Classes related to the main program
org.apache.hadoop.mapred.YarnChild: the main() for MapReduce task processes.
- main() does the following (see the sketch below):
- builds a new JobConf from the localized job.xml
- uses the main() arguments (which contain the AppMaster address) to communicate with the AppMaster and obtain the task to run, an org.apache.hadoop.mapred.MapTask in the map case
- invokes the task's run() method
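A heavily simplified sketch of that flow (security/UGI setup, shutdown hooks, and error reporting are omitted, and the null JvmContext plus the exact argument positions are assumptions, so read this as an illustration of the control flow rather than a drop-in YarnChild):

import java.net.InetSocketAddress;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JvmTask;
import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class YarnChildSketch {
    public static void main(String[] args) throws Exception {
        // job.xml is localized into the container's working directory
        JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE);
        // the launch args carry the AppMaster's umbilical address (assumed host/port positions)
        InetSocketAddress amAddress = new InetSocketAddress(args[0], Integer.parseInt(args[1]));
        // RPC proxy back to the AppMaster
        TaskUmbilicalProtocol umbilical = RPC.getProxy(
            TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID, amAddress, job);
        try {
            // ask the AppMaster which task this JVM should run (the real code passes a JvmContext here)
            JvmTask jvmTask = umbilical.getTask(null);
            Task task = jvmTask.getTask();   // e.g. an org.apache.hadoop.mapred.MapTask
            task.run(job, umbilical);        // MapTask.run() drives the whole map pipeline below
        } finally {
            RPC.stopProxy(umbilical);
        }
    }
}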
Task execution classes
- org.apache.hadoop.mapred.MapTask: the class that actually executes a map task
Classes that form the framework's user-facing API
These are the interfaces user code programs against; the Context they receive wraps the concrete input/output implementations (a RecordReader for input, a RecordWriter for output). See the example after this list.
- org.apache.hadoop.mapreduce.Mapper
- org.apache.hadoop.mapreduce.Mapper.Context
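For orientation, a minimal user-side Mapper (TokenCountMapper is an illustrative name, not from the source): the framework's Mapper.run() pulls records through Context.nextKeyValue(), which is backed by the RecordReader, and every context.write() goes to the RecordWriter (on the map side, NewOutputCollector feeding MapOutputBuffer).

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        for (String token : line.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);   // handed to the RecordWriter (MapOutputBuffer on the map side)
            }
        }
    }
}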
Input-related classes
- org.apache.hadoop.mapreduce.RecordReader
- MapTask$NewTrackingRecordReader
- org.apache.hadoop.mapreduce.lib.input.LineRecordReader
Output-related classes
- org.apache.hadoop.mapreduce.RecordWriter
- MapTask$NewOutputCollector
- MapTask$MapOutputBuffer
Core algorithms
The map-side shuffle partitions, sorts, spills, and merges the map output, and finally writes it to disk. The end result is a single partition-ordered file: records are ordered first by partition, and by key within each partition.
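The behaviour walked through below is governed by a handful of job settings. The keys are the current mapreduce.* names and the values are the commonly cited defaults; treat both as assumptions to verify against your Hadoop version:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MapSideShuffleTuning {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // size of MapOutputBuffer's in-memory kvbuffer, in MB
        conf.setInt("mapreduce.task.io.sort.mb", 100);
        // buffer fill ratio that triggers a background sortAndSpill()
        conf.setFloat("mapreduce.map.sort.spill.percent", 0.80f);
        // max number of segments merged in one pass by mergeParts()/Merger.merge
        conf.setInt("mapreduce.task.io.sort.factor", 10);
        // minSpillsForCombine: run the combiner during the final merge only with at least this many spills
        conf.setInt("mapreduce.map.combine.minspills", 3);
        Job job = Job.getInstance(conf, "map-side-shuffle-demo");
        // ... set mapper/combiner/reducer and input/output paths as usual
    }
}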
Input: handling records that straddle split boundaries
The MapReduce framework itself does not deal with the possibility that a logical record is cut in two by a split boundary; that is left to the user-specified InputFormat. For example, when TextInputFormat's record reader is initialized, every split except the first starts reading from its second line; see LineRecordReader.initialize(InputSplit, TaskAttemptContext):
// If this is not the first split, we always throw away first record
// because we always (except the last split) read one extra line in
// next() method.
if (start != 0) {
start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
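The complementary half of this contract lives in nextKeyValue(): every reader keeps reading until it hits a line that starts beyond its split's end, so a record straddling the boundary is consumed exactly once, by the split in which it begins. A simplified, self-contained sketch of both rules (BoundaryAwareLineReader is an illustrative class, not part of Hadoop):

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

class BoundaryAwareLineReader {
    private final LineReader in;   // positioned at 'start' by the caller
    private final long end;        // end offset of this split
    private long pos;              // byte offset of the next line to read

    BoundaryAwareLineReader(LineReader in, long start, long end) throws IOException {
        this.in = in;
        this.end = end;
        long s = start;
        if (start != 0) {
            // Not the first split: discard the first (possibly partial) line;
            // the previous split's reader already consumed it.
            s += in.readLine(new Text(), 0, Integer.MAX_VALUE);
        }
        this.pos = s;
    }

    /** Reads the next line into 'value'; returns false once the split is exhausted. */
    boolean next(Text value) throws IOException {
        // A line belongs to this split iff it *starts* at or before 'end',
        // so we may read one line past 'end' to finish a straddling record.
        if (pos > end) {
            return false;
        }
        int consumed = in.readLine(value, Integer.MAX_VALUE, Integer.MAX_VALUE);
        pos += consumed;
        return consumed > 0;
    }
}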
Output: sortAndSpill
MapTask$MapOutputBuffer.sortAndSpill()
private void sortAndSpill() throws IOException, ClassNotFoundException, InterruptedException {
// approximate the output file length: buffer contents plus per-partition header lengths
final long size = distanceTo(bufstart, bufend, bufvoid) + partitions * APPROX_HEADER_LENGTH;
FSDataOutputStream out = null;
FSDataOutputStream partitionOut = null;
try {
// create spill file
final SpillRecord spillRec = new SpillRecord(partitions);
final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);//ex: attempt_1611734493242_0026_m_000000_0_spill_0.out
out = rfs.create(filename);
final int mstart = kvend / NMETA;
final int mend = 1 + // kvend is a valid record
(kvstart >= kvend
? kvstart
: kvmeta.capacity() + kvstart) / NMETA;
sorter.sort(MapOutputBuffer.this, mstart, mend, reporter); // QuickSort (the default sorter), ordering records by partition then by key
int spindex = mstart;
final IndexRecord rec = new IndexRecord();
final InMemValBytes value = new InMemValBytes();
for (int i = 0; i < partitions; ++i) {
IFile.Writer<K, V> writer = null;
try {
long segmentStart = out.getPos();
partitionOut = CryptoUtils.wrapIfNecessary(job, out, false);
writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
spilledRecordsCounter);
if (combinerRunner == null) {
// spill directly
DataInputBuffer key = new DataInputBuffer();
while (spindex < mend &&
kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
final int kvoff = offsetFor(spindex % maxRec);
int keystart = kvmeta.get(kvoff + KEYSTART);
int valstart = kvmeta.get(kvoff + VALSTART);
key.reset(kvbuffer, keystart, valstart - keystart);
getVBytesForOffset(kvoff, value);
writer.append(key, value);
++spindex;
}
} else {
int spstart = spindex;
while (spindex < mend &&
kvmeta.get(offsetFor(spindex % maxRec)
+ PARTITION) == i) {
++spindex;
}
// Note: we would like to avoid the combiner if we've fewer
// than some threshold of records for a partition
if (spstart != spindex) { // after the sort, run the combiner on this partition, but only if it actually has records
combineCollector.setWriter(writer);
RawKeyValueIterator kvIter =
new MRResultIterator(spstart, spindex);
combinerRunner.combine(kvIter, combineCollector);
}
}
// close the writer
writer.close(); // flush this partition's segment into the spill file
if (partitionOut != out) {
partitionOut.close();
partitionOut = null;
}
// record offsets
rec.startOffset = segmentStart;
rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
spillRec.putIndex(rec, i);
writer = null;
} finally {
if (null != writer) writer.close();
}
}
if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
// create spill index file
Path indexFilename =
mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
* MAP_OUTPUT_INDEX_RECORD_LENGTH);
spillRec.writeToFile(indexFilename, job);
} else {
indexCacheList.add(spillRec);
totalIndexCacheMemory +=
spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
}
LOG.info("Finished spill " + numSpills);
++numSpills;
} finally {
if (out != null) out.close();
if (partitionOut != null) {
partitionOut.close();
}
}
}
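The ordering that sorter.sort() establishes comes from MapOutputBuffer's compare(): entries are compared first by the partition id recorded in kvmeta, then by their raw key bytes, which is exactly why each spill file ends up sorted by partition and then by key. A stand-alone rendering of that ordering (MetaEntry and SpillOrder are illustrative names, not Hadoop classes):

import java.util.Comparator;
import org.apache.hadoop.io.WritableComparator;

// One kvmeta entry, reduced to what the sort needs: where the serialized key
// lives in kvbuffer and which reducer partition the record belongs to.
class MetaEntry {
    final byte[] kvbuffer;
    final int keyStart;
    final int keyLength;
    final int partition;

    MetaEntry(byte[] kvbuffer, int keyStart, int keyLength, int partition) {
        this.kvbuffer = kvbuffer;
        this.keyStart = keyStart;
        this.keyLength = keyLength;
        this.partition = partition;
    }
}

// Primary order: partition id; secondary order: raw key bytes.
class SpillOrder implements Comparator<MetaEntry> {
    @Override
    public int compare(MetaEntry a, MetaEntry b) {
        if (a.partition != b.partition) {
            return a.partition - b.partition;
        }
        return WritableComparator.compareBytes(
            a.kvbuffer, a.keyStart, a.keyLength,
            b.kvbuffer, b.keyStart, b.keyLength);
    }
}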
Combine
Task$NewCombinerRunner.combine()
public void combine(RawKeyValueIterator iterator, OutputCollector<K,V> collector) throws IOException, InterruptedException, ClassNotFoundException {
// make a reducer
org.apache.hadoop.mapreduce.Reducer<K,V,K,V> reducer =
(org.apache.hadoop.mapreduce.Reducer<K,V,K,V>)
ReflectionUtils.newInstance(reducerClass, job);
org.apache.hadoop.mapreduce.Reducer.Context
reducerContext = createReduceContext(reducer, job, taskId,
iterator, null, inputCounter,
new OutputConverter(collector),
committer,
reporter, comparator, keyClass,
valueClass);
reducer.run(reducerContext); // the combiner is just the reducer run on the map side
}
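From the user's side, the reducerClass instantiated above is simply whatever was registered with job.setCombinerClass(). A minimal word-count style driver wiring it up (TokenCountMapper is the illustrative mapper shown earlier; IntSumReducer is defined inline, and is safe to use as a combiner because summing is associative and commutative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    // Used as both combiner and reducer: running it on the map side does not change the final result.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable sum = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable v : values) {
                total += v.get();
            }
            sum.set(total);
            context.write(key, sum);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "wordcount");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenCountMapper.class);   // illustrative mapper from earlier
        job.setCombinerClass(IntSumReducer.class);    // becomes reducerClass inside NewCombinerRunner.combine()
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}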
Merge
MapTask$MapOutputBuffer.mergeParts() (excerpt)
Path finalOutputFile =
mapOutputFile.getOutputFileForWrite(finalOutFileSize); // final map output, e.g. output/attempt_1611734493242_0026_m_000000_0/file.out
Path finalIndexFile =
mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize); // its index, e.g. output/attempt_1611734493242_0026_m_000000_0/file.out.index
//The output stream for the final single output file
FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); // create file.out
FSDataOutputStream finalPartitionOut = null;
sortPhase.addPhases(partitions); // Divide sort phase into sub-phases
IndexRecord rec = new IndexRecord();
final SpillRecord spillRec = new SpillRecord(partitions);
for (int parts = 0; parts < partitions; parts++) {
//create the segments to be merged
List<Segment<K,V>> segmentList =
new ArrayList<Segment<K, V>>(numSpills);
for(int i = 0; i < numSpills; i++) {
IndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
Segment<K,V> s =
new Segment<K,V>(job, rfs, filename[i], indexRecord.startOffset,
indexRecord.partLength, codec, true);
segmentList.add(i, s);
if (LOG.isDebugEnabled()) {
LOG.debug("MapId=" + mapId + " Reducer=" + parts +
"Spill =" + i + "(" + indexRecord.startOffset + "," +
indexRecord.rawLength + ", " + indexRecord.partLength + ")");
}
}
int mergeFactor = job.getInt(MRJobConfig.IO_SORT_FACTOR,
MRJobConfig.DEFAULT_IO_SORT_FACTOR);
// sort the segments only if there are intermediate merges
boolean sortSegments = segmentList.size() > mergeFactor;
//merge
@SuppressWarnings("unchecked")
RawKeyValueIterator kvIter = Merger.merge(job, rfs,
keyClass, valClass, codec,
segmentList, mergeFactor,
new Path(mapId.toString()),
job.getOutputKeyComparator(), reporter, sortSegments,
null, spilledRecordsCounter, sortPhase.phase(),
TaskType.MAP);
//write merged output to disk
long segmentStart = finalOut.getPos();
finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut, false);
Writer<K, V> writer =
new Writer<K, V>(job, finalPartitionOut, keyClass, valClass, codec,
spilledRecordsCounter);
if (combinerRunner == null || numSpills < minSpillsForCombine) { // no combiner, or fewer spills than minSpillsForCombine (default 3): write the merged data directly; otherwise run the combiner once more
Merger.writeFile(kvIter, writer, reporter, job);
} else {
combineCollector.setWriter(writer);
combinerRunner.combine(kvIter, combineCollector);
}
//close
writer.close();
if (finalPartitionOut != finalOut) {
finalPartitionOut.close();
finalPartitionOut = null;
}
sortPhase.startNextPhase();
// record offsets
rec.startOffset = segmentStart;
rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
spillRec.putIndex(rec, parts);
}
spillRec.writeToFile(finalIndexFile, job); // write file.out.index
finalOut.close(); // finish writing file.out
if (finalPartitionOut != null) {
finalPartitionOut.close();
}
for(int i = 0; i < numSpills; i++) {
rfs.delete(filename[i],true);
}
}
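Conceptually, Merger.merge performs a k-way merge of already-sorted segments, at most mergeFactor at a time. A toy illustration of the core idea (this is not Hadoop's Merger, which additionally handles on-disk segments, intermediate merge passes, and raw-byte comparators):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

/** Toy k-way merge over already-sorted segments. */
class KWayMerge {

    // Pairs a segment's iterator with its current head so the heap can order segments.
    private static final class Head implements Comparable<Head> {
        final Iterator<String> rest;
        String value;

        Head(Iterator<String> rest) {
            this.rest = rest;
            this.value = rest.next();
        }

        @Override
        public int compareTo(Head other) {
            return value.compareTo(other.value);
        }
    }

    static List<String> merge(List<List<String>> sortedSegments) {
        PriorityQueue<Head> heap = new PriorityQueue<>();
        for (List<String> segment : sortedSegments) {
            if (!segment.isEmpty()) {
                heap.add(new Head(segment.iterator()));
            }
        }
        List<String> out = new ArrayList<>();
        while (!heap.isEmpty()) {
            Head h = heap.poll();
            out.add(h.value);            // emit the globally smallest head
            if (h.rest.hasNext()) {
                h.value = h.rest.next(); // advance that segment and re-insert it
                heap.add(h);
            }
        }
        return out;
    }
}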