mapreduce-5.map

Core classes

Main program classes

org.apache.hadoop.mapred.YarnChild: the main() entry point for a MapReduce task process.

  • main() flow (a minimal sketch of step 1 follows this list)
  1. Build a JobConf from the localized job.xml.
  2. Use the main() arguments (which carry the ApplicationMaster address) to talk to the AM and fetch the org.apache.hadoop.mapred.MapTask to execute.
  3. Invoke the MapTask's run() method.
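
Steps 2 and 3 go through Hadoop-internal RPC (TaskUmbilicalProtocol) and are hard to show in isolation, but step 1 can be sketched in a few lines. This is a minimal sketch, not the actual YarnChild code; the literal path "job.xml" and the printed property are illustrative assumptions.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class YarnChildSketch {
  public static void main(String[] args) throws Exception {
    // Step 1 (sketch): build the task's JobConf from the localized job.xml
    // in the container's working directory.
    JobConf job = new JobConf(new Path("job.xml"));

    // Steps 2-3 (not shown): connect to the ApplicationMaster over
    // TaskUmbilicalProtocol using the host/port passed in args, receive the
    // MapTask via the umbilical, then call task.run(job, umbilical).
    System.out.println("mapreduce.job.maps = " + job.get("mapreduce.job.maps"));
  }
}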

Task execution class

  • org.apache.hadoop.mapred.MapTask

Framework-facing API classes

These are the classes the framework exposes to user code; the concrete Context passed to the Mapper wraps the RecordReader/RecordWriter implementations used for input and output (a usage sketch follows the list below).

  • org.apache.hadoop.mapreduce.Mapper
  • org.apache.hadoop.mapreduce.Mapper.Context
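
As a concrete example of this user-facing API, here is a minimal word-count style Mapper (the class and field names are illustrative, not from the article): each map() call receives one record that the Context pulled from the RecordReader, and every context.write() goes to the RecordWriter side described below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    StringTokenizer tokens = new StringTokenizer(line.toString());
    while (tokens.hasMoreTokens()) {
      word.set(tokens.nextToken());
      context.write(word, ONE); // handed to NewOutputCollector -> MapOutputBuffer
    }
  }
}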

Input-related classes

  • org.apache.hadoop.mapreduce.RecordReader
    • MapTask$NewTrackingRecordReader
    • org.apache.hadoop.mapreduce.lib.input.LineRecordReader
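
The glue between these readers and the user's map() is Mapper.run(), which in current Hadoop versions is essentially the loop below; Context.nextKeyValue() delegates to the NewTrackingRecordReader, which wraps whatever RecordReader the InputFormat supplied (e.g. LineRecordReader).

public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    while (context.nextKeyValue()) { // pull the next record from the RecordReader
      map(context.getCurrentKey(), context.getCurrentValue(), context);
    }
  } finally {
    cleanup(context);
  }
}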

Output-related classes

  • org.apache.hadoop.mapreduce.RecordWriter
    • MapTask$NewOutputCollector
    • MapTask$MapOutputBuffer
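
On the output side, NewOutputCollector asks the job's Partitioner for a partition number before handing the pair to MapOutputBuffer. With the default HashPartitioner that number is simply the key hash (masked to be non-negative) modulo the number of reducers, as this small self-contained demo shows:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class PartitionDemo {
  public static void main(String[] args) {
    HashPartitioner<Text, IntWritable> partitioner = new HashPartitioner<>();
    int numReduceTasks = 3;
    for (String k : new String[] {"apple", "banana", "cherry"}) {
      // getPartition computes (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
      int p = partitioner.getPartition(new Text(k), new IntWritable(1), numReduceTasks);
      System.out.println(k + " -> partition " + p);
    }
  }
}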

Core algorithms

The Map-side shuffle partitions, sorts, spills, and merges the map output, and finally writes it to disk; the end result is a single partition-ordered file, i.e. records are sorted first by partition and, within each partition, by key.
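
A tiny stand-alone illustration of that ordering (nothing Hadoop-specific, just the (partition, key) comparator the spill effectively applies):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class SpillOrderDemo {
  record Rec(int partition, String key) {}

  public static void main(String[] args) {
    List<Rec> records = new ArrayList<>(List.of(
        new Rec(1, "banana"), new Rec(0, "cherry"),
        new Rec(1, "apple"),  new Rec(0, "apple")));
    // Spill order: primary sort by partition, secondary sort by key
    records.sort(Comparator.comparingInt(Rec::partition).thenComparing(Rec::key));
    records.forEach(r -> System.out.println(r.partition() + "\t" + r.key()));
    // prints 0 apple, 0 cherry, 1 apple, 1 banana (one record per line)
  }
}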

Input: handling records split across InputSplit boundaries

Can an InputSplit boundary cut a single logical record (for text input, a line) in two? The MapReduce framework does not handle this itself; it is left to the user-specified InputFormat. TextInputFormat, for example, solves it in its RecordReader: on initialization, every split except the first discards its first (possibly partial) line and starts reading from the next one; see LineRecordReader.initialize(InputSplit, TaskAttemptContext):

// If this is not the first split, we always throw away first record
// because we always (except the last split) read one extra line in
// next() method.
if (start != 0) {
  start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
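
The complementary rule is that every split (except the last) reads one extra line past its own end, so the line a later split skips is never lost. The toy program below (plain Java, no Hadoop types; all names are made up for illustration, and it assumes '\n'-terminated ASCII lines) applies both rules to an in-memory byte array and shows that each line is read exactly once:

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class SplitBoundaryDemo {
  // Emit the lines that "belong" to the byte range [start, end): skip the first
  // (possibly partial) line unless we own offset 0, and keep reading while the
  // next line still starts at or before 'end' (i.e. read one line past the end).
  static void readSplit(byte[] data, int start, int end) throws IOException {
    BufferedReader in = new BufferedReader(new InputStreamReader(
        new ByteArrayInputStream(data, start, data.length - start), StandardCharsets.UTF_8));
    int pos = start;
    if (start != 0) {
      pos += in.readLine().length() + 1; // throw away the partial first line, as above
    }
    String line;
    while (pos <= end && (line = in.readLine()) != null) {
      System.out.println("split[" + start + "," + end + ") -> " + line);
      pos += line.length() + 1; // +1 for the '\n' terminator
    }
  }

  public static void main(String[] args) throws IOException {
    byte[] data = "alpha\nbravo\ncharlie\n".getBytes(StandardCharsets.UTF_8);
    readSplit(data, 0, 8);           // the boundary at offset 8 falls inside "bravo"
    readSplit(data, 8, data.length); // prints only "charlie": no loss, no duplication
  }
}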

Output: sortAndSpill

MapTask$MapOutputBuffer.sortAndSpill() quick-sorts the per-record metadata in kvmeta (four ints per record: partition, key start, value start, value length) and then writes each partition's records, optionally through the combiner, to a numbered spill file:

private void sortAndSpill() throws IOException, ClassNotFoundException, InterruptedException {
  // approximate the spill file length: live buffer bytes plus one header per partition
  final long size = distanceTo(bufstart, bufend, bufvoid) + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  FSDataOutputStream partitionOut = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size); // e.g. attempt_1611734493242_0026_m_000000_0_spill_0.out
    out = rfs.create(filename);

    final int mstart = kvend / NMETA;
    final int mend = 1 + // kvend is a valid record
      (kvstart >= kvend
      ? kvstart
      : kvmeta.capacity() + kvstart) / NMETA;
    sorter.sort(MapOutputBuffer.this, mstart, mend, reporter); // sort kvmeta entries by (partition, key); the default sorter is QuickSort
    int spindex = mstart;
    final IndexRecord rec = new IndexRecord();
    final InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        partitionOut = CryptoUtils.wrapIfNecessary(job, out, false);
        writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
                                  spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex % maxRec);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec)
                        + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) { // run the combiner only when this partition actually has records
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
              new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }

        // close the writer
        writer.close(); // the partition's segment is now in the spill file
        if (partitionOut != out) {
          partitionOut.close();
          partitionOut = null;
        }

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
    if (partitionOut != null) {
      partitionOut.close();
    }
  }
}
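
For reference, when and how these spills happen is controlled by a handful of job properties; the values below are the standard defaults, shown here only as a tuning sketch.

import org.apache.hadoop.mapred.JobConf;

public class SpillTuning {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setInt("mapreduce.task.io.sort.mb", 100);            // size of the in-memory sort buffer (kvbuffer), in MB
    job.setFloat("mapreduce.map.sort.spill.percent", 0.80f); // spill once the buffer is 80% full
    job.setInt("mapreduce.task.io.sort.factor", 10);         // max number of segments merged at once
    System.out.println("sort buffer = " + job.get("mapreduce.task.io.sort.mb") + " MB");
  }
}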

Combine

Task$NewCombinerRunner.combine() instantiates the user's combiner class (a Reducer) and runs it over the records handed in by the iterator:

public void combine(RawKeyValueIterator iterator, OutputCollector<K,V> collector) throws IOException, InterruptedException, ClassNotFoundException {
  // make a reducer
  org.apache.hadoop.mapreduce.Reducer<K,V,K,V> reducer =
    (org.apache.hadoop.mapreduce.Reducer<K,V,K,V>)
      ReflectionUtils.newInstance(reducerClass, job);
  org.apache.hadoop.mapreduce.Reducer.Context 
       reducerContext = createReduceContext(reducer, job, taskId,
                                            iterator, null, inputCounter, 
                                            new OutputConverter(collector),
                                            committer,
                                            reporter, comparator, keyClass,
                                            valueClass);
  reducer.run(reducerContext); // the combiner is just the user's Reducer run inside the map task
}
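
From the user's side, enabling this path is a single job.setCombinerClass(...) call; the class must be a Reducer whose input and output types match the map output types, because it may run during sortAndSpill and again in the final merge. A minimal driver sketch, reusing the illustrative WordCountMapper from above and Hadoop's library IntSumReducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "wordcount");
    job.setJarByClass(WordCountDriver.class);
    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(IntSumReducer.class); // runs inside the map task, as shown above
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}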

Merge

MapTask$MapOutputBuffer.mergeParts() merges all spill files into a single, partition-ordered file.out plus its index file.out.index:

private void mergeParts() throws IOException, InterruptedException, ClassNotFoundException {
  // ... (output size estimation and the filename[] array of spill files elided) ...
  Path finalOutputFile =
      mapOutputFile.getOutputFileForWrite(finalOutFileSize); // final output: output/attempt_1611734493242_0026_m_000000_0/file.out
  Path finalIndexFile =
      mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize); // final index: output/attempt_1611734493242_0026_m_000000_0/file.out.index

  //The output stream for the final single output file
  FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); // create file.out
  FSDataOutputStream finalPartitionOut = null;

  sortPhase.addPhases(partitions); // Divide sort phase into sub-phases

  IndexRecord rec = new IndexRecord();
  final SpillRecord spillRec = new SpillRecord(partitions);
  for (int parts = 0; parts < partitions; parts++) {
    //create the segments to be merged
    List<Segment<K,V>> segmentList =
      new ArrayList<Segment<K, V>>(numSpills);
    for(int i = 0; i < numSpills; i++) {
      IndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
      Segment<K,V> s =
        new Segment<K,V>(job, rfs, filename[i], indexRecord.startOffset,
                         indexRecord.partLength, codec, true);
      segmentList.add(i, s);
      if (LOG.isDebugEnabled()) {
        LOG.debug("MapId=" + mapId + " Reducer=" + parts +
           "Spill =" + i + "(" + indexRecord.startOffset + "," +
            indexRecord.rawLength + ", " + indexRecord.partLength + ")");
      }
    }

    int mergeFactor = job.getInt(MRJobConfig.IO_SORT_FACTOR,
        MRJobConfig.DEFAULT_IO_SORT_FACTOR);
    // sort the segments only if there are intermediate merges
    boolean sortSegments = segmentList.size() > mergeFactor;
    //merge
    @SuppressWarnings("unchecked")
    RawKeyValueIterator kvIter = Merger.merge(job, rfs,
                   keyClass, valClass, codec,
                   segmentList, mergeFactor,
                   new Path(mapId.toString()),
                   job.getOutputKeyComparator(), reporter, sortSegments,
                   null, spilledRecordsCounter, sortPhase.phase(),
                   TaskType.MAP);
    //write merged output to disk
    long segmentStart = finalOut.getPos();
    finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut, false);
    Writer<K, V> writer =
        new Writer<K, V>(job, finalPartitionOut, keyClass, valClass, codec,
                         spilledRecordsCounter);
    if (combinerRunner == null || numSpills < minSpillsForCombine) { // fewer spills than minSpillsForCombine (default 3): plain merge; otherwise run the combiner once more
      Merger.writeFile(kvIter, writer, reporter, job);
    } else {
      combineCollector.setWriter(writer);
      combinerRunner.combine(kvIter, combineCollector);
    }

    //close
    writer.close();
    if (finalPartitionOut != finalOut) {
      finalPartitionOut.close();
      finalPartitionOut = null;
    }

    sortPhase.startNextPhase();

    // record offsets
    rec.startOffset = segmentStart;
    rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
    rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
    spillRec.putIndex(rec, parts);
  }
  spillRec.writeToFile(finalIndexFile, job); // write the per-partition index records to file.out.index
  finalOut.close(); // all partitions are now merged into file.out
  if (finalPartitionOut != null) {
    finalPartitionOut.close();
  }
  for(int i = 0; i < numSpills; i++) {
    rfs.delete(filename[i],true);
  }
}
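
As far as I can tell, the minSpillsForCombine threshold used above is read from mapreduce.map.combine.minspills (default 3); a short tuning sketch:

import org.apache.hadoop.mapred.JobConf;

public class CombineMinSpills {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Run the combiner again during mergeParts() only if at least this many spill files exist
    job.setInt("mapreduce.map.combine.minspills", 3);
    System.out.println(job.getInt("mapreduce.map.combine.minspills", 3));
  }
}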