mapreduce-5.map

Core classes

Main program classes

org.apache.hadoop.mapred.YarnChild: the main() entry point for a MapReduce task process.

  • main() flow (a minimal sketch of step 1 follows this list)
  1. Build a JobConf from the localized job.xml.
  2. Use the main() arguments (which carry the ApplicationMaster address) to talk to the AM and fetch the org.apache.hadoop.mapred.MapTask to execute.
  3. Invoke the MapTask's run() method.
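
Steps 2 and 3 go through Hadoop-internal RPC (TaskUmbilicalProtocol) and are hard to show in isolation, but step 1 can be sketched in a few lines. This is a minimal sketch, not the actual YarnChild code; the literal path "job.xml" and the printed property are illustrative assumptions.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class YarnChildSketch {
  public static void main(String[] args) throws Exception {
    // Step 1 (sketch): build the task's JobConf from the localized job.xml
    // in the container's working directory.
    JobConf job = new JobConf(new Path("job.xml"));

    // Steps 2-3 (not shown): connect to the ApplicationMaster over
    // TaskUmbilicalProtocol using the host/port passed in args, receive the
    // MapTask via the umbilical, then call task.run(job, umbilical).
    System.out.println("mapreduce.job.maps = " + job.get("mapreduce.job.maps"));
  }
}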

Task execution class

  • org.apache.hadoop.mapred.MapTask

Framework-facing API classes

These are the classes the framework exposes to user code; the concrete Context passed to the Mapper wraps the RecordReader/RecordWriter implementations used for input and output (a usage sketch follows the list below).

  • org.apache.hadoop.mapreduce.Mapper
  • org.apache.hadoop.mapreduce.Mapper.Context
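
As a concrete example of this user-facing API, here is a minimal word-count style Mapper (the class and field names are illustrative, not from the article): each map() call receives one record that the Context pulled from the RecordReader, and every context.write() goes to the RecordWriter side described below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    StringTokenizer tokens = new StringTokenizer(line.toString());
    while (tokens.hasMoreTokens()) {
      word.set(tokens.nextToken());
      context.write(word, ONE); // handed to NewOutputCollector -> MapOutputBuffer
    }
  }
}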

Input-related classes

  • org.apache.hadoop.mapreduce.RecordReader
    • MapTask$NewTrackingRecordReader
    • org.apache.hadoop.mapreduce.lib.input.LineRecordReader
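
The glue between these readers and the user's map() is Mapper.run(), which in current Hadoop versions is essentially the loop below; Context.nextKeyValue() delegates to the NewTrackingRecordReader, which wraps whatever RecordReader the InputFormat supplied (e.g. LineRecordReader).

public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    while (context.nextKeyValue()) { // pull the next record from the RecordReader
      map(context.getCurrentKey(), context.getCurrentValue(), context);
    }
  } finally {
    cleanup(context);
  }
}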

Output-related classes

  • org.apache.hadoop.mapreduce.RecordWriter
    • MapTask$NewOutputCollector
    • MapTask$MapOutputBuffer
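
On the output side, NewOutputCollector asks the job's Partitioner for a partition number before handing the pair to MapOutputBuffer. With the default HashPartitioner that number is simply the key hash (masked to be non-negative) modulo the number of reducers, as this small self-contained demo shows:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class PartitionDemo {
  public static void main(String[] args) {
    HashPartitioner<Text, IntWritable> partitioner = new HashPartitioner<>();
    int numReduceTasks = 3;
    for (String k : new String[] {"apple", "banana", "cherry"}) {
      // getPartition computes (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
      int p = partitioner.getPartition(new Text(k), new IntWritable(1), numReduceTasks);
      System.out.println(k + " -> partition " + p);
    }
  }
}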

Core algorithms

The Map-side shuffle partitions, sorts, spills, and merges the map output, and finally writes it to disk; the end result is a single partition-ordered file, i.e. records are sorted first by partition and, within each partition, by key.
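
A tiny stand-alone illustration of that ordering (nothing Hadoop-specific, just the (partition, key) comparator the spill effectively applies):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class SpillOrderDemo {
  record Rec(int partition, String key) {}

  public static void main(String[] args) {
    List<Rec> records = new ArrayList<>(List.of(
        new Rec(1, "banana"), new Rec(0, "cherry"),
        new Rec(1, "apple"),  new Rec(0, "apple")));
    // Spill order: primary sort by partition, secondary sort by key
    records.sort(Comparator.comparingInt(Rec::partition).thenComparing(Rec::key));
    records.forEach(r -> System.out.println(r.partition() + "\t" + r.key()));
    // prints 0 apple, 0 cherry, 1 apple, 1 banana (one record per line)
  }
}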

Input: handling records split across InputSplit boundaries

Can an InputSplit boundary cut a single logical record (for text input, a line) in two? The MapReduce framework does not handle this itself; it is left to the user-specified InputFormat. TextInputFormat, for example, solves it in its RecordReader: on initialization, every split except the first discards its first (possibly partial) line and starts reading from the next one; see LineRecordReader.initialize(InputSplit, TaskAttemptContext):

// If this is not the first split, we always throw away first record
// because we always (except the last split) read one extra line in
// next() method.
if (start != 0) {
  start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
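
The complementary rule is that every split (except the last) reads one extra line past its own end, so the line a later split skips is never lost. The toy program below (plain Java, no Hadoop types; all names are made up for illustration, and it assumes '\n'-terminated ASCII lines) applies both rules to an in-memory byte array and shows that each line is read exactly once:

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class SplitBoundaryDemo {
  // Emit the lines that "belong" to the byte range [start, end): skip the first
  // (possibly partial) line unless we own offset 0, and keep reading while the
  // next line still starts at or before 'end' (i.e. read one line past the end).
  static void readSplit(byte[] data, int start, int end) throws IOException {
    BufferedReader in = new BufferedReader(new InputStreamReader(
        new ByteArrayInputStream(data, start, data.length - start), StandardCharsets.UTF_8));
    int pos = start;
    if (start != 0) {
      pos += in.readLine().length() + 1; // throw away the partial first line, as above
    }
    String line;
    while (pos <= end && (line = in.readLine()) != null) {
      System.out.println("split[" + start + "," + end + ") -> " + line);
      pos += line.length() + 1; // +1 for the '\n' terminator
    }
  }

  public static void main(String[] args) throws IOException {
    byte[] data = "alpha\nbravo\ncharlie\n".getBytes(StandardCharsets.UTF_8);
    readSplit(data, 0, 8);           // the boundary at offset 8 falls inside "bravo"
    readSplit(data, 8, data.length); // prints only "charlie": no loss, no duplication
  }
}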

Output: sortAndSpill

MapTask$MapOutputBuffer.sortAndSpill() quick-sorts the per-record metadata in kvmeta (four ints per record: partition, key start, value start, value length) and then writes each partition's records, optionally through the combiner, to a numbered spill file:

private void sortAndSpill() throws IOException, ClassNotFoundException, InterruptedException {
  // approximate the spill file length: live buffer bytes plus one header per partition
  final long size = distanceTo(bufstart, bufend, bufvoid) + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  FSDataOutputStream partitionOut = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size); // e.g. attempt_1611734493242_0026_m_000000_0_spill_0.out
    out = rfs.create(filename);

    final int mstart = kvend / NMETA;
    final int mend = 1 + // kvend is a valid record
      (kvstart >= kvend
      ? kvstart
      : kvmeta.capacity() + kvstart) / NMETA;
    sorter.sort(MapOutputBuffer.this, mstart, mend, reporter); // sort kvmeta entries by (partition, key); the default sorter is QuickSort
    int spindex = mstart;
    final IndexRecord rec = new IndexRecord();
    final InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        partitionOut = CryptoUtils.wrapIfNecessary(job, out, false);
        writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
                                  spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex % maxRec);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec)
                        + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) { // run the combiner only when this partition actually has records
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
              new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }

        // close the writer
        writer.close(); // the partition's segment is now in the spill file
        if (partitionOut != out) {
          partitionOut.close();
          partitionOut = null;
        }

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
    if (partitionOut != null) {
      partitionOut.close();
    }
  }
}
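
For reference, when and how these spills happen is controlled by a handful of job properties; the values below are the standard defaults, shown here only as a tuning sketch.

import org.apache.hadoop.mapred.JobConf;

public class SpillTuning {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setInt("mapreduce.task.io.sort.mb", 100);            // size of the in-memory sort buffer (kvbuffer), in MB
    job.setFloat("mapreduce.map.sort.spill.percent", 0.80f); // spill once the buffer is 80% full
    job.setInt("mapreduce.task.io.sort.factor", 10);         // max number of segments merged at once
    System.out.println("sort buffer = " + job.get("mapreduce.task.io.sort.mb") + " MB");
  }
}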

Combine

Task$NewCombinerRunner.combine() instantiates the user's combiner class (a Reducer) and runs it over the records handed in by the iterator:

public void combine(RawKeyValueIterator iterator, OutputCollector<K,V> collector) throws IOException, InterruptedException, ClassNotFoundException {
  // make a reducer
  org.apache.hadoop.mapreduce.Reducer<K,V,K,V> reducer =
    (org.apache.hadoop.mapreduce.Reducer<K,V,K,V>)
      ReflectionUtils.newInstance(reducerClass, job);
  org.apache.hadoop.mapreduce.Reducer.Context 
       reducerContext = createReduceContext(reducer, job, taskId,
                                            iterator, null, inputCounter, 
                                            new OutputConverter(collector),
                                            committer,
                                            reporter, comparator, keyClass,
                                            valueClass);
  reducer.run(reducerContext); // the combiner is just the user's Reducer run inside the map task
}
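
From the user's side, enabling this path is a single job.setCombinerClass(...) call; the class must be a Reducer whose input and output types match the map output types, because it may run during sortAndSpill and again in the final merge. A minimal driver sketch, reusing the illustrative WordCountMapper from above and Hadoop's library IntSumReducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "wordcount");
    job.setJarByClass(WordCountDriver.class);
    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(IntSumReducer.class); // runs inside the map task, as shown above
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}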

Merge

MapTask$MapOutputBuffer.mergeParts() merges all spill files into a single, partition-ordered file.out plus its index file.out.index:

private void mergeParts() throws IOException, InterruptedException, ClassNotFoundException {
  // ... (output size estimation and the filename[] array of spill files elided) ...
  Path finalOutputFile =
      mapOutputFile.getOutputFileForWrite(finalOutFileSize); // final output: output/attempt_1611734493242_0026_m_000000_0/file.out
  Path finalIndexFile =
      mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize); // final index: output/attempt_1611734493242_0026_m_000000_0/file.out.index

  //The output stream for the final single output file
  FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); // create file.out
  FSDataOutputStream finalPartitionOut = null;

  sortPhase.addPhases(partitions); // Divide sort phase into sub-phases

  IndexRecord rec = new IndexRecord();
  final SpillRecord spillRec = new SpillRecord(partitions);
  for (int parts = 0; parts < partitions; parts++) {
    //create the segments to be merged
    List<Segment<K,V>> segmentList =
      new ArrayList<Segment<K, V>>(numSpills);
    for(int i = 0; i < numSpills; i++) {
      IndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);
      Segment<K,V> s =
        new Segment<K,V>(job, rfs, filename[i], indexRecord.startOffset,
                         indexRecord.partLength, codec, true);
      segmentList.add(i, s);
      if (LOG.isDebugEnabled()) {
        LOG.debug("MapId=" + mapId + " Reducer=" + parts +
           "Spill =" + i + "(" + indexRecord.startOffset + "," +
            indexRecord.rawLength + ", " + indexRecord.partLength + ")");
      }
    }

    int mergeFactor = job.getInt(MRJobConfig.IO_SORT_FACTOR,
        MRJobConfig.DEFAULT_IO_SORT_FACTOR);
    // sort the segments only if there are intermediate merges
    boolean sortSegments = segmentList.size() > mergeFactor;
    //merge
    @SuppressWarnings("unchecked")
    RawKeyValueIterator kvIter = Merger.merge(job, rfs,
                   keyClass, valClass, codec,
                   segmentList, mergeFactor,
                   new Path(mapId.toString()),
                   job.getOutputKeyComparator(), reporter, sortSegments,
                   null, spilledRecordsCounter, sortPhase.phase(),
                   TaskType.MAP);
    //write merged output to disk
    long segmentStart = finalOut.getPos();
    finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut, false);
    Writer<K, V> writer =
        new Writer<K, V>(job, finalPartitionOut, keyClass, valClass, codec,
                         spilledRecordsCounter);
    if (combinerRunner == null || numSpills < minSpillsForCombine) { // fewer spills than minSpillsForCombine (default 3): plain merge; otherwise run the combiner once more
      Merger.writeFile(kvIter, writer, reporter, job);
    } else {
      combineCollector.setWriter(writer);
      combinerRunner.combine(kvIter, combineCollector);
    }

    //close
    writer.close();
    if (finalPartitionOut != finalOut) {
      finalPartitionOut.close();
      finalPartitionOut = null;
    }

    sortPhase.startNextPhase();

    // record offsets
    rec.startOffset = segmentStart;
    rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
    rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
    spillRec.putIndex(rec, parts);
  }
  spillRec.writeToFile(finalIndexFile, job); // write the per-partition index records to file.out.index
  finalOut.close(); // all partitions are now merged into file.out
  if (finalPartitionOut != null) {
    finalPartitionOut.close();
  }
  for(int i = 0; i < numSpills; i++) {
    rfs.delete(filename[i],true);
  }
}
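
As far as I can tell, the minSpillsForCombine threshold used above is read from mapreduce.map.combine.minspills (default 3); a short tuning sketch:

import org.apache.hadoop.mapred.JobConf;

public class CombineMinSpills {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Run the combiner again during mergeParts() only if at least this many spill files exist
    job.setInt("mapreduce.map.combine.minspills", 3);
    System.out.println(job.getInt("mapreduce.map.combine.minspills", 3));
  }
}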