# BAT 经典算法笔试题 —— 磁盘多路归并排序

·  阅读 9870

## 算法思路

1. 如果取出来的元素和当前数组中的最小元素相等，那么就可以直接将这个元素输出。再继续下一轮循环。不可能取出比当前数组最小元素还要小的元素，因为输入文件本身也是有序的。

2. 否则就需要将元素插入到当前的数组中的指定位置，继续保持数组有序。然后将数组中当前最小的元素输出并移除。再进行下一轮循环。

3. 如果遇到文件结尾，那就无法继续调用 next() 方法了，这时可以直接将数组中的最小元素输出并移除，数组也跟着变小了。再进行下一轮循环。当数组空了，说明所有的文件都处理完了，算法就可以结束了。

## 二分查找

``````public class Collections {
// Abridged listing: every "..." marks code that was elided from this excerpt.
...
// Searches list for key. The visible branches show the return contract:
// the matching index on a hit, or -(insertIndex+1) on a miss, so the caller
// can recover the insertion point as -(result+1).
public static <T> int binarySearch(List<T> list, T key) {
...
if (found) {
return index;
} else {
// The +1 keeps a miss whose insertion point is 0 distinct from a hit at index 0.
return -(insertIndex+1);
}
}
...
}

## 输入文件类

/**
 * One sorted input file of the merge, read lazily one integer per line.
 *
 * <p>A blank line is treated the same as end of file, so nothing after it is read.
 */
class MergeSource implements Closeable {
    private final String filename;
    private final BufferedReader reader;
    /** Line fetched by {@link #hasNext()} but not yet consumed by {@link #next()}. */
    private String cachedLine;

    /**
     * Opens the given file for reading.
     *
     * @param filename path of a text file holding one integer per line, in ascending order
     * @throws IllegalArgumentException if the file cannot be opened
     */
    public MergeSource(String filename) {
        this.filename = filename;
        try {
            this.reader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException e) {
            // Fail here instead of leaving a null reader that blows up on first use.
            throw new IllegalArgumentException("cannot open input file: " + filename, e);
        }
    }

    /**
     * Reports whether another number is available, reading ahead one line if necessary.
     *
     * @return {@code true} if {@link #next()} can return a value
     * @throws IllegalStateException if reading the file fails
     */
    public boolean hasNext() {
        if (this.cachedLine != null) {
            // Already looked ahead; reading again would silently drop the cached line.
            return true;
        }
        String line;
        try {
            line = this.reader.readLine();
        } catch (IOException e) {
            // Returning false here would truncate the merged output without any signal.
            throw new IllegalStateException("failed to read input file: " + filename, e);
        }
        if (line == null) {
            return false;
        }
        line = line.trim();
        if (line.isEmpty()) {
            return false;
        }
        this.cachedLine = line;
        return true;
    }

    /**
     * Returns the next number and advances past it.
     *
     * @return the parsed integer from the next line
     * @throws IllegalStateException if the source is exhausted
     * @throws NumberFormatException if the line is not a valid integer
     */
    public int next() {
        if (!hasNext()) {
            throw new IllegalStateException("no content");
        }
        int num = Integer.parseInt(this.cachedLine);
        this.cachedLine = null;
        return num;
    }

    /** Closes the underlying reader. */
    @Override
    public void close() throws IOException {
        this.reader.close();
    }
}

## 内存有序数组元素类

``````class Bin implements Comparable<Bin> {
int num;
MergeSource source;

Bin(MergeSource source, int num) {
this.source = source;
this.num = num;
}

@Override
public int compareTo(Bin o) {
return this.num - o.num;
}

}

/**
 * Builds the initial sorted array by taking the first number of every input source.
 *
 * @return one bin per non-empty source, sorted ascending by number
 */
List<Bin> prepare() {
    List<Bin> bins = new ArrayList<>();
    for (MergeSource source : sources) {
        // An empty source contributes nothing; calling next() on it would throw.
        if (source.hasNext()) {
            bins.add(new Bin(source, source.next()));
        }
    }
    Collections.sort(bins);
    return bins;
}

## 输出文件类

``````class MergeOut implements Closeable {
private PrintWriter writer;

public MergeOut(String filename) {
try {
FileOutputStream out = new FileOutputStream(filename);
this.writer = new PrintWriter(out);
} catch (FileNotFoundException e) {
}
}

public void write(Bin bin) {
writer.println(bin.num);
}

@Override
public void close() throws IOException {
writer.flush();
writer.close();
}
}

## 准备输入文件的内容

``````List<String> generateFiles(int n, int minEntries, int maxEntries) {
List<String> files = new ArrayList<>();
for (int i = 0; i < n; i++) {
String filename = "input-" + i + ".txt";
PrintWriter writer;
try {
writer = new PrintWriter(new FileOutputStream(filename));
int entries = rand.nextInt(minEntries, maxEntries);
List<Integer> nums = new ArrayList<>();
for (int k = 0; k < entries; k++) {
int num = rand.nextInt(10000000);
}
Collections.sort(nums);
for (int num : nums) {
writer.println(num);
}
writer.flush();
writer.close();
} catch (FileNotFoundException e) {
}
}
return files;
}

## 排序算法

``````public void sort() {
List<Bin> bins = prepare();
while (true) {
// 取数组中最小的元素
MergeSource current = bins.get(0).source;
if (current.hasNext()) {
// 从输入文件中取出下一个元素
Bin newBin = new Bin(current, current.next());
// 二分查找，也就是和数组中已有元素进行比较
int index = Collections.binarySearch(bins, newBin);
if (index == 0) {
// 算法思路情况1
this.out.write(newBin);
} else {
// 算法思路情况2
if (index < 0) {
index = -(index+1）;
}
Bin minBin = bins.remove(0);
this.out.write(minBin);
}
} else {
// 算法思路情况3:遇到文件尾
Bin minBin = bins.remove(0);
this.out.write(minBin);
if (bins.isEmpty()) {
break;
}
}
}
}

## 全部代码

``````package leetcode;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

public class DiskMergeSort implements Closeable {

public static List<String> generateFiles(int n, int minEntries, int maxEntries) {
List<String> files = new ArrayList<>();
for (int i = 0; i < n; i++) {
String filename = "input-" + i + ".txt";
PrintWriter writer;
try {
writer = new PrintWriter(new FileOutputStream(filename));
List<Integer> nums = new ArrayList<>();
for (int k = 0; k < entries; k++) {
}
Collections.sort(nums);
for (int num : nums) {
writer.println(num);
}
writer.close();
} catch (FileNotFoundException e) {
}
}
return files;
}

private List<MergeSource> sources;
private MergeOut out;

public DiskMergeSort(List<String> files, String outFilename) {
this.sources = new ArrayList<>();
for (String filename : files) {
}
this.out = new MergeOut(outFilename);
}

static class MergeOut implements Closeable {
private PrintWriter writer;

public MergeOut(String filename) {
try {
this.writer = new PrintWriter(new FileOutputStream(filename));
} catch (FileNotFoundException e) {
}
}

public void write(Bin bin) {
writer.println(bin.num);
}

@Override
public void close() throws IOException {
writer.flush();
writer.close();
}
}

static class MergeSource implements Closeable {
private String cachedLine;

public MergeSource(String filename) {
try {
} catch (FileNotFoundException e) {
}
}

public boolean hasNext() {
String line;
try {
if (line == null || line.isEmpty()) {
return false;
}
this.cachedLine = line.trim();
return true;
} catch (IOException e) {
}
return false;
}

public int next() {
if (this.cachedLine == null) {
if (!hasNext()) {
throw new IllegalStateException("no content");
}
}
int num = Integer.parseInt(this.cachedLine);
this.cachedLine = null;
return num;
}

@Override
public void close() throws IOException {
}
}

static class Bin implements Comparable<Bin> {
int num;
MergeSource source;

Bin(MergeSource source, int num) {
this.source = source;
this.num = num;
}

@Override
public int compareTo(Bin o) {
return this.num - o.num;
}
}

public List<Bin> prepare() {
List<Bin> bins = new ArrayList<>();
for (MergeSource source : sources) {
Bin newBin = new Bin(source, source.next());
}
Collections.sort(bins);
return bins;
}

public void sort() {
List<Bin> bins = prepare();
while (true) {
MergeSource current = bins.get(0).source;
if (current.hasNext()) {
Bin newBin = new Bin(current, current.next());
int index = Collections.binarySearch(bins, newBin);
if (index == 0 || index == -1) {
this.out.write(newBin);
if (index == -1) {
throw new IllegalStateException("impossible");
}
} else {
if (index < 0) {
index = -index - 1;
}
Bin minBin = bins.remove(0);
this.out.write(minBin);
}
} else {
Bin minBin = bins.remove(0);
this.out.write(minBin);
if (bins.isEmpty()) {
break;
}
}
}
}

@Override
public void close() throws IOException {
for (MergeSource source : sources) {
source.close();
}
this.out.close();
}

public static void main(String[] args) throws IOException {
List<String> inputs = DiskMergeSort.generateFiles(100, 10000, 20000);
// 运行多次看算法耗时
for (int i = 0; i < 20; i++) {
DiskMergeSort sorter = new DiskMergeSort(inputs, "output.txt");
long start = System.currentTimeMillis();
sorter.sort();
long duration = System.currentTimeMillis() - start;
System.out.printf("%dms\n", duration);
sorter.close();
}
}
}