java超快速文本去重复源码

324 阅读1分钟

把写代码过程中经常用到的一些代码记录下来,下面的内容是 Java 超快速文本去重的实现代码。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * Command-line utility that removes duplicate lines from a UTF-8 text file.
 *
 * <p>Usage: {@code java -Xmx1000m SpeedClear <input> <output>}. Every distinct
 * line of the input is written to the output exactly once, in the order of its
 * first occurrence, terminated by CRLF. All distinct lines are held in memory,
 * which is why the usage text suggests a large heap.
 */
public class SpeedClear {

    /** Number of input lines between two progress markers on standard output. */
    private static final int PROGRESS_INTERVAL = 30000;

    /** Line terminator written after every line of the output file. */
    private static final String LINE_SEPARATOR = "\r\n";

    /**
     * Program entry point.
     *
     * <p>Prints the usage text and exits with status 1 when called without
     * arguments, prints {@code "Format error..."} and exits with status 1 when
     * the argument count is not two, and otherwise de-duplicates
     * {@code args[0]} into {@code args[1]}.
     *
     * @param args the input path followed by the output path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        String pathname = args[0];
        String newPath = args[1];
        clear(pathname, newPath);
    }

    /**
     * Copies the distinct lines of {@code pathname} into {@code newPath}.
     *
     * <p>Both files are treated as UTF-8. The output file is created or
     * truncated, so it must not be the same file as the input. Progress dots,
     * the number of distinct lines and start/end markers are printed to
     * standard output. I/O failures are reported on standard error instead of
     * being thrown, so this method never propagates a checked exception.
     *
     * @param pathname path of the text file to read
     * @param newPath  path of the de-duplicated file to write
     */
    public static void clear(String pathname, String newPath) {
        System.out.println("Start... ");

        // The input is opened first: if it cannot be read, the output file is never touched.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(pathname), StandardCharsets.UTF_8));
             BufferedWriter out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(newPath), StandardCharsets.UTF_8))) {

            Set<String> seen = new HashSet<String>();
            long lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                // add() returns true only for the first occurrence, so writing here
                // keeps the original line order without a second pass over the set.
                if (seen.add(line)) {
                    out.write(line);
                    out.write(LINE_SEPARATOR);
                }
                if (lineCount % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                lineCount++;
            }
            // Flush before announcing success so that a write error is not reported after "End...".
            out.flush();

            System.out.println("");
            System.out.println("size = " + seen.size());
            System.out.println("End...");
        } catch (IOException e) {
            System.err.println("Failed to de-duplicate " + pathname + " into " + newPath + ": " + e);
        } catch (OutOfMemoryError e) {
            // The set of distinct lines did not fit in the heap; keep the original hint for the user.
            System.out.println("文件太大了,建议先100MB大小..");
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println("  format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxser\tQQ:616100108");
    }

}