Java 中,怎么获取一个文件中单词出现的最高频率?

57 阅读2分钟

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Prints the most frequent word of a text file, followed by the elapsed time.
 *
 * <p>Words are separated by whitespace, commas and periods. The file path is taken from the
 * first command-line argument, falling back to {@code D:\main.txt} when none is given.
 */
public class Count {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "D:\\main.txt";

    /**
     * Reads the input file, prints its most frequent word and the elapsed time in milliseconds.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        StringBuilder buffer = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; re-insert a separator so the last
                // word of one line does not fuse with the first word of the next.
                buffer.append(line).append(' ');
            }
        }

        // Prints "null" when the file contains no words at all.
        System.out.println(mostFrequentWord(buffer.toString()));

        long end = System.currentTimeMillis();
        System.out.println("共耗时:" + (end - start) + "毫秒");
    }

    /**
     * Finds the word that occurs most often in {@code text}.
     *
     * <p>No sorting is performed: occurrences are tallied in a map and the maximum is tracked
     * in a second pass over the tokens. On a tie the word that appears first in the text wins.
     *
     * @param text the text to analyse; may be {@code null}
     * @return the most frequent word, or {@code null} if {@code text} is {@code null} or
     *     contains no words
     */
    static String mostFrequentWord(String text) {
        if (text == null) {
            return null;
        }
        // One or more whitespace characters, commas or periods act as a single delimiter.
        String[] tokens = text.split("[\\s,.]+");

        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String token : tokens) {
            if (token.isEmpty()) {
                continue; // split() yields a leading "" when the text starts with a delimiter
            }
            Integer seen = counts.get(token);
            counts.put(token, seen == null ? 1 : seen + 1);
        }

        String best = null;
        int bestCount = 0;
        // Walk the tokens in text order so that ties resolve to the earliest word.
        for (String token : tokens) {
            if (token.isEmpty()) {
                continue;
            }
            int occurrences = counts.get(token);
            if (occurrences > bestCount) {
                best = token;
                bestCount = occurrences;
            }
        }
        return best;
    }
}

2. Java 统计一段英文中出现频率最高的单词(输出前 5 个)


package com.algorithm.interview;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileReader;

import java.util.Collections;

import java.util.HashMap;

import java.util.LinkedList;

import java.util.List;

import java.util.Map;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Prints the most frequent English words of a text file, one {@code word count} pair per line.
 *
 * <p>A word is a maximal run of ASCII letters; matching is case-sensitive.
 */
public class Frequency {

    /** A word is one or more consecutive ASCII letters. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /** Maximum number of words printed by {@link #count()}. */
    private static final int TOP_N = 5;

    /**
     * Orders entries by ascending count. Equal counts are ordered by descending key, so that
     * among tied entries the alphabetically smallest word ranks highest.
     */
    private static final class Compare
            implements java.util.Comparator<Map.Entry<String, Integer>> {
        @Override
        public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
            int byCount = left.getValue().compareTo(right.getValue());
            if (byCount != 0) {
                return byCount;
            }
            return right.getKey().compareTo(left.getKey());
        }
    }

    /**
     * Reads {@code D:\main.txt} and prints up to five of its most frequent words with their
     * counts, most frequent first.
     *
     * @throws Exception if the file cannot be opened or read
     */
    public void count() throws Exception {
        File file = new File("D:\\main.txt");
        StringBuilder builder = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Keep a separator between lines so words on adjacent lines do not merge.
                builder.append(line).append('\n');
            }
        }

        for (Map.Entry<String, Integer> entry : topWords(builder.toString(), TOP_N)) {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /**
     * Returns the most frequent words of {@code content}, most frequent first.
     *
     * @param content the text to analyse; {@code null} is treated as empty
     * @param limit the maximum number of entries to return; values below zero are treated as
     *     zero
     * @return at most {@code limit} word/count entries, fewer when the text has fewer distinct
     *     words
     */
    static List<Map.Entry<String, Integer>> topWords(String content, int limit) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        if (content != null) {
            Matcher matcher = WORD.matcher(content);
            while (matcher.find()) {
                String word = matcher.group();
                Integer times = counts.get(word);
                counts.put(word, times == null ? 1 : times + 1);
            }
        }

        List<Map.Entry<String, Integer>> ranked =
                new LinkedList<Map.Entry<String, Integer>>(counts.entrySet());
        // Highest-ranked entry first; a single sort replaces the repeated max-and-remove scan.
        Collections.sort(ranked, Collections.reverseOrder(new Compare()));

        // Never ask for more entries than exist: the text may hold fewer distinct words.
        int size = Math.min(Math.max(limit, 0), ranked.size());
        return new LinkedList<Map.Entry<String, Integer>>(ranked.subList(0, size));
    }

    /**
     * Runs {@link #count()} and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Frequency frequency = new Frequency();
            frequency.count();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

3. Java 统计一段英文中各单词的出现次数并排序


import java.util.ArrayList;

import java.util.Collections;

import java.util.Comparator;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import java.util.StringTokenizer;

import java.util.Map.Entry;

public class wordCount {

public static void main(String arg[]) {

int wordCount = 0; //用于统计单词的总个数

Map<String, Integer> map = new HashMap<String, Integer>();//用于统计各个单词的个数,排序