|
| 1 | +import java.io.*; |
| 2 | +import java.util.*; |
| 3 | + |
| 4 | +/* Display the K most frequent words in the file and the number of times each one appears |
| 5 | + in the file, listed from most to least frequent (ignoring case and periods). */ |
| 6 | + |
| 7 | +public class TopKWords { |
| 8 | + public static void main(String[] a) { |
| 9 | + // you can replace the filePath with yours |
| 10 | + CountWords cw = new CountWords("/Users/lisanaaa/Desktop/words.txt"); |
| 11 | + Map<String, Integer> dictionary = cw.getDictionary(); // get the words dictionary: {word: frequency} |
| 12 | + |
| 13 | + // we change the map to list for convenient sort |
| 14 | + List<Map.Entry<String, Integer>> list = new ArrayList<>(dictionary.entrySet()); |
| 15 | + |
| 16 | + // sort by lambda valueComparator |
| 17 | + list.sort(Comparator.comparing( |
| 18 | + m -> m.getValue()) |
| 19 | + ); |
| 20 | + |
| 21 | + Scanner input = new Scanner(System.in); |
| 22 | + Integer k = new Integer(input.nextLine()); |
| 23 | + while (k > list.size()) { |
| 24 | + System.out.println("Retype a number, your number is too large"); |
| 25 | + input = new Scanner(System.in); |
| 26 | + k = new Integer(input.nextLine()); |
| 27 | + } |
| 28 | + for (int i = 0; i<k; i++) { |
| 29 | + System.out.println(list.get(list.size()-i-1)); |
| 30 | + } |
| 31 | + } |
| 32 | +} |
| 33 | + |
/**
 * Counts how often each word occurs in a text file. A word is a maximal run of letters (as
 * defined by {@link Character#isLetter(char)}); every other character acts as a separator.
 * Words are compared case-insensitively and stored in lower case.
 */
class CountWords {
    private final String fileName;

    /**
     * @param fileName path of the text file whose words are to be counted
     */
    public CountWords(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Reads the file as UTF-8 text and builds the word-frequency table.
     *
     * @return a map from lower-cased word to its number of occurrences, or {@code null}
     *         if the file could not be opened, read or closed (the error is printed to
     *         standard error)
     */
    public Map<String, Integer> getDictionary() {
        Map<String, Integer> dictionary = new HashMap<>();

        // try-with-resources closes the reader on every path and, unlike an explicit
        // close() in a finally block, does nothing when opening the file itself failed.
        try (Reader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(fileName), java.nio.charset.StandardCharsets.UTF_8))) {
            StringBuilder word = new StringBuilder(); // letters of the word being read
            int in;
            while ((in = reader.read()) != -1) {
                if (Character.isLetter((char) in)) {
                    word.append((char) in);
                } else {
                    // A separator ends the current word, if there is one.
                    recordWord(word, dictionary);
                }
            }
            // The file may end right after a letter, without a trailing separator.
            recordWord(word, dictionary);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return dictionary;
    }

    /**
     * Adds one occurrence of the word held in {@code word} to {@code dictionary} and
     * empties the buffer; does nothing when the buffer is empty.
     */
    private static void recordWord(StringBuilder word, Map<String, Integer> dictionary) {
        if (word.length() > 0) {
            // Locale.ROOT keeps the case folding independent of the default locale.
            dictionary.merge(word.toString().toLowerCase(Locale.ROOT), 1, Integer::sum);
            word.setLength(0);
        }
    }
}
0 commit comments