/*
 * tf_10.java -- word-frequency counter ("things" exercise, Java port).
 *
 * Provenance (recovered from a collapsed git patch):
 *   commit  331b841a5cf2ccfce6b3e0364c14244971895c64
 *   From:   David Foster
 *   Date:   Sun, 5 Jan 2014 20:42:47 -0800
 *   Subject: [PATCH] things: Port to Java.
 *   Path:   10-things/tf_10.java (new file)
 *
 * Usage: java tf_10 <path-to-text-file>
 * The stop-word list is read from "../stop_words.txt", resolved against the
 * current working directory.
 */
import java.io.*;
import java.util.*;

public class tf_10 {
    /**
     * The main function: counts the words of the file named by {@code args[0]}
     * and prints the most frequent ones.
     *
     * @param args command-line arguments; {@code args[0]} is the input file path
     * @throws IOException if the input file or the stop-word file cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            // Fail with a readable message instead of ArrayIndexOutOfBoundsException.
            System.err.println("usage: java tf_10 <path-to-text-file>");
            System.exit(1);
        }
        new WordFrequencyController(args[0]).run();
    }
}

/*
 * The classes
 */

/** Common base class: every exercise object can describe itself. */
abstract class TFExercise {
    /**
     * Returns a short self-description.
     *
     * @return the runtime class name of this object
     */
    public String getInfo() {
        return this.getClass().getName();
    }
}

/** Wires the three managers together and drives the computation. */
class WordFrequencyController extends TFExercise {
    /** Upper bound on the number of "word - frequency" lines printed by {@link #run()}. */
    private static final int MAX_WORDS_TO_PRINT = 25;

    private final DataStorageManager storageManager;
    private final StopWordManager stopWordManager;
    private final WordFrequencyManager wordFreqManager;

    /**
     * Loads the input text and the stop-word list.
     *
     * @param pathToFile path of the text file to analyse
     * @throws IOException if the text file or the stop-word file cannot be read
     */
    public WordFrequencyController(String pathToFile) throws IOException {
        this.storageManager = new DataStorageManager(pathToFile);
        this.stopWordManager = new StopWordManager();
        this.wordFreqManager = new WordFrequencyManager();
    }

    /**
     * Counts every non-stop word of the input, then prints up to
     * {@value #MAX_WORDS_TO_PRINT} lines of the form {@code word - frequency}
     * to standard output, most frequent first.
     */
    public void run() {
        for (String word : this.storageManager.calculateWords()) {
            if (!this.stopWordManager.isStopWord(word)) {
                this.wordFreqManager.incrementCount(word);
            }
        }

        int numWordsPrinted = 0;
        for (WordFrequencyPair pair : this.wordFreqManager.sorted()) {
            System.out.println(pair.getWord() + " - " + pair.getFrequency());

            numWordsPrinted++;
            if (numWordsPrinted >= MAX_WORDS_TO_PRINT) {
                break;
            }
        }
    }
}

/** Models the contents of the file. */
class DataStorageManager extends TFExercise {
    /** Normalised text: lower-case words separated by single spaces, no leading/trailing space. */
    private final String data;

    /**
     * Reads the whole file as UTF-8 and normalises it: every run of characters
     * matching {@code [\W_]+} becomes one space, and the result is lower-cased.
     *
     * @param pathToFile path of the text file to load
     * @throws IOException if the file cannot be opened or read, or is too large
     *         to hold in a single byte array
     */
    public DataStorageManager(String pathToFile) throws IOException {
        byte[] dataBytes;
        RandomAccessFile f = new RandomAccessFile(new File(pathToFile), "r");
        try {
            long length = f.length();
            if (length > Integer.MAX_VALUE) {
                // A blind (int) cast would wrap around and read the wrong amount.
                throw new IOException("File too large to load into memory: " + pathToFile);
            }
            dataBytes = new byte[(int) length];
            f.readFully(dataBytes);
        } finally {
            f.close();
        }

        // Locale.ROOT keeps lower-casing independent of the default locale
        // (e.g. 'I' must become 'i', not the Turkish dotless 'ı').
        // trim() drops the separator left by leading/trailing punctuation or a
        // BOM, which would otherwise produce an empty "word" when splitting.
        this.data = new String(dataBytes, "UTF-8")
                .replaceAll("[\\W_]+", " ")
                .toLowerCase(Locale.ROOT)
                .trim();
    }

    /**
     * Splits the normalised text into words.
     *
     * @return the words in file order; an empty array if the file has no words
     */
    public String[] calculateWords() {
        if (this.data.length() == 0) {
            // "".split(...) would return a one-element array holding "".
            return new String[0];
        }
        return this.data.split("\\s+");
    }

    @Override
    public String getInfo() {
        return super.getInfo() + ": My major data structure is a " + this.data.getClass().getName();
    }
}

/** Models the stop word filter. */
class StopWordManager extends TFExercise {
    private final Set<String> stopWords;

    /**
     * Loads the comma-separated stop words from {@code ../stop_words.txt}
     * (UTF-8) and adds every single letter {@code a}..{@code z}.
     *
     * @throws IOException if the stop-word file cannot be opened or read
     */
    public StopWordManager() throws IOException {
        this.stopWords = new HashSet<String>();

        Scanner f = new Scanner(new File("../stop_words.txt"), "UTF-8");
        try {
            f.useDelimiter(",");
            while (f.hasNext()) {
                // Trim so that a token followed by a line break (e.g. the last
                // one in the file) still matches; skip empty tokens.
                String token = f.next().trim();
                if (token.length() > 0) {
                    this.stopWords.add(token);
                }
            }
            // Scanner swallows read errors; surface them to the caller.
            IOException readError = f.ioException();
            if (readError != null) {
                throw readError;
            }
        } finally {
            f.close();
        }

        // Add single-letter words
        for (char c = 'a'; c <= 'z'; c++) {
            this.stopWords.add(String.valueOf(c));
        }
    }

    /**
     * Tells whether a word should be ignored.
     *
     * @param word the word to test
     * @return {@code true} if {@code word} is in the stop-word set
     */
    public boolean isStopWord(String word) {
        return this.stopWords.contains(word);
    }

    @Override
    public String getInfo() {
        return super.getInfo() + ": My major data structure is a " + this.stopWords.getClass().getName();
    }
}

/** Keeps the word frequency data. */
class WordFrequencyManager extends TFExercise {
    private final Map<String, MutableInteger> wordFreqs;

    public WordFrequencyManager() {
        this.wordFreqs = new HashMap<String, MutableInteger>();
    }

    /**
     * Adds one occurrence of a word.
     *
     * @param word the word whose count is incremented (created at 1 if unseen)
     */
    public void incrementCount(String word) {
        MutableInteger count = this.wordFreqs.get(word);
        if (count == null) {
            this.wordFreqs.put(word, new MutableInteger(1));
        } else {
            // Mutate in place: avoids a second map lookup and re-boxing.
            count.setValue(count.getValue() + 1);
        }
    }

    /**
     * Returns a snapshot of all counted words ordered by descending frequency.
     * The relative order of words with equal frequency follows the map's
     * iteration order and is therefore unspecified.
     *
     * @return a new list of word/frequency pairs, most frequent first
     */
    public List<WordFrequencyPair> sorted() {
        List<WordFrequencyPair> pairs = new ArrayList<WordFrequencyPair>(this.wordFreqs.size());
        for (Map.Entry<String, MutableInteger> entry : this.wordFreqs.entrySet()) {
            pairs.add(new WordFrequencyPair(entry.getKey(), entry.getValue().getValue()));
        }
        Collections.sort(pairs);
        Collections.reverse(pairs);
        return pairs;
    }

    @Override
    public String getInfo() {
        return super.getInfo() + ": My major data structure is a " + this.wordFreqs.getClass().getName();
    }
}

/** A mutable int holder, used as a map value that can be updated in place. */
class MutableInteger {
    private int value;

    public MutableInteger(int value) {
        this.value = value;
    }

    public int getValue() {
        return value;
    }

    public void setValue(int value) {
        this.value = value;
    }
}

/** An immutable (word, frequency) pair whose natural order is ascending frequency. */
class WordFrequencyPair implements Comparable<WordFrequencyPair> {
    private final String word;
    private final int frequency;

    public WordFrequencyPair(String word, int frequency) {
        this.word = word;
        this.frequency = frequency;
    }

    public String getWord() {
        return word;
    }

    public int getFrequency() {
        return frequency;
    }

    /**
     * Orders pairs by frequency only; the word does not take part.
     *
     * @param other the pair to compare with
     * @return a negative value, zero, or a positive value as this frequency is
     *         less than, equal to, or greater than {@code other}'s
     */
    @Override
    public int compareTo(WordFrequencyPair other) {
        // Explicit comparison: "this.frequency - other.frequency" can overflow.
        if (this.frequency < other.frequency) {
            return -1;
        }
        if (this.frequency > other.frequency) {
            return 1;
        }
        return 0;
    }
}