From 6148cff95e9f2ed6d6e9c85944045ea6dbe826a4 Mon Sep 17 00:00:00 2001
From: Joergen Edelbo
Date: Tue, 8 Oct 2013 16:26:03 +0200
Subject: [PATCH] Added C++ implementation of 06-candy-factory.

You will have to compile (make) the program before running the test.
---
Reviewer notes: line breaks, #include targets and template arguments were
restored, and the C++ source was revised (see the comments in tf-06.cpp).
The blob id on the tf-06.cpp index line predates these revisions.

 06-candy-factory/Makefile       |   8 ++
 06-candy-factory/tf-06.cpp      | 205 ++++++++++++++++++++++++++++++++
 22-declared-intentions/tf-22.py |   0
 3 files changed, 213 insertions(+)
 create mode 100644 06-candy-factory/Makefile
 create mode 100644 06-candy-factory/tf-06.cpp
 mode change 100644 => 100755 22-declared-intentions/tf-22.py

diff --git a/06-candy-factory/Makefile b/06-candy-factory/Makefile
new file mode 100644
index 0000000..e161935
--- /dev/null
+++ b/06-candy-factory/Makefile
@@ -0,0 +1,8 @@
+CPP_FLAGS=-std=gnu++0x
+
+tf-06: tf-06.cpp
+	$(CXX) $(CPP_FLAGS) -o $@ $<
+
+.PHONY: clean
+clean:
+	rm tf-06
diff --git a/06-candy-factory/tf-06.cpp b/06-candy-factory/tf-06.cpp
new file mode 100644
index 0000000..ede7f87
--- /dev/null
+++ b/06-candy-factory/tf-06.cpp
@@ -0,0 +1,205 @@
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+/** A word together with the number of times it occurs */
+struct Freq {
+    string word;
+    int freq;
+    Freq(const string& w, int f) : word(w), freq(f) {}
+};
+
+
+//
+// Helpers
+//
+
+/** Returns c if it is alphabetic and a blank otherwise. The cast to
+    unsigned char keeps bytes above 127 from reaching isalpha() as
+    negative values, which is undefined behaviour.
+*/
+static char tospace(char c)
+{
+    return isalpha(static_cast<unsigned char>(c)) ? c : ' ';
+}
+
+/** Returns the lower-case form of c; same cast as in tospace() */
+static char lowercase(char c)
+{
+    return static_cast<char>(tolower(static_cast<unsigned char>(c)));
+}
+
+/** True for white space; used to trim tokens of the stop word file */
+static bool is_space(char c)
+{
+    return isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+/** Reads the comma separated stop words from ../stop_words.txt, adds
+    every single-letter word and returns the list sorted, ready for
+    binary_search(). An unreadable file yields just the single letters.
+*/
+static vector<string> get_stop_words()
+{
+    string word;
+    vector<string> stop_words;
+    ifstream is("../stop_words.txt");
+
+    while (getline(is, word, ',')) {
+        // Strip white space (e.g. a newline after the last entry) so
+        // that every entry can match a scanned word.
+        word.erase(remove_if(word.begin(), word.end(), is_space), word.end());
+        if (!word.empty())
+            stop_words.push_back(word);
+    }
+
+    // Iterating a std::string (not a char array) skips the terminating
+    // NUL, so no empty stop word is added.
+    const string letters("abcdefghijklmnopqrstuvwxyz");
+    for (char c : letters)
+        stop_words.push_back(string(1, c));
+
+    std::sort(stop_words.begin(), stop_words.end());
+
+    return stop_words;
+}
+
+/** Orders entries so that higher frequencies come first */
+static bool sort_by_freq(const Freq& x, const Freq& y)
+{
+    return y.freq < x.freq;
+}
+
+//
+// The functions
+//
+
+/** Takes a path to a file and returns the entire
+    contents of the file as a string; the string is
+    empty if the file cannot be opened
+*/
+string read_file(const char* path_to_file)
+{
+    ifstream is(path_to_file, ifstream::binary);
+    if (!is)
+        return string();
+
+    // Streaming the file buffer needs no manual allocation, cannot
+    // index with a failed tellg() result and keeps embedded NUL bytes.
+    ostringstream contents;
+    contents << is.rdbuf();
+    return contents.str();
+}
+
+/** Takes a string and returns a copy with all nonalphabetic
+    chars replaced by white space
+*/
+string filter_chars(string str_data)
+{
+    transform(str_data.begin(), str_data.end(), str_data.begin(), tospace);
+    return str_data;
+}
+
+
+/** Takes a string and returns a copy with all chars in lower case
+*/
+string normalize(string str_data)
+{
+    transform(str_data.begin(), str_data.end(), str_data.begin(), lowercase);
+    return str_data;
+}
+
+
+/** Takes a string and scans for words, returning
+    a list of words.
+*/
+vector<string> scan(const string& str_data)
+{
+    string word;
+    vector<string> words;
+    istringstream is(str_data);
+
+    while (is >> word) {
+        words.push_back(word);
+    }
+
+    return words;
+}
+
+/** Takes a list of words and returns a copy with all stop
+    words removed
+*/
+vector<string> remove_stop_words(const vector<string>& words)
+{
+    const vector<string> stop_words = get_stop_words();
+    vector<string> filtered_list;
+
+    filtered_list.reserve(words.size());
+    for (const string& w : words) {
+        if (!binary_search(stop_words.begin(), stop_words.end(), w))
+            filtered_list.push_back(w);
+    }
+
+    return filtered_list;
+}
+
+/** Takes a list of words and returns a dictionary associating
+    words with frequencies of occurrence
+*/
+map<string, int> frequencies(const vector<string>& words)
+{
+    map<string, int> freq;
+
+    for (const string& w : words) {
+        ++freq[w];  // operator[] starts a new word at 0
+    }
+    return freq;
+}
+
+/** Takes a dictionary of words and their frequencies
+    and returns a list of pairs where the entries are
+    sorted by frequency, most frequent first
+*/
+vector<Freq> sort(const map<string, int>& word_freq)
+{
+    vector<Freq> out_list;
+
+    out_list.reserve(word_freq.size());
+    for (const auto& p : word_freq) {
+        out_list.push_back(Freq(p.first, p.second));
+    }
+
+    // The map is walked in key order, so a stable sort leaves equally
+    // frequent words in alphabetical order and the output deterministic.
+    std::stable_sort(out_list.begin(), out_list.end(), sort_by_freq);
+    return out_list;
+}
+
+//
+// The main function
+//
+
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        cerr << "usage: tf-06 <path-to-text-file>" << endl;
+        return 1;
+    }
+
+    const vector<Freq> word_freqs = sort(frequencies(remove_stop_words(scan(normalize(filter_chars(read_file(argv[1])))))));
+
+    // Print at most 25 entries; a short text has fewer distinct words.
+    const size_t count = min<size_t>(word_freqs.size(), 25);
+    for (size_t i = 0; i < count; ++i)
+        cout << word_freqs[i].word << " - " << word_freqs[i].freq << '\n';
+
+    return 0;
+}
diff --git a/22-declared-intentions/tf-22.py b/22-declared-intentions/tf-22.py
old mode 100644
new mode 100755