From 752bb35c0240b45cca142fb0dd15caf8718f35df Mon Sep 17 00:00:00 2001 From: Crista Lopes Date: Fri, 18 Oct 2013 16:59:09 -0700 Subject: [PATCH] Added plugins style --- 19-plugins/config.ini | 5 ++++ 19-plugins/plugins-src/compile.sh | 2 ++ 19-plugins/plugins-src/frequencies1.py | 18 ++++++++++++++ 19-plugins/plugins-src/frequencies2.py | 10 ++++++++ 19-plugins/plugins-src/words1.py | 31 +++++++++++++++++++++++++ 19-plugins/plugins-src/words2.py | 12 ++++++++++ 19-plugins/plugins/frequencies1.pyc | Bin 0 -> 644 bytes 19-plugins/plugins/frequencies2.pyc | Bin 0 -> 614 bytes 19-plugins/plugins/words1.pyc | Bin 0 -> 1173 bytes 19-plugins/plugins/words2.pyc | Bin 0 -> 687 bytes 19-plugins/tf-19.py | 23 ++++++++++++++++++ 11 files changed, 101 insertions(+) create mode 100644 19-plugins/config.ini create mode 100644 19-plugins/plugins-src/compile.sh create mode 100644 19-plugins/plugins-src/frequencies1.py create mode 100644 19-plugins/plugins-src/frequencies2.py create mode 100644 19-plugins/plugins-src/words1.py create mode 100644 19-plugins/plugins-src/words2.py create mode 100644 19-plugins/plugins/frequencies1.pyc create mode 100644 19-plugins/plugins/frequencies2.pyc create mode 100644 19-plugins/plugins/words1.pyc create mode 100644 19-plugins/plugins/words2.pyc create mode 100644 19-plugins/tf-19.py diff --git a/19-plugins/config.ini b/19-plugins/config.ini new file mode 100644 index 0000000..fa60412 --- /dev/null +++ b/19-plugins/config.ini @@ -0,0 +1,5 @@ +[Plugins] +;; Valid options: plugins/words1.pyc, plugins/words2.pyc +words = plugins/words1.pyc +;; Valid options: plugins/frequencies1.pyc, plugins/frequencies2.pyc +frequencies = plugins/frequencies1.pyc diff --git a/19-plugins/plugins-src/compile.sh b/19-plugins/plugins-src/compile.sh new file mode 100644 index 0000000..37dac1a --- /dev/null +++ b/19-plugins/plugins-src/compile.sh @@ -0,0 +1,2 @@ +python -m compileall . +cp *.pyc ../plugins diff --git a/19-plugins/plugins-src/frequencies1.py b/19-plugins/plugins-src/frequencies1.py new file mode 100644 index 0000000..5171cbb --- /dev/null +++ b/19-plugins/plugins-src/frequencies1.py @@ -0,0 +1,18 @@ +import operator + +def top25(word_list): + """ + Takes a list of words and returns a dictionary associating + words with frequencies of occurrence + """ + if type(word_list) is not list or word_list == []: + return {} + + word_freqs = {} + for w in word_list: + if w in word_freqs: + word_freqs[w] += 1 + else: + word_freqs[w] = 1 + return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25] + diff --git a/19-plugins/plugins-src/frequencies2.py b/19-plugins/plugins-src/frequencies2.py new file mode 100644 index 0000000..016ce66 --- /dev/null +++ b/19-plugins/plugins-src/frequencies2.py @@ -0,0 +1,10 @@ +import operator, collections + +def top25(word_list): + """ + Takes a list of words and returns a dictionary associating + words with frequencies of occurrence + """ + counts = collections.Counter(w for w in word_list) + return counts.most_common(25) + diff --git a/19-plugins/plugins-src/words1.py b/19-plugins/plugins-src/words1.py new file mode 100644 index 0000000..b90594e --- /dev/null +++ b/19-plugins/plugins-src/words1.py @@ -0,0 +1,31 @@ +import sys, re, string + +def extract_words(path_to_file): + """ + Takes a path to a file and returns the non-stop + words, after properly removing nonalphanumeric chars + and normalizing for lower case + """ + if type(path_to_file) is not str or not path_to_file: + return [] + + try: + with open(path_to_file) as f: + str_data = f.read() + except IOError as e: + print "I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror) + return [] + + pattern = re.compile('[\W_]+') + word_list = pattern.sub(' ', str_data).lower().split() + + try: + with open('../stop_words.txt') as f: + stop_words = f.read().split(',') + except IOError as e: + print "I/O error({0}) when opening ../stops_words.txt: {1}".format(e.errno, e.strerror) + return [] + + stop_words.extend(list(string.ascii_lowercase)) + return [w for w in word_list if not w in stop_words] + diff --git a/19-plugins/plugins-src/words2.py b/19-plugins/plugins-src/words2.py new file mode 100644 index 0000000..f314f22 --- /dev/null +++ b/19-plugins/plugins-src/words2.py @@ -0,0 +1,12 @@ +import sys, re, string + +def extract_words(path_to_file): + """ + Takes a path to a file and returns the non-stop + words, after properly removing nonalphanumeric chars + and normalizing for lower case + """ + words = re.findall('[a-z]{2,}', open(path_to_file).read().lower()) + stopwords = set(open('../stop_words.txt').read().split(',')) + return [w for w in words if w not in stopwords] + diff --git a/19-plugins/plugins/frequencies1.pyc b/19-plugins/plugins/frequencies1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a204d1f0fed46ca6c4dc59c64c334831e95ebf49 GIT binary patch literal 644 zcmaJ`r&>F2-}8*eZ|%)qcDsL{i&05$L*xA&5f14>)2*a+;_`QDD5Xh>+lbqM%mI`jeg^BGfB2Fw{pFhF#Mm#t9*h0> t2|(>b)zTfct9oi*@2On=laaU`^#Jc3|JQaKl_v{dHFNt-j6`YD)3434iPiuB literal 0 HcmV?d00001 diff --git a/19-plugins/plugins/frequencies2.pyc b/19-plugins/plugins/frequencies2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30e528606d3712b9e4a9391d8529badf6297e259 GIT binary patch literal 614 zcmah`!A=4(5S=c93i05;a4iq9L1Z!Q$=~+Swopp7dY*7Qe?2 z(3vhl4<nN@f z(tOdnQG#4%;M9PQKPQ6#bE*+svjMn%6+rt%z(&BFI3f-!8lMxsDQVp%Puj~wX2zqm zQ@OG(rMO6?(^$HK-%_K{*dlRjnRsuNNzfFtP+q}T1|Q|r>1C-4Wr&Z6tWu?O6zc%o zNoLue&j}fl`Ow-*yc5eBPhu2vcQ-K^h#8G+Iv2?4Rh1?W{4uBRe|@&wS*-mbWqUJS z=;wv&A25!~8(`XWg&AUtJY};?hqgTKQ#`O`f!c+)+@KdA1&G?kN&t#efG9FX-ApIOB zhbC6USx+RxK_Ekqdp#kQ1)8i$DhDDCznI8wDtVlaD2pdlU_&<49isMjeUk@Lrm}JT zpt#3Jy&&OKCNZGnR`G-%KP*&*0zd3+clKAQ{{UB)Xl*vv4bP6{*wjbbo~@}z|H(ST zsdFF|X%!8VlU@id<1ElF8A?m*Fnp{b1L5nc$-nu^aMp$xU2Pz=E|W4aiFe87V$ZJv^`otBJK#7Khp<432b=__kmis;Q5tkRtQ~0HM4v>{y`D(so%d z(t&AgN2eXrR;xw#luE}JT3FYV2}M#v6FpBTBVWhXsnPCSQ5!yLkPmiI3uHC-tlF(+ zHr7kDC#q6-dMc%r%=8qMIx9IYTV^ZtU1McunccGc+qAzz`xVx(N?XWSwN2@^X+Zlr aE$2o5U3rdTo(@uf7zfYHdr(H?JI-$|;RX!= literal 0 HcmV?d00001 diff --git a/19-plugins/plugins/words2.pyc b/19-plugins/plugins/words2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..699a7e5b4988dd217e0f69c66517609c278db6e5 GIT binary patch literal 687 zcmZutv2qhJ5MAkl1A!EDlwu}@3+EV8GYm6OW>T0`hTzfM$+=79(;=;K9PG;cLVk+h z;RE2U97uykT0QN)-M8A}*WtnZryW;xzclW5IO#P^jjhNNP5xm;6ZO}xX`-pt^aF%K zm>-7)vfAHRXO#lnyFc%7rv`?Ju8^;YHLZ!ZimnRU!nr4Qt7(JKBZ;E!Lv%njXiD}* z(8dX86Fz7pwamM!^)Sq3;7?!#gnL}*m- ztnF&+Jw#L|qvJMwO$#M3i-nX2`!O!592~0Um<^MUJ^;r55`Q1yDmqQrJ zCOrizKj+v<*{}EzZ+FAdR=n#s3l1bh<}z>)p~Pg$9V literal 0 HcmV?d00001 diff --git a/19-plugins/tf-19.py b/19-plugins/tf-19.py new file mode 100644 index 0000000..13d8b46 --- /dev/null +++ b/19-plugins/tf-19.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +import sys, ConfigParser, imp + +def load_plugins(): + config = ConfigParser.ConfigParser() + config.read("config.ini") + words_plugin = config.get("Plugins", "words") + frequencies_plugin = config.get("Plugins", "frequencies") + global tfwords, tffreqs + tfwords = imp.load_compiled('tfwords', words_plugin) + tffreqs = imp.load_compiled('tffreqs', frequencies_plugin) + +# +# The main function +# + +load_plugins() +word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1])) + +for tf in word_freqs: + print tf[0], ' - ', tf[1] +