diff --git a/04-code-golf/tf-04-pn.py b/04-code-golf/tf-04-pn.py old mode 100644 new mode 100755 index 87c6b3e..3071f64 --- a/04-code-golf/tf-04-pn.py +++ b/04-code-golf/tf-04-pn.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # My golf score is slightly lower! # Best wishes, Peter Norvig diff --git a/04-code-golf/tf-04.py b/04-code-golf/tf-04.py index 668fedd..4dcce5e 100755 --- a/04-code-golf/tf-04.py +++ b/04-code-golf/tf-04.py @@ -1,8 +1,6 @@ #!/usr/bin/env python -import re, string, sys +import heapq, re, sys -stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase)) -words = [x.lower() for x in re.split("[^a-zA-Z]+", open(sys.argv[1]).read()) if len(x) > 0 and x.lower() not in stops] -unique_words = list(set(words)) -unique_words.sort(lambda x, y: cmp(words.count(y), words.count(x))) -print "\n".join(["%s - %s" % (x, words.count(x)) for x in unique_words[:25]]) +words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower()) +for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count): + print w, "-", words.count(w)