Files
exercises-in-programming-style/05-pipeline/tf-05.clj
2014-03-14 10:19:21 -07:00

47 lines
1.2 KiB
Clojure
Executable File

":";exec java -cp "$HOME/.m2/repository/org/clojure/clojure/1.5.1/clojure-1.5.1.jar" clojure.main $0 $*
; Sort of a hack; Clojure isn't really intended as a scripting language. :-/
(require '[clojure.string :refer [split]]
'[clojure.java.io :refer [reader]]
'[clojure.pprint :refer [pprint]])
(defn stopwords
"Reads a set of comma-separated stopwords from the given filename."
[file]
(-> file
slurp
(split #"\s+,\s+")
set))
(defn words
"Splits a string into a sequence of words."
[string]
(clojure.string/split string #"[^a-zA-Z]+"))
(defn normalize
"Normalizes split words to terms."
[word]
(.toLowerCase word))
(defn too-short?
"Is a word too short for consideration?"
[word]
(<= (.length word) 2))
; Lazily split the file into lines, explode lines into words, normalize into
; terms, reject unsuitable candidates, compute frequencies, and take the top
; 25.
(with-open [f (reader "../pride-and-prejudice.txt")]
(->> f
line-seq
(mapcat words)
(map normalize)
(remove too-short?)
(remove (stopwords "../stop_words.txt"))
frequencies
(sort-by second)
reverse
(take 25)
pprint))