Ruby Spell Checker
Inspired by Peter Norvig’s genius article, I wrote this piece of code while learning Ruby back in August. Writing it made me realize how powerful (at least for prototyping) and intuitive Ruby is. I hope you enjoy it.
#This script reads a text file, builds a histogram of all the words in it,
#and then tells you the frequency of any word you choose.
#In other words, it can be used as a spell checker/suggester.
#http://snippets.dzone.com/posts/show/280
# Character-swapping helpers added to the core String class.
class String
  # Exchanges the characters at indices +a+ and +b+ in place.
  # Returns the receiver itself (now modified).
  def swap!(a, b)
    # Read both characters before writing either one back.
    char_at_a = self[a]
    char_at_b = self[b]
    self[a] = char_at_b
    self[b] = char_at_a
    self
  end

  # Non-destructive variant: returns a copy of the receiver with the
  # characters at indices +a+ and +b+ exchanged; the receiver is untouched.
  def swap(a, b)
    dup.swap!(a, b)
  end
end
# Word-frequency model built from a body of text, usable as a simple
# spell checker / suggester.
#
# Words are counted case-insensitively. Suggestions are made by generating
# candidate strings a few simple edits away from the input (one inserted
# letter, one deleted letter, one or two adjacent-letter swaps) and keeping
# the candidates that occur relatively often in the text that was read.
class Novel
  def initialize
    @number_of_words = 0      # total number of word occurrences recorded
    @dictionary = Hash.new(0) # downcased word => number of occurrences
  end

  # Records one occurrence of +word+ (stored downcased).
  def add_word_to_dictionary(word)
    @number_of_words += 1
    @dictionary[word.downcase] += 1
  end

  # Returns true if +word+ (compared case-insensitively) has been recorded
  # at least once.
  def english_word?(word)
    @dictionary.key?(word.downcase)
  end

  # Returns the relative frequency of +word+ as a Float: its occurrence
  # count divided by the total number of recorded occurrences.
  # Returns 0.0 when nothing has been recorded yet (instead of NaN from 0/0).
  def get_word_frequency(word)
    return 0.0 if @number_of_words.zero?

    @dictionary[word.downcase].fdiv(@number_of_words)
  end

  # Reads the file at path +novel+ and records every word found in it.
  # A "word" is a maximal run of \w characters (letters, digits, underscore).
  # Returns the array of words that were scanned.
  def read_novel(novel)
    # File.read rather than IO.read: IO.read interprets a leading "|" in the
    # path as a command to execute.
    File.read(novel).scan(/\w+/).each { |word| add_word_to_dictionary(word) }
  end

  # Suggests corrections for +word+.
  #
  # Returns +word+ itself (a String) when it is already known. Otherwise
  # returns an Array of candidate strings whose frequency is greater than
  # 20% of the most frequent candidate's frequency; the array is empty when
  # no candidate is a known word.
  def correct_word(word)
    return word if english_word?(word)

    candidates = (single_letter_insert(word) +
                  swap_distance_one(word) +
                  swap_distance_two(word) +
                  single_letter_delete(word)).uniq

    # Compute the cut-off once, not once per candidate. The insert list is
    # never empty, so +max+ always has something to work with.
    threshold = candidates.map { |candidate| get_word_frequency(candidate) }.max * 0.2
    candidates.select { |candidate| get_word_frequency(candidate) > threshold }
  end

  # These are the different permutations on a word (i.e. when misspelled).

  # Every string obtained by inserting one letter 'a'..'z' at each position
  # of +word+, including before the first and after the last character.
  def single_letter_insert(word)
    (0..word.length).flat_map do |i|
      ('a'..'z').map { |letter| word[0...i] + letter + word[i..-1] }
    end
  end

  # Every string obtained by deleting exactly one character from +word+.
  def single_letter_delete(word)
    (0...word.length).map { |i| word[0...i] + word[(i + 1)..-1] }
  end

  # Every string obtained by exchanging one pair of adjacent characters.
  def swap_distance_one(word)
    (0...(word.length - 1)).map do |i|
      # Swap on a copy so +word+ is left untouched; done locally so this
      # class does not rely on any extension of the core String class.
      swapped = word.dup
      swapped[i], swapped[i + 1] = swapped[i + 1], swapped[i]
      swapped
    end
  end

  # Every string obtained by applying two successive adjacent swaps
  # (duplicates, and +word+ itself, may appear in the result).
  def swap_distance_two(word)
    swap_distance_one(word).flat_map { |once_swapped| swap_distance_one(once_swapped) }
  end
end
# Demo driver: build the word-frequency model from the novel's text file,
# then run two sample queries against it.
huck_finn = Novel.new
huck_finn.read_novel('MarkTwain_AdventuresOfHuckleberryFinn.txt')

# Prints true or false: does "Michel" occur in the text?
michel_known = huck_finn.english_word?("Michel")
puts(michel_known)

# Prints "te" itself if it occurs in the text; otherwise prints each
# suggested correction on its own line.
te_suggestions = huck_finn.correct_word("te")
puts(te_suggestions)