Monday, July 25, 2005

Finding anagrams with ana.py

# WHAT: find groups of words in a word list that are anagrams of each other.
# GPL(C)
import os
import sys
import commands
import string
import re
# Path of the word list, built from the DOC environment variable.
# os.path.expandvars leaves '$DOC' untouched when the variable is not set,
# in which case the isfile() check below fails and the script stops.
wf = os.path.expandvars('$DOC/dict/words')
if not os.path.isfile(wf):
    # Diagnostics go to stderr, and the exit status is non-zero so that a
    # calling shell or script can tell the run failed.
    sys.stderr.write("Cannot read wf= %s\n" % wf)
    sys.exit(1)
# Handle consumed (and closed) by the indexing loop that follows.
f = open(wf, 'r')
# anagrams maps a "signature" (the word's letters in sorted order) to the
# set of distinct lowercased words that share that signature.
anagrams = {}
# Number of lines indexed, i.e. entries of three characters or more.
count = 0
try:
    for line in f:
        # Strip the line terminator and fold case so that "Tea" and "tea"
        # collapse into a single set member.
        word = line.rstrip("\n\r").lower()
        if len(word) <= 2:  # skip trivial anagrams.
            continue
        # Words that are anagrams of each other have identical sorted letters.
        sword = ''.join(sorted(word))
        if sword not in anagrams:
            anagrams[sword] = set()
        anagrams[sword].add(word)
        count += 1
finally:
    # Release the file handle even if reading or indexing fails.
    f.close()

# Print one line per anagram group, visiting signatures in sorted order,
# then a one-line summary.
acount = 0  # number of groups containing at least two distinct words
for sword in sorted(anagrams):
    anas = anagrams[sword]
    if len(anas) <= 1:
        # A signature owned by a single word has no anagram partner.
        continue
    acount += 1
    # Set iteration order is arbitrary; sort the members so the output is
    # reproducible from run to run and across interpreters.
    print(' '.join(sorted(anas)))

print('Found %d anagrams in %d words from %s' % (acount, count, wf))

sys.exit()

0 Comments:

Post a Comment

<< Home