Monday, July 25, 2005

Dictor.py program to annotate meanings.

> cat dictor.py # text2html by vim63
 # GPL(C)
# Annotate text with meanings of words in tooltips, or links to look up the word.
# usage: python $0 infile.txt > outfile.html

import os
import sys
import string
import commands
import re

if len(sys.argv)>1 and os.path.isfile(sys.argv[1]):
    pass
else:
    print "Usage: dictor.py infile > outfile.html"
    sys.exit()

# Read ascii dictionary
dict = os.path.expandvars('$DOC/dict/dict.txt')
meanings = {}
if os.path.isfile(dict):
    f = open(dict, 'r')
    while 1:
        line = f.readline()
        if not line:
            break
        if not line.find('--'):
            continue
        line = line.rstrip("\n\r")
        try:
            word,meaning = line.split('--')
        except ValueError:
            continue
        for worda in word.split(','):
            meanings[worda.lower()] = meaning.strip()
    f.close()

# Read the text file and annotate it from the dictionary or link to web.
splitter = re.compile('([^a-zA-Z]+)')
for infile in sys.argv[1:]:
    if not os.path.isfile(infile):
        print 'not a file', infile
        continue
    else:
        print 'reading', infile
    i = open(infile,'r')
    while 1:
        line = i.readline()
        if not line:
            break
        line = line.rstrip("\n\r")
        for word in splitter.split(line):
            if meanings.has_key(word.lower()):
                print '<a href="%s">%s</a>' % ( meanings[word.lower()], word )
            elif splitter.search(word):
                word = word.replace('&','&amp;')
                word = word.replace('<','&lt;')
                word = word.replace('>','&gt;')
                word = word.replace('~','&ntilde;')
                word = word.expandtabs()
                word = word.replace(' ',' ')
                print '%s' % word,
            else:
                print '<a href="http://www.google.com/search?q=%s">%s</a>' % ( word, word )
        print '<br>'
    i.close()
pass

0 Comments:

Post a Comment

<< Home