Monday, July 25, 2005

Dictor.py program to annotate meanings.

> cat dictor.py # text2html by vim63
 # GPL(C)
# Annontate text with meanings of words in tooltips, or links to lookup the word.
# usage: python $0 infile.txt > outfile.html

import os
import sys
import string
import commands
import re

if len(sys.argv)>1 and os.path.isfile(sys.argv[1]):
    pass
else:
    print "Usage: dictor.py infile > outfile.html"
    sys.exit()

# Read ascii dictionary
dict = os.path.expandvars('$DOC/dict/dict.txt')
meanings = {}
if os.path.isfile(dict):
    f = open(dict, 'r')
    while 1:
        line = f.readline()
        if not line:
            break
        if not line.find('--'):
            continue
        line = line.rstrip("\n\r")
        try:
            word,meaning = line.split('--')
        except ValueError:
            continue
        for worda in word.split(','):
            meanings[worda.lower()] = meaning.strip()
    f.close()

# Read the text file and annotate it from the dictionary or link to web.
splitter = re.compile('([^a-zA-Z]+)')
for infile in sys.argv[1:]:
    if not os.path.isfile(infile):
        print 'not a file', infile
        continue
    else:
        print 'reading', infile
    i = open(infile,'r')
    while 1:
        line = i.readline()
        if not line:
            break
        line = line.rstrip("\n\r")
        for word in splitter.split(line):
            if meanings.has_key(word.lower()):
                print '<a href="%s">%s</a>' % ( meanings[word.lower()], word )
            elif splitter.search(word):
                word = word.replace('&','&amp;')
                word = word.replace('<','&lt;')
                word = word.replace('>','&gt;')
                word = word.replace('~','&ntilde;')
                word = word.expandtabs()
                word = word.replace(' ',' ')
                print '%s' % word,
            else:
                print '<a href="http://www.google.com/search?q=%s">%s</a>' % ( word, word )
        print '<br>'
    i.close()
pass

Output of ana.py is a list of anagrams

Only anagrams of words longer than 17 characters are shown to avoid blogbloat.
basiparachromatin marsipobranchiata
anatomicopathological pathologicoanatomical
glossolabiopharyngeal labioglossopharyngeal
glossolabiolaryngeal labioglossolaryngeal
anatomicopathologic pathologicoanatomic
laryngopharyngeal pharyngolaryngeal
mechanicochemical chemicomechanical
phrenicopericardiac pericardiacophrenic
siliceocalcareous calcareosiliceous
clinicopathological pathologicoclinical
duodenopancreatectomy pancreatoduodenectomy
myxochondrosarcoma chondromyxosarcoma
magnetoelectrical electromagnetical
anatomicophysiologic physiologicoanatomic
acetmethylanilide methylacetanilide
octakishexahedron hexakisoctahedron
pericardiopleural pleuropericardial
tracheolaryngotomy laryngotracheotomy
tetrakishexahedron hexakistetrahedron
galvanothermometer thermogalvanometer
esophagogastrostomy gastroesophagostomy
hysterolaparotomy laparohysterotomy
laryngopharyngitis pharyngolaryngitis
meningoencephalocele encephalomeningocele
psychoneurological neuropsychological
chronophotographic photochronographic
physiopsychological psychophysiological
microphotographic photomicrographic
pneumohydropericardium hydropneumopericardium
polygamodioecious dioeciopolygamous
meningoencephalitis encephalomeningitis
encephalomyelitis myeloencephalitis
cephalomeningitis meningocephalitis
chromophotolithograph photochromolithograph
lithochromography chromolithography
photochromography chromophotography
photochronography chronophotography
misconstitutional constitutionalism
epididymovasostomy vasoepididymostomy
hydropneumothorax pneumohydrothorax
representationism misrepresentation
cerebromeningitis meningocerebritis
incontrovertibility introconvertibility
spectromicroscope microspectroscope
cholecystoduodenostomy duodenocholecystostomy
chorioidoretinitis retinochorioiditis
thermoelectrometer electrothermometer
myomohysterectomy hysteromyomectomy

Finding anagrams with ana.py

# WHAT: generate anagrams from a list of words.
# GPL(C)
import os
import sys
import commands
import string
import re
wf = os.path.expandvars('$DOC/dict/words')
if not os.path.isfile(wf) :
    print "Cannot read wf=",wf
    sys.exit()
f = open(wf,'r')
anagrams = {}
count = 0
while 1:
    word = f.readline()
    if not word:
        break
    word = word.rstrip("\n\r").lower()
    if len(word) <= 2: # skip trivial anagrams.
        continue
    charlist = list(word)
    # charlist.sort(lambda x, y : cmp( string.lower(x), string.lower(y)) )
    charlist.sort()
    sword = string.join( charlist, '' )
    if not anagrams.has_key(sword):
        anagrams[sword] = set()
    anagrams[sword].add(word)
    count += 1

f.close()

key = anagrams.keys()
key.sort()
acount = 0
for sword in key:
    anas = anagrams[sword]
    if len(anas) <= 1:
        continue
    acount = acount + 1
    for word in anas:
        print word,
    print

print 'Found %d anagrams in %d words from %s' % (acount,count,wf)

sys.exit()

Saturday, July 23, 2005

Turbo C Source code for Instant Insanity Game

/* 
  What: Instant Insanity Game source in Turbo C. GPL(C)
   =======================
   Col1        Col4*4
    3   3   3   3     Row1
   405 405 405 405    Row2
    1   1   1   1     Row3
    2   2   2   2     Row4
   =======================
    c:\mosh\games>tcc -If:\tcc30\include -Lf:\tcc30\lib instanti.cpp
*/

#include "cubeinit.cpp"

int cube[4][6];  /* four cubes with six faces */

enum { W,R,G,B } virtual_colors;
static int screen_colors[5] = {WHITE, RED,GREEN,BLUE,YELLOW};

void setpuzzle(int p){
    int i,k;
    static int puzzle[1][4][6] = {
        {{W,W,W,G,R,B},{G,W,G,R,B,R},{W,W,B,R,G,B},{B,W,G,G,B,R}},
    };
    for(i=0;i<6;i++)
        for(k=0;k<4;k++)
            cube[k][i]=puzzle[p%(sizeof(puzzle)/sizeof(cube))][k][i];
}

int sameface(int cube[4][6]){ /* return 0, if all sides have different colors */
    int i,j,k,dup=0;
    for(i=0;i<4;i++)
      for(j=i+1;j<4;j++)
        for(k=0;k<4;k++)
            dup += (cube[i][k]==cube[j][k]);
    return dup;
}

void putcharc(int row, int kol, int c){
    int t;
    textcolor(screen_colors[c]);
    mvaddch(row,kol,'0'+c);
}

void drawcubes(int cc){
    int i;
    clrscr();
    for(i=0;i<4;i++){
        putcharc(4+i*4   ,1,cube[i][3]);
        putcharc(4+i*4   ,2,cube[i][0]);
        putcharc(4+i*4-1 ,3,cube[i][4]);
        putcharc(4+i*4+1 ,3,cube[i][5]);
        putcharc(4+i*4   ,3,cube[i][1]);
        putcharc(4+i*4   ,4,cube[i][2]);
        if(i==cc)
            putcharc(4+i*4,5,4);
    }
}

void upcube(int cc){
    int t=cube[cc][3];
    cube[cc][3]=cube[cc][0]; cube[cc][0]=cube[cc][1];
    cube[cc][1]=cube[cc][2]; cube[cc][2]=t;
}
void rocube(int cc){
    int t=cube[cc][3];
    cube[cc][3]=cube[cc][4]; cube[cc][4]=cube[cc][1];
    cube[cc][1]=cube[cc][5]; cube[cc][5]=t;
}
void main(){
    chtype ch=' ';
    int cc=0; /* current cube to operate on */
    initscr(); cbreak(); noecho(); refresh();
    setpuzzle(0);
    drawcubes(cc);

    do{
        textcolor(FGCOL); mvaddch(24,0,':');
        clrtoeol(); mvaddch(24,1,ch); ch = mvgetch(24,2); clrtoeol();
        amessage("ok.");
        switch(ch){
        case 'L': if(--cc<0)cc=3; break;
        case 'R': if(++cc>3)cc=0; break;
        case 'l': upcube(cc); break;
        case 'r': upcube(cc); upcube(cc); upcube(cc); break;
        case 'u': rocube(cc); break;
        case 'd': rocube(cc);  rocube(cc);  rocube(cc); break;
        case '1': cc=0; break;
        case '2': cc=1; break;
        case '3': cc=2; break;
        case '4': cc=3; break;
        }
        if( sameface(cube) == 0 ){
            printf("Done\n");
        }
        drawcubes(cc);
    }while(ch!='q');
    clear(); refresh();
    endwin();
}

Set of Sets in STL

 // WHAT: print set of set in C++ STL with generate_allsubets example.
 // file setofsets.cpp
 // Testing VC6.

 #include <set>
 #include <iostream>
 #include <algorithm>
 using namespace std;

 #pragma warning(disable:4786) // warning: debug identifier trunc to 255.

 typedef set< set<int> > tsos;
 typedef set<int> ts;

 void generate_allsubets( tsos& sos, ts& choices ){
     if( choices.size() < 1 ) return; // end of recursion.
     if( sos.size() < 1 )
         sos.insert(*new set<int>); // insert emptyset first time.
     int c1 = *(choices.begin());
     // make a copy of sos, in each subset insert c1.
     tsos sos_c1;
     for( tsos::iterator p = sos.begin(); p != sos.end(); ++p ){
         ts s_c1 = *p; // make a copy of the subset, then insert c1.
         s_c1.insert(c1);
         sos_c1.insert(s_c1);
     }
     // we have two sets of subset now: sos_c1 with c1, original sos without c1.
     // merge sos_c1 into sos to double the size of sos.

     #if 0 // USE set_union
     tsos both;
     set_union( sos.begin(), sos.end(), sos_c1.begin(), sos_c1.end(), 
                         insert_iterator<tsos>(both,both.begin()) );
     sos = both;
     #else
     for( tsos::iterator pc1 = sos_c1.begin(); pc1 != sos_c1.end(); ++pc1 )
         sos.insert( *pc1 );
     #endif

     // Recursively insert remaining choices.
     if( choices.erase(c1) > 0 ) // should always be able to erase c1.
         generate_allsubets( sos, choices );
 }

 void print_allsubsets( tsos& sos ){
     int i=0;
     for( tsos::iterator p = sos.begin(); p != sos.end(); ++p ){
         cout << "subset " << ++i << " = { ";
         for( ts::iterator q = p->begin(); q != p->end(); ++q )
              cout << *q << ", ";
         cout << "}\n";
     }
 }

 int main( int argc, char *argv[] ){
     set< set<int> > sos;
     set< int > choices;
     for(int i=0;i<3;i++)
         choices.insert(i);
     generate_allsubets( sos, choices );
     cout << "The "<< sos.size() << " subsets of {0.."<<i-1<<"} are\n";
     print_allsubsets( sos );
     return 0;
 }
 

64Bit C Programmer's Tips, Tricks and Traps

 P64 Gotcha List
 ----------------

 1. sizeof long array can overflow on lp64.

   uchar n = sizeof( 50*sizeof(long)); // 200 bytes in ilp32, 400 bytes in lp64.

 2. Use u U L l suffix on large const.

   long l = 1 <<32; // 0 on ilp32,lp64
   long l = 1L<<32; // 2^32 on lp64, 0 on ilp32.

   Solaris: see /usr/include/sys/inttypes.h

   INT64_C(5)  => 5ll for ILP32, and 5l for LP64.

   0x80000000L  ulong in lp32,  long in lp64.

 3. Bit fields are converted to int and sign-extended (or uint if too large) in expr.
   struct { uint b:19, r:13 }x;
   uint y = x.b<<13 // x.b first converted to int and sign extended!
   uint y = (uint)x.b << 13; // ok.

 4. Integer Arithmetic on ptr, ptr is trucated to int32.

   char *p="x";
   (int)p+1=='\0' // p is trunc to int32.
   (uintptr_t)p+1=='\0' // ok.

 5. Unprototyped func returning ptr is trucated to int32.

   // int login(...); -- implicit C decl.
   char *p = getenv("HOME");  // p is trucated to int32, crash.

 6. Saving ptr in int, losses upper 4 bytes of ptr.

   void *q;
   int p = (int)q; // truncated to int32

 7. Unions with long.

   union {
     long x;
     int  y; // on lp64, y gets lsb of x.
   }

 8. Print format specifier

   char *p="x";
   printf("p=0x%08x",p); // losses msb.
   printf("p=%p",   p); // ok.

   long x=LONG_MAX;

   printf("x=%d ",x); // truncated, wrong.
   printf("x=%ld",x); // ok.

 9. Casting *int into *long.

   int  *i;
   long *p;
   p+1; // p+=4 on ilp32
   p+1; // p+=8 on  lp64
   p = (long*) i; // p misaligned pointing to *i and *(i+1).
   i = ( int*) p; // i pointing to lsb of p.

 10. Unsigned bit fields (default sign and alignment).

   struct { b:12, int:0, d:5 } x;  // b is unsigned in lp64, signed
 in ilp32.
   // :0 affects alignment in ilp32, no effect in lp64.

 11. LP64 Perf Problems

   Larger exe, need more TLB, cache, mem (wrt same src compiled as ILP32).
   Long div slow in lp64, eg. 1L/9.
   Int Array indexes are sign-ext on every use, need more instruction in LP64, eg. int i; a[i];

 12. Expression sizing:

  Example.


   int x,y;
   long z = x + y;  // x+y done in 32bit, then assignment will resize result for z.

  Example.

   ulong y = (1 <<32); // overflows in lp32 and lp64.
   ulong y = (1L<<32); // overflows in lp32, ok 2^32 in lp64.

 13. Promotions:

   result = a op b;

   // 1. type of tmp is of max of a and b.
   // 2. tmp = a op b
   // 3. promote tmp to type of result, ptmp.
   // 4. result = ptmp;

  Example.

   long l=-1; uint i=1;
   if( l > i ) printf("ilp32"); // -1 is promoted to uint32.
   else     printf("lp64");  // both promoted to signed 64.

   if( l > (long)i ) // fix
     ...;

  Example.

   main(){  // k is -1 in LP32, 4294967295 in LP64
     int i = -2;
     unsigned int j = 1;
     long k = i+j;
     printf("int %d+ uint %ld= long k is %ld\n",i,j,k);
   }

 15. Prototype:

   // #include <stdlib.h> missing.
   int *i = malloc(sizeof(*i));
   // Ok in lp32, Crash on lp64 because
   // malloc will return 32bit int value because of missing prototype.

 }

 ////////////////////////////////////////////////////



 Examples

  > cc +DA2.0W +w1 +M2 enquire.c    # +w1 for warnings, +M2 for 64bits

  long double=128 bits

   # Sort LP64 warning by problem:


   > make 2>&1 | grep LP64 | xsort 'warning:\s+\S+:' | align warn > lp64.log
   > vim lp64.log
   :set isk+=46  " Make . part of tag, so file.c is a keyword for tags"

   > emacs lp64.log

   > M-x compilation-minor-mode
   > M-x next-error
   ; if cant find files, set
   ; (setq compilation-search-path '("dir1" "dir2")) C-xC-e

 /714: Function "malloc" called with no prototype or definition in scope
 /724: Initialization converts default int return type to pointer
 /722:.*cast converts 32 bit constant to pointer
 /725:.*cast converts 32 bit integer to pointer
 /727:.*cast truncates pointer into 32 bit integer
 /729:.*converts int* to long*
 /732:.*different types treated as unsigned
 /740:.*casting to a smaller size may truncate
 /530:.*casting from loose to strict alignment
 /720:.*Assignment may overflow

 // Example

 > catt -n x.c

 1  void main(){
 2   int *x, *q, i;
 3   long l=-1;
 4   x = malloc(sizeof(int)*10);
 5   (int*)i;
 6   (int)  &i;
 7   (long*)&i;
 8   i = l;
 9  }

 hp64> cc  +DA2.0W +M2 +w1 -Aa x.c

 /*
 line 4: warning 714: Function "malloc" called with no prototype or definition in scope.
 line 4: warning 724: LP64 migration: Assignment converts default int return type to pointer "x".
 line 5: warning 725: LP64 migration: Cast converts 32 bit integer to pointer.
 line 6: warning 727: LP64 migration: Cast truncates pointer into 32 bit integer.
 line 7: warning 530: LP64 migration: Casting from loose to strict alignment: Resulting pointer may be misaligned.
 line 7: warning 729: LP64 migration: Cast converts int* to long*.
 line 8: warning 720: LP64 migration: Assignment may overflow integer "i".
 */

 /////////////////////////////////////////////////

 A. Sizes
     Ansi sizeof() is always unsigned, try:
     if( 0-sizeof(int) > 0 ) printf("See: 0-sizeof(int) is unsigned!\n");

   ILP32: win32 hp11 solaris linux

   ANSI C Integer Requirements, see C book.  moshtag=ansi_int

     sizeof(int) >= sizeof(short)  >= 2.
     sizeof(long) >=sizeof(int)
     sizeof(long) >= 4.

   LP64:  hp11_64 solaris_64 (assume default for 64bit).

     New: sizeof(long)==sizeof(void*)==8, rest unchanged.

   P64:  win64

     New: sizeof(void*)==8, rest unchanged.

 //////////////////////////////////////////////////////////

 Linux64                       moshtag=linux64

   char = 8 bits, signed
   short=16 int=32 long=64 float=32 double=64 bits
   long double=128 bits
   Type size_t is unsigned long
   ALIGNMENTS
     char=1 short=2 int=4 long=8 float=4 double=8 long double=16
     char*=8 int*=8 func*=8
   short: BA
   int:  DCBA
   long:  HGFEDCBA
   Maximum long = 9223372036854775807 (= 2**63-1)
   Minimum long = -9223372036854775808
   Maximum unsigned short = 65535
   Maximum unsigned int = 4294967295
   Maximum unsigned long = 18446744073709551615
   PROMOTIONS
     unsigned short promotes to signed int
     long+unsigned gives signed long

 HP64 ALIGNMENTS

   char=1 short=2 int=4 long=8 float=4 double=8 long double=16
   char*=8 int*=8 func*=8