/* This is an implementation of a linear search through the .hat file, * building a list of all identifiers found, with application counts. */ #include #include #include #include #include #include #include "art.h" #define DEBUG 0 #if DEBUG #define HIDE(x) x #else #define HIDE(x) #endif #define MAX_STRING 1024 char* rmext (char* word, char* ext) { if (ext==(char*)0) return word; else { char *e = ext; char *i = strdup(word); char *c = i; while (*c) c++; while (*e) e++; while ((*--c==*--e) && e!=ext) ; if ((e==ext) && (*c==*e)) *c='\0'; return i; } } char* basename (char* path, char* ext) { char *c = path; while (*c) c++; while (*c!='/' && c!=path) c--; if (c==path) return rmext(path,ext); else { c++; return rmext(strdup(c),ext); } } char* dirname (char* path) { char *start, *c; start = strdup(path); c = start; while (*c) c++; while (*c!='/' && c!=start) c--; if (c==start) { return "."; } else { *c='\0'; return start; } } /* ftell() is hugely expensive, so we keep a record of the current file * position at all times. All routines must be VERY CAREFUL to update * this global correctly. */ FileOffset position; FILE *HatFile; char* progname, *dir; /* Open a file for reading, given: * the base name of the file * the file extension */ FILE* openFile (char* base, char* ext) { char filename[MAX_STRING]; FILE* file; struct stat buf; strcpy(filename,base); strcat(filename,ext); if (file = fopen(filename,"r")) { return file; } else { fprintf(stderr,"%s: cannot open %s/%s\n",progname,dir,filename); exit(1); } } /* m_readString() reads a null-terminated string from the current position * in the file. */ char* m_readString (void) { char c, buf[MAX_STRING]; int i; i=0; while (c=fgetc(HatFile)) { buf[i++] = c; } buf[i] = '\0'; HIDE(fprintf(stderr,"readString -> %s\n",buf);) position += (i+1)*sizeof(char); return strdup(buf); } /* m_read() is just like fread() except it advances the global counter * recording the file position. */ int m_read(void* buf, size_t siz, int num, FILE* file) { int err; err = fread(buf,siz,num,file); position += num*siz; return err; } /* m_readFO() reads a single FileOffset from the file and ensures it is * in host-endian order. */ FileOffset m_readFO (void) { FileOffset fo; m_read(&fo,sizeof(FileOffset),1,HatFile); HIDE(fprintf(stderr,"readFO -> 0x%x\n",ntohl(fo));) return ntohl(fo); } /* m_peek() takes a sneaky look at the next byte of the file, to * determine whether we want to go ahead and read it now. */ char m_peek (void) { char c; c = (char)fgetc(HatFile); ungetc(c,HatFile); return c; } /* We need to distinguish between three different kinds of identifier: * variables defined at the top level * variables defined in a local scope * data constructors */ typedef enum { topid, localid, constructor } idkind; /* There are only four things we are interested in for each identifier: * - its name * - its kind * - how many times it was applied and gave a result * - how many times it was applied but did not give a result */ typedef struct { char* name; idkind kind; int uses; /* number of evaluated applications */ int pending; /* number of unevaluated applications */ } item; /* There are two finitemap structures that associate file pointers with items. * The first maps a NmType pointer to the unique item stored there. * The second maps a Trace pointer to an item - in this map, each item * might be pointed to by several trace pointers, e.g. different usage * sites in the source lead to different TNm nodes, each pointing to * the same NmType; e.g.2. a TAp node can also map to an item, for * instance in a partial application ((f a) b). * * When searching, if we find a TNm, we check its fun pointer against * the first map, and add the address of the TNm to the second map. * * If we find a TAp, we check its fun pointer against the second map. * (If that fails, we ignore it and continue...?) * If this application is undersaturated (how do we tell?) then * it should also be added to the second map. * (We can tell whether an application is undersaturated because its * SAT points back to the application itself.) */ GTree *map1, *map2, *globals, *locals, *constrs; int fileoffset_compare (FileOffset i, FileOffset j) /* for ordering the tree */ { if (iname = id; it->kind = k; it->uses = 0; it->pending = 0; g_tree_insert(map1,(gpointer)node,(gpointer)it); } item * map2_insert (FileOffset usage, FileOffset defn) { item *it=(item*)0; it = g_tree_lookup(map1,(gpointer)defn); if (it) { if (strcmp(it->name,">=")==0) fprintf(stderr,"map2: %s at 0x%x (%d)\n",it->name,usage,it->uses); g_tree_insert(map2,(gpointer)usage,(gpointer)defn); } return it; } int item_sort (FileOffset node, item *it, void* dummy) { item *already; switch (it->kind) { case topid: already = g_tree_lookup(globals,(gpointer)it->name); if (already) { it->uses += already->uses; it->pending += already->pending; } g_tree_insert(globals,(gpointer)it->name,(gpointer)it); break; case localid: already = g_tree_lookup(locals,(gpointer)it->name); if (already) { it->uses += already->uses; it->pending += already->pending; } g_tree_insert(locals, (gpointer)it->name,(gpointer)it); break; case constructor: already = g_tree_lookup(constrs,(gpointer)it->name); if (already) { it->uses += already->uses; it->pending += already->pending; } g_tree_insert(constrs,(gpointer)it->name,(gpointer)it); break; default: break; } return FALSE; } int item_print (char *name, item *it, void* dummy) { fprintf(stdout,"%6d: %s",it->uses,name); if (it->pending) fprintf(stdout," [+%d]\n",it->pending); else fprintf(stdout,"\n"); return FALSE; } /* m_oneNode() skips past one (possibly two) nodes in the file. * As a side-effect, if it finds a NmType identifier, it adds it to * the global structure 'map1'. If it finds a TNm, it adds an entry in * map2 from that TNm to the relevant NmType in map1. If it finds a TAp, * it instead looks up the fun ptr in map2, then looks up that NmType * in map1, and finally increments the usage counter. * * The case in which two nodes are skipped is where the first is a TAp. * We must determine whether it is an undersaturated application by * checking if it is followed by a SATC which points back here. If it * is undersaturated, then the whole TAp is added to map2 and we do not * increment the usage. -- NOT YET IMPLEMENTED */ void m_oneNode (void) { char c; FileOffset node = position; /*fprintf(stdout,"\n0x%x: ",position); fflush(stdout);*/ m_read(&c,sizeof(char),1,HatFile); switch ((int)c>>5) { /* upper 3 bits identify the NodeType */ case Trace: switch ((int)(c&0x1f)) {/* lower 5 bits identify the TraceType */ case TAp: { char size, next; int i; FileOffset fun, defn; item *it; m_read(&size,sizeof(char),1,HatFile); m_readFO(); /* skip parent */ fun = m_readFO(); /* fun ptr */ defn = (FileOffset)g_tree_lookup(map2,(gpointer)fun); for (i=0; i<1+size; i++) m_readFO(); /* args and srcref */ if (defn) { it = g_tree_lookup(map1,(gpointer)defn); next = m_peek(); switch (next) { case ((Trace<<5) | TSatC): if (it) it->uses += 1; break; case ((Trace<<5) | TSatA): case ((Trace<<5) | TSatB): if (it) it->pending += 1; break; default: break; } } } break; case TNm: { FileOffset fo; char next; item *it; m_readFO(); /* skip parent */ fo = m_readFO(); /* pointer to value */ it = map2_insert(node,fo); m_readFO(); /* skip srcref */ if (it && (it->kind==constructor)) it->uses += 1; else { next = m_peek(); /* look at next node */ switch (next) { case ((Trace<<5) | TSatC): m_read(&c,sizeof(char),1,HatFile);/* skip tag */ fo = m_readFO(); /* get Result */ if (it && (fo/=node)) it->uses += 1; break; case ((Trace<<5) | TSatA): case ((Trace<<5) | TSatB): if (it) it->pending += 1; break; default: break; } } } break; case TInd: { m_readFO(); m_readFO(); } break; case THidden: { FileOffset fo, defn; item *it; fo = m_readFO(); defn = (FileOffset)g_tree_lookup(map2,(gpointer)fo); if (defn) map2_insert(node,defn); // if (defn) { // it = (item*)g_tree_lookup(map1,(gpointer)defn); // if (it) it->uses += 1; // } } break; case TSatA: case TSatB: case TSatC: case TSatAL: case TSatBL: case TSatCL: { m_readFO(); } break; default: break; } break; case ModuleInfo: m_readString(); /* skip module name */ m_readString(); /* skip srcfile name */ break; case NmType: switch ((int)(c&0x1f)) {/* lower 5 bits identify the NmType */ case NTInt: { int x; m_read(&x,sizeof(int),1,HatFile); } break; case NTChar: { char x; m_read(&x,sizeof(char),1,HatFile); } break; case NTFloat: { float x; m_read(&x,sizeof(float),1,HatFile); } break; case NTDouble: { double x; m_read(&x,sizeof(double),1,HatFile); } break; case NTInteger: { char size; int n, tmp; m_read(&size,sizeof(char),1,HatFile); for (n=0; n