#include #include extern char **argvec; #include "str.h" #include "config.h" #include "random.h" #include "externs.h" #include "strings.h" #define MAXSHORT 127 /* max length for short-string code */ #define GRAIN 4 /* grain size for short strings - must be > sizeof(void *) */ /* strings 0 long use (char *)0 */ /* strings 1 long use one of 255 statically-allocated strings */ /* strings 2-MAXSHORT long use a block that's the minimal number of GRAINs */ /* strings over MAXSHORT long allocate in sizeof(HDR) grains from an arena, and are referred to by handles. */ /* strings matching those read at startup just point to them */ /* the low two bits of the returned pointer are flags: ...xxx00 means a non-duplicate string (<=MAXSHORT), possibly len=1 ...xxx01 means a non-duplicate handle (>MAXSHORT) ...xxx10 means a duplicate string, pointer points to the DUPSTR ...xxx11 is an error */ typedef struct lumps LUMPS; typedef struct hdr HDR; typedef struct strlist STRLIST; typedef struct dupstr DUPSTR; struct lumps { void *free; int size; } ; struct hdr { unsigned int size : 29; unsigned int free : 1; unsigned int first : 1; unsigned int last : 1; HDR *next; HDR *prev; } ; struct strlist { int height; int count; STRLIST *ptrs[1]; /* actually, [.height] */ } ; #define SL_LINK(slp,n) ((slp)->ptrs[n]) #define SL_STR(slp) ((char *)&(slp)->ptrs[(slp)->height]) #define MAXHEIGHT 16 #define RATIO 2 struct dupstr { DUPSTR *left; DUPSTR *right; unsigned long int hash; char *s; } ; #define MINHOLE (((MAXSHORT+1)/sizeof(hdr))+1) /* min # grains in free blk */ static char *_1_strings[255]; static LUMPS shorts[(MAXSHORT/GRAIN)+1]; static void *malthread; static void *freehandles; static HDR arena[2]; static HDR *hand; static STRLIST *slhead[MAXHEIGHT]; static DUPSTR *duproot; static void *dsfree; static void *smalloc_(int nb, int *sp, const char *file, int line) #define smalloc(nb,sp) smalloc_((nb),(sp),__FILE__,__LINE__) { void *x; int rs; x = muck_malloc_roundup(nb+sizeof(void *),&rs,file,line); *(void **)x = malthread; malthread = x; *sp = rs - sizeof(void *); return(((char *)x)+sizeof(void *)); } static void tfreeall(void) { void *x; while (malthread) { x = malthread; malthread = *(void **)x; free(x); } } /* static unsigned int power_2(unsigned int n) { while (n & (n-1)) n += n & ~(n-1); return(n); } */ static void *pop_lump(LUMPS *l) { char *x; if (! l->free) { int nb; char *f; nb = l->size * 500; if (nb < 16000) nb = 16000; x = smalloc(nb,&nb); while (((int)x) & 3) { x ++; nb --; } f = l->free; while (nb >= l->size) { *(void **)x = f; f = x; x += l->size; nb -= l->size; } l->free = f; while (nb >= GRAIN) { *(void **)x = freehandles; freehandles = x; x += GRAIN; nb -= GRAIN; } } x = l->free; l->free = *(void **)x; return(x); } static void push_lump(void *x, LUMPS *l) { *(void **)x = l->free; l->free = x; } static void *gethandle(void) { char *x; if (! freehandles) { int nb; x = smalloc(8000,&nb); while (((int)x) & 3) { x ++; nb --; } while (nb >= GRAIN) { *(void **)x = freehandles; freehandles = x; x += GRAIN; nb -= GRAIN; } } x = freehandles; freehandles = *(void **)x; return(x); } static void puthandle(void *h) { *(void **)h = freehandles; freehandles = h; } static HDR *new_arena_block(int n) { char *hcp; HDR *h; n = (n + 2) * sizeof(HDR); hcp = smalloc(n,&n); while (((int)hcp) & 3) { hcp ++; n --; } h = (HDR *) hcp; n /= sizeof(HDR); h->size = n; h->free = 1; h->first = 1; h->last = 1; h->next = &arena[0]; h->prev = arena[0].prev; arena[0].prev = h; h->prev->next = h; h[1].next = &arena[0]; h[1].prev = arena[1].prev; arena[1].prev = h; h[1].prev[1].next = h; return(h); } static void *arena_alloc(int ng) { HDR *hand0; hand0 = hand; if (! hand) hand = new_arena_block(ng); while (1) { if (hand->size > ng) { int ngleft; HDR *h; ngleft = hand->size - (ng+1); if (ngleft < 2) { h = hand; hand = hand[1].next; hand[1].prev = h[1].prev; h[1].prev[1].next = hand; } else { h = hand; hand += ng + 1; hand->next = h->next; hand->prev = h; h->next = hand; hand->next->prev = hand; hand->size = ngleft; hand->free = 1; hand->first = 0; hand->last = h->last; h->last = 0; hand[1].next = h[1].next; hand[1].prev = h[1].prev; hand[1].next[1].prev = hand; hand[1].prev[1].next = hand; h->size = ng + 1; h->last = 0; } h->free = 0; return(h+1); } hand = (hand+1)->next; if (hand == hand0) hand = new_arena_block(ng); } } static void arena_free(void *x) { HDR *h; HDR *p; HDR *n; h = ((HDR *)x) - 1; p = h->prev; n = h->next; if (!h->first && p->free) { if (!h->last && n->free) { p->size += h->size + n->size; p->next = n->next; n->next->prev = p; p->last = n->last; p[1].next = n[1].next; n[1].next[1].prev = p; if (hand == n) hand = p; } else { p->size += h->size; p->next = h->next; h->next->prev = p; p->last = h->last; } } else { if (!h->last && n->free) { h->size += n->size; h->next = n->next; n->next->prev = h; h->free = 1; h->last = n->last; h[1].next = n[1].next; h[1].prev = n[1].prev; n[1].prev[1].next = h; n[1].next[1].prev = h; if (hand == n) hand = h; } else { for (n=h->next;!n->free;n=n->next) ; h[1].next = n; h[1].prev = n[1].prev; n[1].prev = h; h[1].prev[1].next = h; h->free = 1; if (hand == n) hand = h; } } } static int arena_memused(void *x) { return((((HDR *)x)-1)->size*sizeof(HDR)); } static void dump_arena_h(HDR *h) { fprintf(stderr,"%x: size=%u free=%u first=%u last=%u next=%x prev=%x",(int)h,h->size,h->free,h->first,h->last,(int)h->next,(int)h->prev); if (h->free) { fprintf(stderr," fnext=%x fprev=%x",(int)h[1].next,(int)h[1].prev); } fprintf(stderr,"\n"); } /* This routine is largely for calling from debuggers */ void dump_arena(void); void dump_arena(void) { HDR *h; dump_arena_h(&arena[0]); for (h=arena[0].next;h!=&arena[0];h=h->next) dump_arena_h(h); fflush(stderr); } /* Vaguely CRCish hash function for strings */ /* (No, CRC it ain't - I XOR in eight bits, then do one bit of CRC) */ static unsigned long int compute_hash(const void *sarg) { register unsigned long int h; register const unsigned char *s; h = 0; s = sarg; while (*s) { h ^= *s++; h = (h >> 1) ^ ((h & 1) ? 0xedb88320 : 0); } return(h); } static int dscmp(DUPSTR *d, unsigned long int h, const char *s) { if (d->hash < h) return(-1); if (d->hash > h) return(1); return(strcmp(d->s,s)); } static DUPSTR *mergelist(DUPSTR *l1, DUPSTR *l2) { DUPSTR *list; DUPSTR **lt; lt = &list; while (l1 || l2) { if (!l2 || (l1 && (dscmp(l1,l2->hash,l2->s) < 0))) { *lt = l1; lt = &l1->left; l1 = l1->left; } else { *lt = l2; lt = &l2->left; l2 = l2->left; } } *lt = 0; return(list); } static DUPSTR *sortlist(DUPSTR *list) { DUPSTR *l[2]; DUPSTR *e; int flip; l[0] = 0; l[1] = 0; flip = 0; while (list) { e = list; list = e->left; e->left = l[flip]; l[flip] = e; } if (! l[1]) return(l[0]); return(mergelist(sortlist(l[0]),sortlist(l[1]))); } static DUPSTR *maketree(DUPSTR *list, int len) { DUPSTR *l_t; DUPSTR *pivot; int l_n; int r_n; int i; if (len < 1) return(0); r_n = len >> 1; l_n = len - r_n - 1; if (l_n == 0) { l_t = 0; pivot = list; } else { l_t = list; for (i=l_n-1;i>0;i--,list=list->left) ; pivot = list->left; list->left = 0; } pivot->right = maketree(pivot->left,r_n); pivot->left = maketree(l_t,l_n); return(pivot); } static int readfile(FILE *f, void *bytes, int nb) { if (fread(bytes,1,nb,f) != nb) return(1); if (getc(f) != EOF) return(1); return(0); } static int skipline(char **sp, int *nbp) { char *s; int n; s = *sp; n = *nbp; for (;(*s!='\n')&&(n>0);s++,n--) ; if (n <= 0) return(1); *s++ = '\0'; *sp = s; *nbp = n - 1; return(0); } static void load_strings(const char *fn) { FILE *f; DUPSTR *dsv; char *bytes; int dsvx; int fmtver; int n; int nb; int c; f = fopen(fn,"r"); if (f == 0) { fprintf(stderr,"%s: note: can't open strings file %s\n",argvec[0],fn); return; } if (fscanf(f,"%d",&fmtver) != 1) { fclose(f); fprintf(stderr,"%s: warning: invalid strings file %s (no format version)\n",argvec[0],fn); return; } switch (fmtver) { default: fclose(f); fprintf(stderr,"%s: warning: %s: unknown format version %d\n",argvec[0],fn,fmtver); return; break; case 1: if (fscanf(f,"%d%d",&n,&nb) != 2) { fclose(f); fprintf(stderr,"%s: warning: invalid strings file %s (no counts)\n",argvec[0],fn); return; } while (1) { c = getc(f); if (c == EOF) { fclose(f); fprintf(stderr,"%s: warning: invalid strings file %s (missing \\n after counts)\n",argvec[0],fn); return; } if (c == '\n') break; } if (n < 1) { fclose(f); return; } if (sizeof(DUPSTR) & 3) { fclose(f); fprintf(stderr,"%s: sorry, sizeof(DUPSTR) is not a multiple of 4, can't do duplicate string stuff\n",argvec[0]); return; } dsfree = malloc((n*sizeof(DUPSTR))+nb+3); bytes = dsfree; while (((int)bytes) & 3) bytes ++; dsv = (void *)bytes; bytes += n * sizeof(DUPSTR); if (readfile(f,bytes,nb)) { fclose(f); fprintf(stderr,"%s: warning: invalid strings file %s (byte count wrong)\n",argvec[0],fn); free(dsfree); return; } fclose(f); for (dsvx=0;dsvxs = bytes; if (skipline(&bytes,&nb)) { fprintf(stderr,"%s: warning: invalid strings file %s (file too short)\n",argvec[0],fn); free(dsfree); return; } d->hash = compute_hash(d->s); } if (nb != 0) { fprintf(stderr,"%s: warning: invalid strings file %s (file too long)\n",argvec[0],fn); free(dsfree); return; } for (dsvx=1;dsvxright : d->left; } return(d); } STR store_str(const char *s) { unsigned long int h; unsigned int l; char *x; DUPSTR *d; if (!s || !*s) return(NOSTR); if (!s[1]) return(_1_strings[(255&s[0])-1]); if (duproot) { h = compute_hash(s); d = dupstr_find(h,s); if (d) return((STR)(2+(int)d)); } l = strlen(s); if (l <= MAXSHORT) { x = pop_lump(&shorts[l/GRAIN]); strcpy(x,s); if (3 & (int)x) { fprintf(stderr,"false handle marker 1"); abort(); } return(x); } else { void *h; x = arena_alloc((l/sizeof(HDR))+1); strcpy(x,s); h = gethandle(); *(void **)h = x; if (3 & (int)h) { fprintf(stderr,"false handle marker 2"); abort(); } return((void *)(1+(int)h)); } } char *fetch_str_(STR s, const char *file, int line) { if (s == NOSTR) return(0); switch (3 & (int)s) { case 0: return(dup_string_(s,file,line)); break; case 1: return(dup_string_(*(void **)(void *)(((int)s)-1),file,line)); break; case 2: return(dup_string_(((DUPSTR *)(((int)s)-2))->s,file,line)); break; } fprintf(stderr,"invalid flag bits\n"); abort(); } void free_str(STR s) { if (s == NOSTR) return; switch (3 & (int)s) { case 0: { int l; l = strlen((char *)s); if (l == 1) return; push_lump(s,&shorts[l/GRAIN]); } break; case 1: { void *h; h = (void *)(((int)s)-1); arena_free(*(void **)h); puthandle(h); } break; case 2: break; case 3: fprintf(stderr,"invalid flag bits\n"); abort(); break; } } void fputs_str(STR s, FILE *f) { if (s == NOSTR) { disk_write(s,f); return; } switch (3 & (int)s) { case 0: disk_write(s,f); break; case 1: { void *h; h = (void *)(((int)s)-1); disk_write(*(void **)h,f); } break; case 2: break; case 3: fprintf(stderr,"invalid flag bits\n"); abort(); break; } } STR copy_str(STR s) { if (s == NOSTR) return(s); switch (3 & (int)s) { case 0: return(store_str(s)); break; case 1: { void *h; h = (void *)(((int)s)-1); return(store_str(*(void **)h)); } break; case 2: return(s); break; } fprintf(stderr,"invalid flag bits\n"); abort(); } int str_memusage(STR s) { if (s == NOSTR) return(0); switch (3 & (int)s) { case 0: if (!((char *)s)[1]) return(0); return(shorts[strlen((char *)s)/GRAIN].size); break; case 1: { void *h; h = (void *)(((int)s)-1); return(GRAIN+arena_memused(*(void **)h)); } case 2: return(0); /* ??? Not sure what's most useful. */ break; } fprintf(stderr,"invalid flag bits\n"); abort(); } int strs_match(STR s1, STR s2) { if (s1 == s2) return(1); if ((s1 == NOSTR) || (s2 == NOSTR)) return(0); if ((3 & (int)s1) != (3 & (int)s2)) return(0); switch (3 & (int)s1) { case 0: return(!strcmp(s1,s2)); break; case 1: { void *h1; void *h2; h1 = (void *)(((int)s1)-1); h2 = (void *)(((int)s2)-1); return(!strcmp(*(void **)h1,*(void **)h2)); } break; case 2: return(s1==s2); break; } fprintf(stderr,"invalid flag bits\n"); abort(); } static STRLIST *strlist_find(const char *s, STRLIST ***pvec) { STRLIST *sl; STRLIST *sl2; int h; int c; for (h=MAXHEIGHT;(h>=0)&&!slhead[h];h--) pvec[h] = &slhead[h]; while (h >= 0) { sl = slhead[h]; c = strcmp(SL_STR(sl),s); if (c == 0) return(sl); if (c < 0) break; pvec[h] = &slhead[h]; h --; } if (h < 0) return(0); while (h >= 0) { while (1) { sl2 = sl->ptrs[h]; if (! sl2) break; c = strcmp(SL_STR(sl2),s); if (c == 0) return(sl2); if (c > 0) break; sl = sl2; } pvec[h] = &sl->ptrs[h]; h --; } return(0); } void record_string(const char *s) { int h; STRLIST *sl; STRLIST *i; STRLIST **ptrs[MAXHEIGHT+1]; if (!s || !s[0] || !s[1]) return; i = strlist_find(s,&ptrs[0]); if (i) { i->count ++; return; } h = 1; while ((h < MAXHEIGHT) && !rnd(RATIO)) h ++; sl = malloc(sizeof(STRLIST)+((h-1)*sizeof(STRLIST *))+strlen(s)+1); sl->height = h; sl->count = 1; strcpy(SL_STR(sl),s); for (h--;h>=0;h--) { sl->ptrs[h] = *ptrs[h]; *ptrs[h] = sl; } } void dump_strings(void) { STRLIST *sl; STRLIST *list; STRLIST **slp; int l; int n; int nb; slp = &list; n = 0; for (sl=slhead[0];sl;sl=sl->ptrs[0]) { if (sl->count < 2) continue; l = strlen(SL_STR(sl)); if (l*(sl->count-1) > (int)sizeof(DUPSTR)) { n ++; nb += l + 1; *slp = sl; slp = &sl->ptrs[0]; } } *slp = 0; printf("1\n%d %d\n",n,nb); for (;list;list=list->ptrs[0]) printf("%s\n",SL_STR(list)); }