/* #define LZW_DEBUG /* debugging traces in modified lzw code */
/* #define PACK_DEBUG /* debugging traces in packing code */

/*
 * String compression/decompression: a modified LZW coder whose output codes
 * are bit-packed and written as characters drawn from a restricted alphabet.
 *
 * NOTE(review): this copy of the file shows text-extraction damage.  The
 *  header names after the first three #include directives are missing, and
 *  in several places a span of text beginning at a '<' character has been
 *  dropped, fusing unrelated statements together.  Each such place is
 *  marked "damaged" below.  Code in those places is reproduced exactly as
 *  found and must be restored from a pristine copy before it can be built
 *  or changed.
 */

#include
#include
#include
#include "defs.h"
#include "str_z.h"
#include "config.h"
#include "interface.h"

#define TBLMAX 65536
#define UNZSIZE 65536

/* The 96 characters strings may contain: ASCII 0x20 through 0x7e, plus tab. */
static char cchars[] /* core chars */
 = " !\"#$%&'()*+,-./0123456789:;<=>?"
   "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
   "`abcdefghijklmnopqrstuvwxyz{|}~\t";
#define NCC 96 /* # of chars in cchars */

#ifdef DISKSTRINGS_Z
/* NOTE(review): damaged - the end of the ccinv declaration, the matching
    #endif (presumably) and the start of the fchars[] initializer are
    missing; only the tail of the fchars string literals survives. */
static unsigned char ccinv[1<?@ABC" "DEFGHIJKLMNOPQRST" "UVWXYZ_abcdefghij" "klmnopqrstuvwxyz~";
#define NFC 85 /* # of chars in fchars */

/* NOTE(review): damaged - the end of the fcinv declaration, the definitions
    of the tbl column indices used below (U_PARENT, U_BYCHAR, U_FIRST,
    F_LINK, C_LINK, C_CHAR, C_KIDS), of U__N/F__N/C__N/TBL__N, and the start
    of a conditional that compares F__N against TBL__N are all missing. */
static unsigned char fcinv[1< TBL__N
#undef TBL__N
#define TBL__N F__N
#endif
/* Widen TBL__N to C__N when the compressor is built and needs more columns. */
#if defined(DISKSTRINGS_Z) && (C__N > TBL__N)
#undef TBL__N
#define TBL__N C__N
#endif

/* The code table: one row per code, TBL__N columns per row.  The functions
   below index the columns by U_*, F_* and C_* names. */
static unsigned short int tbl[TBLMAX][TBL__N];

#define PADCODE 0 /* this has to be all bits zero */
#define CODESHIFT 1

/* Number of table rows in use, the width of a code in bits, and the
   threshold that makes inctblsize() widen the code by one bit. */
static unsigned int tblsize;
static unsigned int codebits;
static unsigned int tblgrow;
/* Snapshot taken by inctblsize() so uninctblsize() can undo one step. */
static unsigned int lasttblsize;
static unsigned int lastcodebits;
static unsigned int lasttblgrow;
/* First row of a chain linked through the F_LINK column from which rows are
   handed out once tblsize has reached TBLMAX; PADCODE means the chain is
   empty (see gotcode()). */
static unsigned int tblfree;
/* Decoder: the row the next table entry will be written to. */
static unsigned int nextcode;
#ifdef DISKSTRINGS_Z
/* Compressor: pending output bits and how many of them are valid. */
static unsigned long int obuf;
static unsigned int obfill;
#endif
/* Decoder: decoded characters and the number stored so far. */
static char accum[UNZSIZE];
static int accptr;
/* Current code; the negative values -1 and -2 are special states tested in
   gotcode(). */
static int tblptr;
/* Decoder input state, used by gotcodes() and unz_rv(). */
static unsigned long int ipart;
static unsigned int ipfill;
static unsigned long int ibuf;
static unsigned long int ifact;
static unsigned long int ifactlim;
#ifdef DISKSTRINGS_Z
/* Compressor output stream, set by z_init(). */
static FILE *f;
#endif

/* Number of bits in an unsigned long int. */
#define BMAX (sizeof(unsigned long int)*CHAR_BIT)

#ifdef LZW_DEBUG
static FILE *zdf;
static int zdf_in;
static FILE *uzdf;
#endif

#ifdef PACK_DEBUG
static FILE *pdf;
static FILE *updf;
static int updf_in;
#endif

/*
 * Called as make_ccinv(fchars,fcinv) and make_ccinv(cchars,ccinv) below.
 *
 * NOTE(review): damaged - the body of make_ccinv is cut off inside its first
 *  loop, and the text resumes inside a routine that prints the bits of a
 *  value as '1'/'0' characters (called below as pdbits(val,nbits,fp)).  The
 *  end of make_ccinv, the preprocessor conditional that the #endif after
 *  pdbits2 closes, and the start of pdbits are missing.
 */
static void make_ccinv(const char *clist, unsigned char *inv)
{
  int i;

  for (i=0;i<(1<>=1) putc((val&bit)?'1':'0',fp);
}

/*
 * Debug helper: print the low `total' bits of val with a '.' separating the
 * low `right' bits from the bits above them.  Either side is omitted when it
 * would be empty.
 */
static void pdbits2(unsigned long int val, unsigned int total, unsigned int right, FILE *fp)
{
  if (right < total) pdbits(val>>right,total-right,fp);
  putc('.',fp);
  if (right > 0) pdbits(val,right,fp);
}
#endif

/*
 * Account for one more table row.  The previous tblsize/codebits/tblgrow are
 * saved first so uninctblsize() can undo this call.  When tblsize passes
 * tblgrow, codes become one bit wider and the threshold doubles.
 */
static void inctblsize(void)
{
  lasttblsize = tblsize;
  lastcodebits = codebits;
  lasttblgrow = tblgrow;
  tblsize ++;
  if (tblsize > tblgrow) {
    codebits ++;
    tblgrow <<= 1;
  }
}

/*
 * Undo the most recent inctblsize().  Only one level is saved, so calling
 * this twice in a row does not step back twice.
 */
static void uninctblsize(void)
{
  tblsize = lasttblsize;
  codebits = lastcodebits;
  tblgrow = lasttblgrow;
}

#ifdef DISKSTRINGS_Z

/*
 * Write output digit c (0 <= c < NFC) as the character fchars[c] to the
 * output stream f.  An out-of-range digit is an internal error: a message is
 * printed to stderr and the process aborts.
 */
static void putfc(int c)
{
  if ((c < 0) || (c >= NFC)) {
    fprintf(stderr,"bad putfc %d [NFC=%d]\n",c,NFC);
    abort();
  }
  putc(fchars[c],f);
#ifdef PACK_DEBUG
  putc(fchars[c],pdf);
#endif
}

/* note that if NFC has more bits than the first code output, this may output one or more all-zero-bits codes. This is why PADCODE is 0. */
/*
 * Write the low nb bits of obuf as base-NFC digits, least significant digit
 * first, consuming obuf in the process.
 *
 * skip0s zero: the number of digits depends only on nb (tst counts down
 *  from 2^nb).
 * skip0s nonzero: nothing is written if obuf is zero; otherwise digits are
 *  written only until the remaining value reaches zero.
 */
static void dumpbits(int nb, int skip0s)
{
  unsigned long int tst;

#ifdef PACK_DEBUG
  fprintf(pdf,"[dumping ");
  pdbits(obuf,nb,pdf);
  putc(':',pdf);
#endif
  if (skip0s && (obuf == 0)) {
#ifdef PACK_DEBUG
    fprintf(pdf,"skipping]");
#endif
    return;
  }
  if (nb == BMAX) {
    /* 2^BMAX is not representable, so one digit is written up front and
       the loop counter starts at 2^BMAX/NFC instead. */
    putfc(obuf%NFC);
    obuf /= NFC;
    /* compute (ULONG_MAX+1)/NFC without risking overflow */
    tst = (~0UL / NFC) + ((~0UL % NFC) == NFC-1);
  } else {
    tst = 1UL << nb;
    /* discard anything above the nb bits being written */
    obuf &= tst - 1;
  }
  if (skip0s) tst = obuf;
  while (tst) {
    putfc(obuf%NFC);
    obuf /= NFC;
    tst /= NFC;
  }
#ifdef PACK_DEBUG
  putc(']',pdf);
#endif
}

/*
 * Append code c, codebits bits wide, to the output bit buffer.  When the
 * buffer cannot hold the whole code, a full BMAX-bit buffer is written with
 * dumpbits() and the bits of c that did not fit start the next buffer.
 */
static void emitcode(int c)
{
#ifdef PACK_DEBUG
  fprintf(pdf,"%5d=",c);
  pdbits(c,codebits,pdf);
#endif
  if (obfill+codebits <= BMAX) {
    /* the whole code fits */
    obuf |= ((unsigned long int)c) << obfill;
    obfill += codebits;
#ifdef PACK_DEBUG
    fprintf(pdf," obuf now ");
    pdbits2(obuf,BMAX,obfill,pdf);
    putc('\n',pdf);
#endif
    return;
  }
#ifdef PACK_DEBUG
  putc(' ',pdf);
#endif
  if (obfill < BMAX) {
    /* split: low bits of c complete this buffer, high bits start the next */
    obuf |= ((unsigned long int)c) << obfill;
    dumpbits(BMAX,0);
    obuf = c >> (BMAX - obfill);
    obfill = codebits - (BMAX - obfill);
  } else {
    /* buffer exactly full; handled apart so nothing is shifted by BMAX,
       the full width of the type */
    dumpbits(BMAX,0);
    obuf = c;
    obfill = codebits;
  }
#ifdef PACK_DEBUG
  fprintf(pdf," obuf now ");
  pdbits2(obuf,BMAX,obfill,pdf);
  fprintf(pdf,"\n");
#endif
}

/*
 * Write whatever bits are pending in obuf, without trailing zero digits
 * (dumpbits() with skip0s set), and mark the buffer empty.
 */
static void flushbits(void)
{
#ifdef PACK_DEBUG
  fprintf(pdf,"flushbits, obuf ");
  pdbits2(obuf,BMAX,obfill,pdf);
  putc(' ',pdf);
#endif
  dumpbits(obfill,1);
  obfill = 0;
#ifdef PACK_DEBUG
  putc('\n',pdf);
#endif
}

#endif

/*
 * Append the core character with index c (0 <= c < NCC) to accum.  An
 * out-of-range index prints a message to stderr and aborts.
 *
 * NOTE(review): accptr is not checked against UNZSIZE here, so a decoded
 *  string of UNZSIZE or more characters would appear to write past the end
 *  of accum - confirm whether callers bound the length.
 */
static void gotchar(int c)
{
  if ((c < 0) || (c >= NCC)) {
    fprintf(stderr,"invalid gotchar(%d) [NCC=%d]\n",c,NCC);
    abort();
  }
  accum[accptr++] = cchars[c];
#ifdef LZW_DEBUG
  putc(cchars[c],uzdf);
#endif
}

/*
 * Append the string that code c stands for to accum.  Codes at or above
 * NCC+CODESHIFT first expand their U_PARENT code recursively; every code
 * then contributes its own U_BYCHAR character.  PADCODE is never valid here
 * and aborts.
 *
 * NOTE(review): the recursion is as deep as the parent chain is long.
 */
static void gotchars(int c)
{
  if (c == PADCODE) {
    fprintf(stderr,"gotchars(PADCODE)?\n");
    abort();
  }
  if (c >= NCC+CODESHIFT) {
    gotchars(tbl[c][U_PARENT]);
  }
  gotchar(tbl[c][U_BYCHAR]);
}

/*
 * Decoder-side pruning, called from gotcode() when the table is full and
 * tblfree is PADCODE.
 *
 * NOTE(review): damaged - everything between the start of the first loop and
 *  the middle of a debug-trace loop is missing (including, presumably, the
 *  #ifdef that the #endif below closes), so the pruning logic itself is not
 *  visible.  What remains prints runs of table indices to uzdf as "j" or
 *  "j-k".
 * NOTE(review): the final run is printed as ending at TBLMAX although the
 *  last row is TBLMAX-1 - possibly an off-by-one in trace output only.
 * NOTE(review): nonleaf is a TBLMAX-byte automatic array, and bzero() is a
 *  legacy interface.
 */
static void pruneleaves_unz(void)
{
  int i;
  int j;
  char nonleaf[TBLMAX];

  bzero(&nonleaf[0],TBLMAX);
  for (i=NCC+CODESHIFT;i= 0) {
        if (j < i-1) {
          fprintf(uzdf," %d-%d",j,i-1);
        } else {
          fprintf(uzdf," %d",j);
        }
      }
      j = -1;
    } else {
      if (j < 0) j = i;
    }
  }
  if (j >= 0) {
    if (j < TBLMAX-1) {
      fprintf(uzdf," %d-%d",j,TBLMAX);
    } else {
      fprintf(uzdf," %d",j);
    }
  }
  fprintf(uzdf,"]");
#endif
}

/*
 * Process one input code.
 *
 * PADCODE is ignored.  Otherwise the behaviour depends on tblptr:
 *  -1: remember c as the current code, reserve row tblsize for the next
 *      entry, and output c's string.  No table entry is written.
 *  -2: (the state unz_rv() leaves behind) as for -1, except that the next
 *      row comes from the tblfree chain once the table has TBLMAX rows.
 *  otherwise: reject c if it is not below tblsize; write the reserved row
 *      nextcode (unless it is PADCODE) as "string of tblptr plus the first
 *      character of c's string"; prune if the table is full and no free row
 *      is left; make c the current code; reserve the next row; and output
 *      c's string.
 *
 * NOTE(review): in the -1 and -2 states c is expanded without the
 *  "c >= tblsize" check the general path applies - confirm that is safe
 *  for damaged input.
 * NOTE(review): tblsize is unsigned int but is printed with %d.
 */
static void gotcode(int c)
{
  int bychar;

#ifdef PACK_DEBUG
  putc('\t',updf);
  pdbits(c,codebits,updf);
  fprintf(updf," = %d\n",c);
#endif
  if (c == PADCODE) return;
#ifdef LZW_DEBUG
  fprintf(uzdf,"%d",c);
#endif
  if (tblptr == -1) {
    tblptr = c;
    nextcode = tblsize;
    inctblsize();
#ifdef LZW_DEBUG
    fprintf(uzdf," <");
#endif
    gotchars(c);
#ifdef LZW_DEBUG
    fprintf(uzdf,">\n");
    fflush(uzdf);
#endif
    return;
  }
  if (tblptr == -2) {
    tblptr = c;
    if (tblsize < TBLMAX) {
      nextcode = tblsize;
      inctblsize();
    } else {
      nextcode = tblfree;
      tblfree = tbl[tblfree][F_LINK];
    }
#ifdef LZW_DEBUG
    fprintf(uzdf," <");
#endif
    gotchars(c);
#ifdef LZW_DEBUG
    fprintf(uzdf,">\n");
    fflush(uzdf);
#endif
    return;
  }
  if (c >= tblsize) {
    fprintf(stderr,"invalid code %d [tblsize=%d] in file\n",c,tblsize);
    panic("invalid code in db file");
  }
  if (c == nextcode) {
    /* aXaXa special case */
    /* c names the row about to be written, so its first character is the
       first character of the previous string */
    bychar = tbl[tblptr][U_FIRST];
  } else {
    bychar = tbl[c][U_FIRST];
  }
  if (nextcode != PADCODE) {
    tbl[nextcode][U_PARENT] = tblptr;
    tbl[nextcode][U_BYCHAR] = bychar;
    tbl[nextcode][U_FIRST] = tbl[tblptr][U_FIRST];
#ifdef LZW_DEBUG
    fprintf(uzdf," %d/%d=%d",tblptr,bychar,nextcode);
#endif
  }
  if ((tblsize == TBLMAX) && (tblfree == PADCODE)) pruneleaves_unz();
  tblptr = c;
  if (tblsize < TBLMAX) {
    nextcode = tblsize;
    inctblsize();
  } else {
    nextcode = tblfree;
    tblfree = tbl[tblfree][F_LINK];
  }
#ifdef LZW_DEBUG
  fprintf(uzdf,"<");
#endif
  gotchars(c);
#ifdef LZW_DEBUG
  fprintf(uzdf,">\n");
  fflush(uzdf);
#endif
}

/*
 * Split the buffered input bits into codebits-wide codes and hand each one
 * to gotcode().
 *
 * NOTE(review): damaged twice.  The first gap removes the end of the
 *  "ipfill > 0" branch and the start of the loop that follows it.  The
 *  second removes the rest of this function and the beginning of what is
 *  presumably the compressor-side pruning routine (pruneleaves_z(), called
 *  further down); only the tail of that routine's debug trace survives.
 * NOTE(review): as in pruneleaves_unz(), the final run in the trace is
 *  printed as ending at TBLMAX rather than TBLMAX-1.
 */
static void gotcodes(void)
{
#ifdef PACK_DEBUG
  pdbits(ibuf,BMAX,updf);
  if (ipfill > 0) {
    putc('.',updf);
    pdbits(ipart,ipfill,updf);
  }
  putc('\n',updf);
#endif
  if (ipfill > 0) {
    int b;

    b = codebits;
    ipart |= ibuf << ipfill;
    ibuf >>= b - ipfill;
    gotcode(ipart&((1<= codebits) {
    int b;
    unsigned long int new;

    b = codebits;
    new = ipart >> b;
    gotcode(ipart&((1<= 0) {
        if (j < i-1) {
          fprintf(zdf," %d-%d",j,i-1);
        } else {
          fprintf(zdf," %d",j);
        }
      }
      j = -1;
    }
  }
  if (j >= 0) {
    if (j < TBLMAX-1) {
      fprintf(zdf," %d-%d",j,TBLMAX);
    } else {
      fprintf(zdf," %d",j);
    }
  }
  fprintf(zdf,"]");
#endif
}

#endif

/*
 * Initialise the decoder; begins by building the inverse map fcinv for the
 * fchars alphabet.
 *
 * NOTE(review): damaged - the rest of unz_init and any definitions that
 *  followed it are missing.  The text resumes at the end of another routine
 *  that calls gotcodes(); the PACK_DEBUG trace there matches the one in
 *  unz_rv(), which suggests it is the routine that accepts input characters
 *  - confirm against a pristine copy.
 */
void unz_init(void)
{
  int i;

  make_ccinv(fchars,fcinv);
  for (i=0;i -> ");
    updf_in = 0;
#endif
    gotcodes();
  }
}

/*
 * Finish decoding the current string and return it.
 *
 * If ifact is 1 and nothing has been accumulated, an empty string is
 * returned at once.  Otherwise the buffered input is pushed through
 * gotcodes(), and once more with a zero ibuf if bits remain in ipart; bits
 * still left after that are an internal error and abort.  The last
 * inctblsize() is then undone, the result is NUL-terminated, and the state
 * is reset for the next string with tblptr set to -2.
 *
 * Returns a pointer to the static buffer accum; the contents are
 * overwritten when decoding continues.
 */
char *unz_rv(void)
{
  if ((ifact == 1) && (accptr == 0)) {
    accum[0] = '\0';
    return(&accum[0]);
  }
#ifdef PACK_DEBUG
  if (updf_in) fprintf(updf,"> ");
  fprintf(updf,"flushing -> ");
  updf_in = 0;
#endif
  gotcodes();
  if (ipart) {
    ibuf = 0;
    gotcodes();
    if (ipart) {
      fprintf(stderr,"nonzero bits left in unz_rv\n");
      abort();
    }
  }
  uninctblsize();
  ipfill = 0;
  accum[accptr] = '\0';
  accptr = 0;
  tblptr = -2;
  return(&accum[0]);
}

#ifdef DISKSTRINGS_Z

/*
 * Initialise the compressor to write to fp; begins by recording the stream
 * and building the inverse map ccinv for the cchars alphabet.
 *
 * NOTE(review): damaged - the rest of z_init and the start of the routine
 *  that follows it (presumably the one that compresses a single character;
 *  it uses a character index c and a local newcode) are missing, including
 *  the #ifdef that the first #endif below closes.
 *
 * The surviving tail takes a new row (from tblsize, or from the tblfree
 * chain once the table has TBLMAX rows); unless that row is PADCODE it is
 * linked in as the newest child of row tblptr for character c; the table is
 * pruned if it is full with no free row left; and tblptr is reset to the
 * code c + CODESHIFT.
 */
void z_init(FILE *fp)
{
  int i;

  f = fp;
  make_ccinv(cchars,ccinv);
  for (i=0;i',zdf);
    fprintf(zdf,"[%d",tblptr);
#endif
    if (tblsize < TBLMAX) {
      newcode = tblsize;
      inctblsize();
    } else {
      newcode = tblfree;
      tblfree = tbl[newcode][F_LINK];
    }
    if (newcode != PADCODE) {
      /* push newcode onto the front of tblptr's child list */
      tbl[newcode][C_LINK] = tbl[tblptr][C_KIDS];
      tbl[newcode][C_CHAR] = c;
      tbl[newcode][C_KIDS] = 0;
      tbl[tblptr][C_KIDS] = newcode;
#ifdef LZW_DEBUG
      fprintf(zdf,",%d/%d=%d",tblptr,c,newcode);
#endif
    }
    if ((tblsize == TBLMAX) && (tblfree == PADCODE)) pruneleaves_z();
#ifdef LZW_DEBUG
    fprintf(zdf,"]\n");
    fflush(zdf);
    zdf_in = 0;
#endif
    tblptr = c + CODESHIFT;
  }
}

/*
 * End the current string: emit the code for the pending match, set tblptr
 * to -1, and flush the pending output bits.
 */
void z_push(void)
{
#ifdef LZW_DEBUG
  if (zdf_in) putc('>',zdf);
  fprintf(zdf,"[%d]\n",tblptr);
  fflush(zdf);
  zdf_in = 0;
#endif
  emitcode(tblptr);
  tblptr = -1;
  flushbits();
#ifdef PACK_DEBUG
  fflush(pdf);
#endif
}

#endif