Path: coconuts.jaist!wnoc-tyo-news!tokyonet.ad.jp!tdi-extnews!tdi-rcac!wnoc-kyo-news!wnoc-sfc-news!venus.sun.com!cs.utexas.edu!howland.reston.ans.net!vixen.cso.uiuc.edu!newsfeed.internetmci.com!news.dacom.co.kr!usenet.seri.re.kr!news.imnet.ad.jp!ripspost.aist.go.jp!news.tisn.ad.jp!is.s.u-tokyo!yura From: yura@is.s.u-tokyo.ac.jp (YURA Shunsuke) Newsgroups: fj.sources Subject: dserver 2.1-beta patch for compressed dictionary Date: 25 Mar 1996 07:12:13 GMT Organization: Dept.of Information Science, The University of Tokyo, Japan Lines: 690 Distribution: fj Message-ID: <4j5h0e$2am@isnews.is.s.u-tokyo.ac.jp> Reply-To: yura@is.s.u-tokyo.ac.jp NNTP-Posting-Host: zaurak.is.s.u-tokyo.ac.jp Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP 東大情報科学の由良といいます。 CD-ROM辞書検索プログラムの Dic ver.0.23 では辞書を圧縮する事ができますが、この圧縮辞書を dserver2.1β でも使えるようにしてみました。 dserver2.1.tar.gz の server2.1ディレクトリに移ってこのパッチを当てて下さい。 SunOS 4.1.4 と FreeBSD 2.1.0Rでは動作しました(辞書は、研究社の新英和・和英中辞典)。 Dic で作った圧縮辞書はエンディアン依存なので、dserver を実行させるマシンと同じエンディアンのマシンで圧縮辞書を作成して下さい。 なお、Dic ver.0.23 は ftp.tohoku.ac.jp://pub/TOWNS などにあるようです。 --------------------------- CUT HERE ----------------------------- *** server2.1.orig/dict.h Tue Jun 27 01:20:50 1995 --- server2.1/dict.h Fri Mar 22 21:45:31 1996 *************** *** 18,21 **** --- 18,27 ---- FILE *stream; Index *index; + int compressed; + char *buf; + int buffered; + unsigned long *comp_tab, comp_seek; + int frm, nfrm; + long ofs; } Dict; *** server2.1.orig/lookup.c Thu Nov 9 22:37:31 1995 --- server2.1/lookup.c Mon Mar 25 14:26:05 1996 *************** *** 195,198 **** --- 195,310 ---- } + /* extracted from cdio.c in dic023.lzh */ + decode(buf, len, fp) + char *buf; + FILE *fp; + { + int i, n, c, d, bit; + + n = 0; + while ( n < len && (bit = getc(fp)) != EOF ) { + for ( i = 1 ; i < 0x100 && n < len ; i <<= 1 ) { + if ( (bit & i) == 0 ) { + if ( (c = getc(fp)) == EOF || (d = getc(fp)) == EOF ) + break; + d |= (c << 8); + c = ((d & 0x1F) + 3); + d >>= 5; d &= 0x7FF; + while 
( c-- > 0 && n < len ) + buf[n++] = buf[d++]; + } else { + if ( (c = getc(fp)) == EOF ) + break; + buf[n++] = c; + } + } + } + + return n; + } + + readframe(dict) + Dict *dict; + { + unsigned long ofs, sect; + sect = dict->frm - 1; + if (dict->comp_seek != (sect & 0xffffff00L)) { + dict->comp_seek = (sect & 0xffffff00L); + fseek(dict->stream, dict->comp_seek*sizeof(long) + 4L, 0); + fread(dict->comp_tab, sizeof(long), 256, dict->stream); + } + fseek(dict->stream, dict->comp_tab[sect & 0xff], 0); + decode(dict->buf, FRAMESIZE, dict->stream); + dict->buffered = 1; + } + + dict_seek(dict,ofs) + Dict *dict; + long ofs; + { + if (dict->compressed) { + dict->ofs += ofs; + if (dict->ofs >= FRAMESIZE) { + dict->buffered = 0; + while (dict->ofs >= FRAMESIZE) { + dict->ofs -= FRAMESIZE; + if (++dict->frm >= dict->nfrm) + return -1; + } + } + return 0; + } + else + return fseek (dict->stream, ofs, 1); + } + + dict_read(dict,ptr,size,count) + Dict *dict; + char *ptr; + { + int s; + if (dict->compressed) { + size *= count; + while (size > 0) { + if (!dict->buffered) + readframe (dict); + s = dict->ofs+size > FRAMESIZE ? FRAMESIZE - dict->ofs : size; + bcopy(dict->buf + dict->ofs, ptr, s); + ptr += s; + size -= s; + dict->ofs += s; + if (dict->ofs == FRAMESIZE) { + dict->ofs = 0; + dict->buffered = 0; + if (++dict->frm >= dict->nfrm) + return 0; + } + } + return count; + } + else + return fread (ptr, size, count, dict->stream); + } + + dict_getc(dict) + Dict *dict; + { + int c; + if (dict->compressed) { + if (!dict->buffered) + readframe(dict); + c = (unsigned char) dict->buf[dict->ofs]; + if (++dict->ofs == FRAMESIZE) { + dict->buffered = 0; + dict->ofs = 0; + if (++dict->frm >= dict->nfrm) + return EOF; + } + return c; + } + else + return getc (dict->stream); + } + init_dic(dict,stream) Dict *dict; *************** *** 202,214 **** static unsigned char b[BS]; static unsigned char null[BS]; ! int e; Index *new; dict->stream = stream; bzero(null,BS); ! 
seekframe(dict->stream,1); ! if(fread(b,BS,1,dict->stream)!=1) return -1; e = TWOBYTEUINT(b); --- 314,339 ---- static unsigned char b[BS]; static unsigned char null[BS]; ! static char buf[4]; ! int e, i; Index *new; dict->stream = stream; + fseek(stream, (long) 0, 0); + if (fread(buf, 4, 1, stream)!=1) return -1; + if (strncmp(buf, "EDIC", 4) == 0) { + dict->compressed = 1; + fread (&i, 4, 1, stream); + dict->nfrm = i / 4 - 1; + dict->buf = (char*) malloc(FRAMESIZE); + dict->buffered = 0; + dict->comp_tab = (unsigned long *) malloc(sizeof(long)*256); + dict->comp_seek = 0xffffffffL; + } + bzero(null,BS); ! seekframe(dict,1); ! if(dict_read(dict,b,BS,1)!=1) return -1; e = TWOBYTEUINT(b); *************** *** 221,225 **** for(; e>0; e--) { ! if(fread(b,BS,1,dict->stream)!=1) return -1; if(!bcmp(b,null,BS)) break; --- 346,350 ---- for(; e>0; e--) { ! if(dict_read(dict,b,BS,1)!=1) return -1; if(!bcmp(b,null,BS)) break; *************** *** 233,240 **** } ! seekframe(stream,f) ! FILE *stream; { ! return (fseek(stream, (long)(f-1)*FRAMESIZE , 0)); } --- 358,376 ---- } ! seekframe(dict,f) ! Dict *dict; { ! if (dict->compressed) { ! if (f >= dict->nfrm) ! return -1; ! if (!dict->buffered || dict->frm != f) { ! dict->frm = f; ! dict->buffered = 0; ! } ! dict->ofs = 0; ! return 0; ! } ! else ! return (fseek(dict->stream, (long)(f-1)*FRAMESIZE , 0)); } *************** *** 343,348 **** do { ! seekframe(dict->stream,frm++); ! fread(frame,FRAMESIZE,1,dict->stream); if((frame[0]&0x10) && frame[1]!=0) { /* koteityou assyuku type */ --- 479,484 ---- do { ! seekframe(dict,frm++); ! dict_read(dict,frame,FRAMESIZE,1); if((frame[0]&0x10) && frame[1]!=0) { /* koteityou assyuku type */ *************** *** 427,432 **** printf("#Newpage\n"); #endif ! seekframe(dict->stream,frm); ! fread(frame,FRAMESIZE,1,dict->stream); q=frame+4; } --- 563,568 ---- printf("#Newpage\n"); #endif ! seekframe(dict,frm); ! 
dict_read(dict,frame,FRAMESIZE,1); q=frame+4; } *************** *** 511,524 **** printf("%x,%x\n",frm,ofs); #endif ! if(seekframe(dict->stream,frm)) return ; ! if(fseek(dict->stream,(long)ofs,1)) return ; if (dict_set[env_p->dict_num].code == FULL) { ! hi = getc(dict->stream); ! lo = getc(dict->stream); } else { ! lo = getc(dict->stream); if (lo < 32) { hi = lo; ! lo = getc(dict->stream); } else hi = 0x23; } --- 647,660 ---- printf("%x,%x\n",frm,ofs); #endif ! if(seekframe(dict,frm)) return ; ! if(dict_seek(dict,(long)ofs)) return ; if (dict_set[env_p->dict_num].code == FULL) { ! hi = dict_getc(dict); ! lo = dict_getc(dict); } else { ! lo = dict_getc(dict); if (lo < 32) { hi = lo; ! lo = dict_getc(dict); } else hi = 0x23; } *************** *** 527,537 **** if(hi!=0x1f) puteucz2h(cur_s,hi,lo,env_p->dict_num); if (dict_set[env_p->dict_num].code == FULL) { ! hi = getc(dict->stream); ! lo = getc(dict->stream); } else { ! lo = getc(dict->stream); if (lo < 32) { hi = lo; ! lo = getc(dict->stream); } else hi = 0x23; } --- 663,673 ---- if(hi!=0x1f) puteucz2h(cur_s,hi,lo,env_p->dict_num); if (dict_set[env_p->dict_num].code == FULL) { ! hi = dict_getc(dict); ! lo = dict_getc(dict); } else { ! lo = dict_getc(dict); if (lo < 32) { hi = lo; ! lo = dict_getc(dict); } else hi = 0x23; } *************** *** 571,582 **** #endif ! if( seekframe(dict->stream,frm) ) return ; if(raw) { ! fread(frame,FRAMESIZE,1,dict->stream); write(cur_s,frame,FRAMESIZE); /*count=FRAMESIZE/2; while( count-- ) { ! hi = getc(dict->stream); ! lo = getc(dict->stream); sprintf(buf,"#x%02x%02x",hi,lo); write(cur_s,buf,strlen(buf)); --- 707,718 ---- #endif ! if( seekframe(dict,frm) ) return ; if(raw) { ! dict_read(dict,frame,FRAMESIZE,1); write(cur_s,frame,FRAMESIZE); /*count=FRAMESIZE/2; while( count-- ) { ! hi = dict_getc(dict); ! lo = dict_getc(dict); sprintf(buf,"#x%02x%02x",hi,lo); write(cur_s,buf,strlen(buf)); *************** *** 591,628 **** } */ ! if( fseek(dict->stream,(long)ofs,1)) return ; ! 
hi = getc(dict->stream); /* Fetch */ ! lo = getc(dict->stream); while(!(hi==0x1f && lo==0x02) && /* if Doc start */ !(hi==0x1f && lo==0x41) && /* if Key start */ !(hi==0x1f && lo==0x45)) { /* if Fig start */ if(hi==0x1f && lo==0x09){ /* if not /* if TAB skip 2byte */ ! hi = getc(dict->stream); ! lo = getc(dict->stream); } ! hi = getc(dict->stream); ! lo = getc(dict->stream); } /* Doc ,Key or Fig start */ /* if Key start */ if(hi==0x1f && lo==0x41) { ! hi = getc(dict->stream); /* skip 2 bytes */ ! lo = getc(dict->stream); } /* if Fig start */ if(hi==0x1f && lo==0x45) { ! hi = getc(dict->stream); /* skip 2 bytes */ ! lo = getc(dict->stream); fig=TRUE; } if (dict_set[env_p->dict_num].code == FULL) { ! hi = getc(dict->stream); ! lo = getc(dict->stream); } else { ! lo = getc(dict->stream); if (lo < 32) { hi = lo; ! lo = getc(dict->stream); } else hi = 0x23; } --- 727,764 ---- } */ ! if( dict_seek(dict,(long)ofs)) return ; ! hi = dict_getc(dict); /* Fetch */ ! lo = dict_getc(dict); while(!(hi==0x1f && lo==0x02) && /* if Doc start */ !(hi==0x1f && lo==0x41) && /* if Key start */ !(hi==0x1f && lo==0x45)) { /* if Fig start */ if(hi==0x1f && lo==0x09){ /* if not /* if TAB skip 2byte */ ! hi = dict_getc(dict); ! lo = dict_getc(dict); } ! hi = dict_getc(dict); ! lo = dict_getc(dict); } /* Doc ,Key or Fig start */ /* if Key start */ if(hi==0x1f && lo==0x41) { ! hi = dict_getc(dict); /* skip 2 bytes */ ! lo = dict_getc(dict); } /* if Fig start */ if(hi==0x1f && lo==0x45) { ! hi = dict_getc(dict); /* skip 2 bytes */ ! lo = dict_getc(dict); fig=TRUE; } if (dict_set[env_p->dict_num].code == FULL) { ! hi = dict_getc(dict); ! lo = dict_getc(dict); } else { ! lo = dict_getc(dict); if (lo < 32) { hi = lo; ! lo = dict_getc(dict); } else hi = 0x23; } *************** *** 635,640 **** if(TAB ) { ! hi = getc(dict->stream); /* skip 2bytes */ ! lo = getc(dict->stream); TAB =FALSE; } --- 771,776 ---- if(TAB ) { ! hi = dict_getc(dict); /* skip 2bytes */ ! 
lo = dict_getc(dict); TAB =FALSE; } *************** *** 665,669 **** case 0x14: skipcnt = 2; ! while(skipcnt--) hi = getc(dict->stream); break; case 0x0a: /* cr */ --- 801,805 ---- case 0x14: skipcnt = 2; ! while(skipcnt--) hi = dict_getc(dict); break; case 0x0a: /* cr */ *************** *** 675,681 **** break; case 0x31: /* fig start */ ! hi = getc(dict->stream); ! lo = getc(dict->stream); ! fread(rbuf,4,1,dict->stream); height = TWOBCDUINT(rbuf); width = TWOBCDUINT(rbuf+2); --- 811,817 ---- break; case 0x31: /* fig start */ ! hi = dict_getc(dict); ! lo = dict_getc(dict); ! dict_read(dict,rbuf,4,1); height = TWOBCDUINT(rbuf); width = TWOBCDUINT(rbuf+2); *************** *** 683,690 **** write(cur_s,buf,strlen(buf)); do { ! hi = getc(dict->stream); ! lo = getc(dict->stream); } while(!(hi==0x1f && lo==0x51 )); ! fread(rbuf,6,1,dict->stream); frmtmp = FOURBCDUINT(rbuf); ofstmp = TWOBCDUINT(rbuf+4); --- 819,826 ---- write(cur_s,buf,strlen(buf)); do { ! hi = dict_getc(dict); ! lo = dict_getc(dict); } while(!(hi==0x1f && lo==0x51 )); ! dict_read(dict,rbuf,6,1); frmtmp = FOURBCDUINT(rbuf); ofstmp = TWOBCDUINT(rbuf+4); *************** *** 695,699 **** if(fig) { skipcnt = 4; ! while(skipcnt--) hi = getc(dict->stream); } break; --- 831,835 ---- if(fig) { skipcnt = 4; ! while(skipcnt--) hi = dict_getc(dict); } break; *************** *** 705,715 **** case 0x41: /* key start with 0 0 */ /* skipcnt = 2; while(skipcnt--) */ ! hi = getc(dict->stream); ! lo = getc(dict->stream); break; case 0x44: /* fig start */ ! hi = getc(dict->stream); ! lo = getc(dict->stream); ! fread(rbuf,8,1,dict->stream); height = FOURBCDUINT(rbuf); width = FOURBCDUINT(rbuf+4); --- 841,851 ---- case 0x41: /* key start with 0 0 */ /* skipcnt = 2; while(skipcnt--) */ ! hi = dict_getc(dict); ! lo = dict_getc(dict); break; case 0x44: /* fig start */ ! hi = dict_getc(dict); ! lo = dict_getc(dict); ! 
dict_read(dict,rbuf,8,1); height = FOURBCDUINT(rbuf); width = FOURBCDUINT(rbuf+4); *************** *** 719,727 **** case 0x45: /* fig and music ref start */ /* skipcnt = 2; while(skipcnt--) */ ! hi = getc(dict->stream); ! lo = getc(dict->stream); break; case 0x48: /* music ref start */ ! fread(rbuf,10,1,dict->stream); frmtmp=SIXBCDUINT(rbuf); ofstmp=FOURBCDUINT(rbuf+6); --- 855,863 ---- case 0x45: /* fig and music ref start */ /* skipcnt = 2; while(skipcnt--) */ ! hi = dict_getc(dict); ! lo = dict_getc(dict); break; case 0x48: /* music ref start */ ! dict_read(dict,rbuf,10,1); frmtmp=SIXBCDUINT(rbuf); ofstmp=FOURBCDUINT(rbuf+6); *************** *** 729,733 **** break; case 0x51: /* pointer to fig page:offset(BCD)*/ ! fread(rbuf,6,1,dict->stream); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); --- 865,869 ---- break; case 0x51: /* pointer to fig page:offset(BCD)*/ ! dict_read(dict,rbuf,6,1); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); *************** *** 736,740 **** break; case 0x52: /* picture end with page:offset(BCD)*/ ! fread(rbuf,6,1,dict->stream); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); --- 872,876 ---- break; case 0x52: /* picture end with page:offset(BCD)*/ ! dict_read(dict,rbuf,6,1); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); *************** *** 745,749 **** ref = FALSE; if(fig==FALSE){ ! fread(rbuf,8,1,dict->stream); frmtmp=FOURBCDUINT(rbuf); ofstmp=FOURBCDUINT(rbuf+4); --- 881,885 ---- ref = FALSE; if(fig==FALSE){ ! dict_read(dict,rbuf,8,1); frmtmp=FOURBCDUINT(rbuf); ofstmp=FOURBCDUINT(rbuf+4); *************** *** 751,759 **** write(cur_s,buf,strlen(buf)); } else { ! if( seekframe(dict->stream,frmtmp) ) return ; ! if( fseek(dict->stream,(long)ofstmp,1)) return ; for( i=0; istream); } } --- 887,895 ---- write(cur_s,buf,strlen(buf)); } else { ! if( seekframe(dict,frmtmp) ) return ; ! 
if( dict_seek(dict,(long)ofstmp)) return ; for( i=0; istream); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); --- 901,905 ---- case 0x63: /* menu end with page:offset(BCD) */ ref = FALSE; ! dict_read(dict,rbuf,6,1); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); *************** *** 773,777 **** case 0x64: /* fig ref end with page:offset(BCD) */ honmon = TRUE; ! fread(rbuf,6,1,dict->stream); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); --- 909,913 ---- case 0x64: /* fig ref end with page:offset(BCD) */ honmon = TRUE; ! dict_read(dict,rbuf,6,1); frmtmp=FOURBCDUINT(rbuf); ofstmp=TWOBCDUINT(rbuf+4); *************** *** 783,791 **** case 0x65: /* ref end */ if (fig==TRUE) { ! if( seekframe(dict->stream,frmtmp) ) return ; ! if( fseek(dict->stream,(long)ofstmp,1)) return ; for( i=0; istream); } } --- 919,927 ---- case 0x65: /* ref end */ if (fig==TRUE) { ! if( seekframe(dict,frmtmp) ) return ; ! if( dict_seek(dict,(long)ofstmp)) return ; for( i=0; idict_num].code == FULL) { ! hi = getc(dict->stream); ! lo = getc(dict->stream); } else { ! lo = getc(dict->stream); if (lo < 32) { hi = lo; ! lo = getc(dict->stream); } else hi = 0x23; } --- 943,953 ---- if (dict_set[env_p->dict_num].code == FULL) { ! hi = dict_getc(dict); ! lo = dict_getc(dict); } else { ! lo = dict_getc(dict); if (lo < 32) { hi = lo; ! lo = dict_getc(dict); } else hi = 0x23; } --------------------------- CUT HERE ----------------------------- *-----------------------------------------------------* | 東京大学大学院 理学系研究科 情報科学専攻 | | 由良 俊介 YURA Shunsuke | *-----------------------------------------------------*