2 char *wartv = "Wart Version 2.14, 10 Nov 1999";
7 /* Use the real ones in this module only */
29 A small subset of "lex".
31 Authors: Jeff Damens, Frank da Cruz
32 Columbia University Center for Computing Activities.
33 First released November 1984.
34 Copyright (C) 1984, 2004,
35 Trustees of Columbia University in the City of New York.
36 All rights reserved. See the C-Kermit COPYING.TXT file or the
37 copyright text in the ckcmai.c module for disclaimer and permissions.
42 * lines to be copied | %state <state names...>
44 * <state> | <state,state,...> CHAR { actions }
47 * more lines to be copied
50 #include "ckcdeb.h" /* Includes */
53 /* Actually call printf, not our printf-catcher for Kermit */
63 /* Same deal for Macintosh */
82 The following "char" should be changed to "short", "int", or "long" if your
83 wart program will generate more than 127 states. Since wart is used mainly
84 with C-Kermit, which has about 80 states, "char" is adequate. This keeps
85 the program about 3K-4K smaller, which can be critical on 16-bit
90 Also use short or int if your compiler complains inordinately about
91 "integer conversion resulted in a change of sign"...
93 #define TBL_TYPE "short" /* C data type of state table */
95 #define TBL_TYPE "char" /* C data type of state table */
98 #define C_L 014 /* Formfeed */
100 #define SEP 1 /* Token types */
/*
 * Fixed sizing limits for the state machine being compiled.
 * NOTE(review): nothing visible in this excerpt compares nstates against
 * MAXSTATES (rdstates() simply does ++nstates per name) -- confirm a bound
 * is enforced somewhere, since everything sized from these is static.
 */
108 #define MAXSTATES 50 /* max number of states */
109 #define MAXWORD 50 /* max # of chars/word */
/*
 * setwstate() selects the mask byte with state/8, so state numbers
 * 0..MAXSTATES need MAXSTATES/8 + 1 bytes.  (MAXSTATES+6)/8 gives 7 for
 * MAXSTATES == 50, which is enough, but the formula is one byte short
 * whenever MAXSTATES % 8 is 0 or 1 (e.g. 56 or 57) -- TODO revisit before
 * raising MAXSTATES.
 */
110 #define SBYTES ((MAXSTATES+6)/8) /* # of bytes for state bitmask */
112 /* Name of wart function in generated program */
118 /* Structure for state information */
121 CHAR states[SBYTES]; /* included states */
122 int anyst; /* true if this good from any state */
123 CHAR inchr; /* input character */
124 int actno; /* associated action */
126 }; /* next transition */
127 typedef struct transx *trans;
129 /* Function prototypes */
131 _PROTOTYP( VOID setwstate, (int, trans) );
132 _PROTOTYP( int teststate, (int, trans) );
133 _PROTOTYP( trans rdinput, (FILE *, FILE *) );
134 _PROTOTYP( VOID initial, (FILE *, FILE *) );
135 _PROTOTYP( int isin, (char *, int) );
136 _PROTOTYP( int isword, (int) );
137 _PROTOTYP( VOID rdword, (FILE *, char *) );
138 _PROTOTYP( VOID rdstates, (FILE *, FILE *) );
139 _PROTOTYP( trans newtrans, (void) );
140 _PROTOTYP( trans rdrules, (FILE *, FILE *) );
141 _PROTOTYP( VOID statelist, (FILE *, trans) );
142 _PROTOTYP( VOID copyact, (FILE *, FILE *, int) );
143 _PROTOTYP( int faction, (trans, int, int) );
144 _PROTOTYP( VOID emptytbl, (void) );
145 _PROTOTYP( VOID addaction, (int, int, int) );
146 _PROTOTYP( VOID writetbl, (FILE *) );
147 _PROTOTYP( VOID warray, (FILE *, char *, int [], int, char *) );
148 _PROTOTYP( VOID prolog, (FILE *) );
149 _PROTOTYP( VOID epilogue, (FILE *) );
150 _PROTOTYP( VOID copyrest, (FILE *, FILE *) );
151 _PROTOTYP( int gettoken, (FILE *) );
152 _PROTOTYP( VOID rdcmnt, (FILE *) );
153 _PROTOTYP( VOID clrhash, (void) );
154 _PROTOTYP( int hash, (char *) );
155 _PROTOTYP( VOID enter, (char *, int) );
156 _PROTOTYP( int lkup, (char *) );
157 _PROTOTYP( static char* copy, (char *s) );
159 /* Variables and tables */
161 /* lt 1992-10-08 Begin
162 * provide definition for deblog variable
163 * ckcdeb.h declares as extern. DECC AXP is strict about ref/def model
164 * Variable is unused herein, to the best of my knowledge.
/*
 * lines   - current input line number; reset to 1 by rdinput() and printed
 *           by fatal() in its error message.
 * nstates - number of state names read so far (incremented in rdstates()).
 * nacts   - number of actions read so far (incremented in rdrules()).
 */
172 static int lines, nstates, nacts;
/* Text of the most recent token, filled in by gettoken(). */
174 static char tokval[MAXWORD];
/*
 * Action table, 96 columns per state, indexed as tbl[state*96 + chr]
 * (see addaction()).
 * NOTE(review): main() fills states 0..nstates inclusive and writetbl()
 * emits 96*(nstates+1) entries, so MAXSTATES*96 ints only cover
 * nstates <= MAXSTATES-1 -- confirm nstates is bounded accordingly.
 */
176 static int tbl[MAXSTATES*96];
178 char *tbl_type = TBL_TYPE;
180 char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n";
182 char *fname = FNAME; /* Generated function name goes here */
184 /* rest of program... */
191 /* Data type of state table (the TBL_TYPE string) is inserted here */
197 debug(F000,\"PROTO input\",ckitoa(state),c+32);\n\
198 if (c < 0 || c > 95) c = 0;\n";
200 char *txt2b = " if ((actno = tbl[c + state*96]) != -1)\n\
203 /* this program's output goes here, followed by final text... */
205 char *txt3 = "\n }\n }\n}\n\n";
209 * turn on the bit associated with the given state
213 setwstate(state,t) int state; trans t; {
215 idx = state/8; /* byte associated with state */
216 msk = 0x80 >> (state % 8); /* bit mask for state */
217 t->states[idx] |= msk;
221 * see if the state is involved in the transition
225 teststate(state,t) int state; trans t; {
228 msk = 0x80 >> (state % 8);
229 return(t->states[idx] & msk);
234 * read input from here...
239 rdinput(infp,outfp) FILE *infp,*outfp; {
241 lines = 1; /* line counter */
242 nstates = 0; /* no states */
243 nacts = 0; /* no actions yet */
244 fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
245 fprintf(outfp,"Wart preprocessor. */\n");
246 fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
247 fprintf(outfp,"source file instead, */\n");
248 fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
249 fprintf(outfp,"C source file. */\n\n");
250 fprintf(outfp,"%c* Wart Version Info: */\n",'/');
251 fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
253 initial(infp,outfp); /* read state names, initial defs */
254 prolog(outfp); /* write out our initial code */
255 x = rdrules(infp,outfp); /* read rules */
256 epilogue(outfp); /* write out epilogue code */
262 * initial - read initial definitions and state names. Returns
267 initial(infp,outfp) FILE *infp, *outfp; {
269 char wordbuf[MAXWORD];
270 while ((c = getc(infp)) != EOF) {
272 rdword(infp,wordbuf);
273 if (strcmp(wordbuf,"states") == 0)
274 rdstates(infp,outfp);
275 else if (strcmp(wordbuf,"%") == 0) return;
276 else fprintf(outfp,"%%%s",wordbuf);
279 if (c == '\n') lines++;
284 * boolean function to tell if the given character can be part of
289 isin(s,c) char *s; int c; {
290 for (; *s != '\0'; s++)
291 if (*s == (char) c) return(1);
296 static char special[] = ".%_-$@"; /* these are allowable */
297 return(isalnum(c) || isin(special,c));
301 * read the next word into the given buffer.
305 rdword(fp,buf) FILE *fp; char *buf; {
307 while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c;
308 *buf++ = '\0'; /* tie off word */
309 ungetc(c,fp); /* put break char back */
313 * read state names, up to a newline.
317 rdstates(fp,ofp) FILE *fp,*ofp; {
319 char wordbuf[MAXWORD];
320 while ((c = getc(fp)) != EOF && c != '\n') {
321 if (isspace(c) || c == C_L) continue; /* skip whitespace */
322 ungetc(c,fp); /* put char back */
323 rdword(fp,wordbuf); /* read the whole word */
324 enter(wordbuf,++nstates); /* put into symbol tbl */
325 fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
331 * allocate a new, empty transition node
338 new = (trans) malloc(sizeof (struct transx));
339 for (i=0; i<SBYTES; i++) new->states[i] = 0;
347 * read all the rules.
352 rdrules(fp,out) FILE *fp,*out; {
355 head = cur = prev = NULL;
356 while ((curtok = gettoken(fp)) != SEP)
363 fatal("duplicate state list");
364 statelist(fp,cur); /* set states */
365 continue; /* prepare to read char */
368 if ((int)strlen(tokval) != 1)
369 fatal("multiple chars in state");
374 cur->actno = ++nacts;
375 cur->inchr = (char) (tokval[0] - 32);
382 copyact(fp,out,nacts);
384 default: fatal("bad input format");
390 * read a list of (comma-separated) states, set them in the
395 statelist(fp,t) FILE *fp; trans t; {
398 while (curtok != RBRACK) {
399 if (curtok != COMMA) fatal("missing comma");
400 if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
401 if ((sval = lkup(tokval)) == -1) {
402 fprintf(stderr,"state %s undefined\n",tokval);
403 fatal("undefined state");
406 curtok = gettoken(fp);
411 * copy an action from the input to the output file
415 copyact(inp,outp,actno) FILE *inp,*outp; int actno; {
417 fprintf(outp,"case %d:\n",actno);
418 while (c = getc(inp), (isspace(c) || c == C_L))
419 if (c == '\n') lines++;
423 while (bcnt > 0 && (c = getc(inp)) != EOF) {
424 if (c == '{') bcnt++;
425 else if (c == '}') bcnt--;
426 else if (c == '\n') lines++;
429 if (bcnt > 0) fatal("action doesn't end");
431 while (c != '\n' && c != EOF) {
437 fprintf(outp,"\n break;\n");
441 * find the action associated with a given character and state.
442 * returns -1 if one can't be found.
446 faction(hd,state,chr) trans hd; int state,chr; {
448 if (hd->anyst || teststate(state,hd))
449 if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr)
463 for (i=0; i<nstates*96; i++) tbl[i] = -1;
467 * add the specified action to the output for the given state and chr.
471 addaction(act,state,chr) int act,state,chr; {
472 tbl[state*96 + chr] = act;
476 writetbl(fp) FILE *fp; {
477 warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE);
482 * write an array to the output file, given its name and size.
486 warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; {
488 fprintf(fp,"%s %s[] = {\n",typ,nam);
489 for (i = 0; i < siz - 1; ) {
490 fprintf(fp," %2d,",cont[i]);
491 if ((++i % 16) == 0) putc('\n',fp);
493 fprintf(fp,"%2d\n};\n",cont[siz-1]);
499 If you get complaints about "main: return type is not blah",
500 define MAINTYPE on the CC command line, e.g. "CFLAGS=-DMAINTYPE=int".
520 The default case should be int, not VOID, but it's been this way for
521 years (no doubt for a reason) and who knows how many builds would break
525 #endif /* __GNUC__ */
528 #endif /* CK_SCOV5 */
529 #endif /* MAINTYPE */
531 main(argc,argv) int argc; char **argv; {
534 FILE *infile,*outfile;
537 if ((infile = fopen(argv[1],"r")) == NULL) {
538 fprintf(stderr,"Can't open %s\n",argv[1]);
539 fatal("unreadable input file");
541 } else infile = stdin;
544 if ((outfile = fopen(argv[2],"w")) == NULL) {
545 fprintf(stderr,"Can't write to %s\n",argv[2]);
546 fatal("bad output file");
548 } else outfile = stdout;
550 clrhash(); /* empty hash table */
551 head = rdinput(infile,outfile); /* read input file */
552 emptytbl(); /* empty our tables */
553 for (state = 0; state <= nstates; state++)
554 for (c = 1; c < 96; c++) /* find actions, */
555 addaction(faction(head,state,c),state,c); /* add to tbl */
557 copyrest(infile,outfile);
558 printf("%d states, %d actions\n",nstates,nacts);
564 * fatal error handler
569 fatal(msg) char *msg; {
570 fprintf(stderr,"error in line %d: %s\n",lines,msg);
575 prolog(outfp) FILE *outfp; {
577 while ((c = *txt1++) != '\0') putc(c,outfp);
578 while ((c = *fname++) != '\0') putc(c,outfp);
579 while ((c = *txt2++) != '\0') putc(c,outfp);
580 while ((c = *tbl_type++) != '\0') putc(c,outfp);
581 while ((c = *txt2a++) != '\0') putc(c,outfp);
582 while ((c = *txt2b++) != '\0') putc(c,outfp);
586 epilogue(outfp) FILE *outfp; {
588 while ((c = *txt3++) != '\0') putc(c,outfp);
592 copyrest(in,out) FILE *in,*out; {
594 while ((c = getc(in)) != EOF) putc(c,out);
598 * gettoken - returns token type of next token, sets tokval
599 * to the string value of the token if appropriate.
604 gettoken(fp) FILE *fp; {
606 while (1) { /* loop if reading comments... */
609 if (c == '\n') lines++;
610 } while ((isspace(c) || c == C_L)); /* skip whitespace */
615 if ((c = getc(fp)) == '%') return(SEP);
617 tokval[1] = (char) c;
627 if ((c = getc(fp)) == '*') {
628 rdcmnt(fp); /* skip over the comment */
630 } else { /* and keep looping */
631 ungetc(c,fp); /* put this back into input */
632 c = '/'; /* put character back, fall thru */
640 } else fatal("Invalid character in input");
646 * skip over a comment
651 rdcmnt(fp) FILE *fp; {
653 prcnt = star = 0; /* no star seen yet */
654 while (!((c = getc(fp)) == '/' && star)) {
655 if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
658 if (c == '\n') lines++;
663 * symbol table management for wart
666 * clrhash - empty hash table.
667 * enter - enter a name into the symbol table
668 * lkup - find a name's value in the symbol table.
672 #define HASHSIZE 101 /* # of entries in hash table */
675 char *name; /* symbol name */
677 struct sym *hnxt; /* next on collision chain */
678 } *htab[HASHSIZE]; /* the hash table */
681 * empty the hash table before using it...
687 for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
691 * compute the value of the hash for a symbol
695 hash(name) char *name; {
697 for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
698 sum %= HASHSIZE; /* take sum mod hashsize */
699 if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
704 * make a private copy of a string...
710 new = (char *) malloc((int)strlen(s) + 1);
716 * enter state name into the hash table
720 enter(name,svalue) char *name; int svalue; {
723 if (lkup(name) != -1) {
724 fprintf(stderr,"state \"%s\" appears twice...\n", name);
728 cur = (struct sym *)malloc(sizeof (struct sym));
729 cur->name = copy(name);
736 * find name in the symbol table, return its value. Returns -1
741 lkup(name) char *name; {
743 for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
744 if (strcmp(cur->name,name) == 0) return(cur->val);