/* multidigit state bug fixed */

/***********************************************************************
  
		    Finite state machine compiler
 
  This is not a full rule to state table compiler, but an attempt to
  make things nicer.  It takes table(s) in the following form and
  makes PC-KIMMO input for them.  It assumes that the first state
  defined is the initial state of the machine.
  
  machine harmony
  rejecting state 1
  a:b foobar
  others 1
  
  rejecting state foobar
  <xxx>:y 1
  others reject

  
 
***********************************************************************/

#include <stdio.h>
#include "fst.h"

#define max(x,y) ((x)>(y)?(x):(y))
#define LOGDIR "/afs/athena/user/k/k/kkkken/edu/6.863/lab1/fst/log/"
#define HELPFILE "/afs/athena/user/k/k/kkkken/edu/6.863/lab1/fst/fst.help"

char *lexical, *surface, *statename, *strconst;
FILE *output;
int lexdebug = 0;
char *yyfname = NULL;

char *any = "@";
char *boundary = "#";
char *null = "0";

char *letters[MAXALPHABET];
int numletters = 0;
Subset *subsets = NULL;
Machine *machines = NULL;


main(c, v)
     int c;
     char **v;
{
  int included = 0;

  log_me();
  output = stdout;
  if (c < 2) usage();
  while (++v, --c) {
    if (v[0][0] == '-') {
      switch (v[0][1]) {
      case 'o':
	if (!--c) usage();
	fclose (output);
	if (!(output = fopen(*++v, "w+"))) {
	  perror (*v);
	  exit(1);
	}
	break;
      case 'l':
	lexdebug++;
	break;
      default:
	usage();
      }
    } else {
      include (*v);
      included = 1;
    }
  }
  if (!included) {
    yyfname = "Standard input";
    doit();
  }
  alldone();
}


log_me()
{
  char filename[150];
  char *user = (char *) getenv("USER");
  FILE *f;
  
  if (user)
    sprintf (filename, "%s%s", LOGDIR, user);
  else
    sprintf (filename, "%s%d", LOGDIR, getuid());
  f = fopen(filename, "w+");
  if (f) fclose(f);
}

  
usage()
{
  FILE *f;
  char linebuf[1024];

  if (f = fopen(HELPFILE, "r")) {
    while (fgets(linebuf, sizeof(linebuf), f))
      fputs (linebuf, stderr);
    fclose (f);
  } else 
    fprintf (stderr, "usage: fst [-l] [-o outputfile] inputfile ...\n");
  exit(1);
}

char *stralloc(str)
     char *str;
{
  int n = strlen(str) + 1;
  char *nstr = (char *) malloc(n);
  bcopy (str, nstr, n);
  return nstr;
}
  
include (fname)
     char *fname;
{
  FILE *f, *oldyyin;
  char *oldyysbuf;
  char *oldcurfname;
  int oldyysbufsiz;
  char *oldyyfname;
  int oldyylineno; 
  
  if (!(f = fopen(fname, "r"))) {
    perror (f);
    return;
  }
  oldyyin = yyin;
  oldyyfname = yyfname;
  oldyylineno = yylineno;
  oldyysbufsiz = yysptr-yysbuf;
  if (oldyysbufsiz) {
    oldyysbuf = (char *) malloc(yysptr-yysbuf);
    bcopy (yysbuf, oldyysbuf, oldyysbufsiz);
  }

  yyin = f;
  yysptr = yysbuf;
  yyfname = stralloc(fname);
  yylineno = 1;
  doit();

  fclose(f);
  free(yyfname);
  yylineno = oldyylineno;
  yyfname = oldyyfname;
  yyin = oldyyin;
  yysptr = oldyysbuf + oldyysbufsiz;
  if (oldyysbufsiz) {
    bcopy (oldyysbuf, yysbuf, oldyysbufsiz);
    free (oldyysbuf);
  }
}
    
int numdigits (int n) {
/* assumes n > 0 */
     int d;
     for (d = 0; n ; d++, n/=10);
     return d;
}

int gettoken ()
{
  int code = yylex();
  if (lexdebug) 
    switch (code) {
    case MACHINE: printf ("[MACHINE]\n"); break;
    case REJECTING: printf ("[REJECTING]\n"); break;
    case STATE: printf ("[STATE]\n"); break;
    case INCLUDE: printf ("[INCLUDE]\n"); break;
    case OTHERS: printf ("[OTHERS]\n"); break;
    case KREJECT: printf ("[REJECT]\n"); break;
    case SUBSET: printf ("[SUBSET]\n"); break;
    case PAIR: printf ("pair: ( %s . %s )\n", lexical, surface); break;
    case STATENAME: printf ("state: %s\n", statename); break;
    case STRCONST: printf ("string constant: %s\n", strconst); break;
    default: printf ("[yylex returns %d]\n", code);
    }
  return code;
}

parse_error(msg)
     char *msg;
{
  fflush(stdout);
  fprintf(stderr, "\nParse error on line %d of %s:\n   %s\n",
	  yylineno, yyfname, msg);
  exit(1);
}

semantic_error (machine, state, col, msg)
     Machine *machine;
     State *state;
     int col;
     char *msg;
{
  fflush(stdout);
  fprintf (stderr, "\nSemantic error");
  if (yyfname) fprintf (stderr, " in %s, line %d", yyfname, yylineno);
  if (machine) fprintf (stderr, " in machine %s", machine->name);
  if (state) fprintf (stderr, " in state %s", state->name);
  if (machine && (col >= 0)) 
    fprintf (stderr, " for pair %s:%s",
	    machine->pairs[col].lexical,
	    machine->pairs[col].surface);
  fprintf (stderr, ":\n   %s\n", msg);
  exit (1);
}
    

int getstatebyname (m, name)
     Machine *m;
     char *name;
{
  State *s;
  
  if (name == REJECTSTATE) return 0;
  for (s = m->states; s; s = s->next)
    if (!strcmp(s->name, name))
      return s->number;
  return -1;
}

Subset *new_subset()
{
  Subset *s = (Subset *) malloc(sizeof(Subset));

  s->next = NULL;
  s->numletters = 0;
  return s;
}


Machine *new_machine()
{
  Machine *m = (Machine *) malloc(sizeof(Machine));
  m->used = 0;
  m->states = NULL;
  return m;
}

State *new_state()
{
  State *s = (State *) malloc(sizeof(State));
  bzero (s->newstate, sizeof(s->newstate));
  s->others = NULL;
  return s;
}

char *getlettercreate (name)
     char *name;
{
  int n;
  Subset *s;

  if (!strcmp(name, any)) return any;
  if (!strcmp(name, boundary)) return boundary;
  if (!strcmp(name, null)) return null;
  for (n = 0; n < numletters; n++)
    if (!strcmp(name, letters[n]))
      return letters[n];
  for (s = subsets; s; s = s->next)
    if (!strcmp(name, s->name))
      return s->name;
  if (numletters == MAXALPHABET)
    parse_error ("Too many letters ... increase MAXALPHABET and recompile me");
  return (letters[numletters++] = stralloc(name));
}

int getpaircreate (machine, lexical, surface)
     Machine *machine;
     char *lexical;
     char *surface;
{
  int col;
  
  for (col = 0; col < machine->used; col++)
    if (!strcmp(lexical, machine->pairs[col].lexical) &&
	!strcmp(surface, machine->pairs[col].surface))
      return col;
  if (machine->used == MAXCOLS)
    parse_error ("Too many pairs used in machine --- recompile me with MAXCOLS increased");
  col = machine->used++;
  machine->pairs[col].lexical = getlettercreate (lexical);
  machine->pairs[col].surface = getlettercreate (surface);
  return col;
}


write_preamble()
{
  int n, m;
  Subset *s;
  
  printf ("ALPHABET");
  for (n = 0; n < numletters; n++)
    printf (" %s", letters[n]);
  printf ("\nNULL %s\n\ANY %s\n\BOUNDARY %s\n", null, any, boundary);
  for (s = subsets; s; s = s->next) {
    printf ("SUBSET %s", s->name);
    for (n = 0; n < s->numletters; n++)
      printf (" %s", s->letters[n]);
    printf ("\n");
  }
  printf ("\nRULE \"Bogus rule for KIMMO brain lossage\" 1 %d\n",
	  numletters + 1);
  for (m = 0; m < 2; m++) {
    for (n = 0; n < numletters; n++)
      printf (" %s", letters[n]);
    printf (" @\n");
  }
  printf ("1: ");
  for (n = 0; n <= numletters; n++)
    printf (" 1", letters[n]);
  printf ("\n");
}

void write_machine (m)
     Machine *m;
{
  int numstates;
  State *state;
  int colwid[MAXCOLS];
  int col, newstate, otherstate, sl, ll;
  int nd;
  char *nsname;
  
  getpaircreate(m, any, any);		/* Add this to every machine */
  for (numstates = 0, state = m->states;
       state;
       state = state->next) 
    state->number = ++numstates;
  nd = numdigits(numstates);

  printf ("\nRULE \"%s\"   %d %d\n", m->name, numstates, m->used);
  printf ("    ");
  for (col = 0; col < m->used; col++) {
    ll = strlen(m->pairs[col].lexical);
    sl = strlen(m->pairs[col].surface);
    ll = max(ll,sl);
    colwid[col] = max(ll,nd)+1;
    printf ("%*s", colwid[col], m->pairs[col].lexical);
  }
  printf ("\n    ");
  for (col = 0; col < m->used; col++) 
    printf ("%*s", colwid[col], m->pairs[col].surface);
  printf ("\n");
  for (state = m->states; state; state = state->next) {
    otherstate = getstatebyname (m, state->others);
    printf ("%3d%c", state->number, state->rejecting ? '.' : ':');
    for (col = 0; col < m->used; col++) {
      nsname = state->newstate[col];
      newstate = nsname ? getstatebyname (m, nsname) : otherstate;
      if (newstate < 0)
	semantic_error (m, state, col, "Undefined target state");
      printf ("%*d", colwid[col], newstate);
    }
    printf ("\n");
  }
}

void free_machine (m)
     Machine *m;
{
  State *s;
  int col;
  
  free (m->name);
  for (s = m->states; s; s = s->next) {
    free (s->name);
    
/*
 * BUG BUG BUG... MEMORY LEAK...
 *
 * we're not freeing target state names.  That's because it's not as
 * easy as it looks (entries can be NULL, (char*)-1, or a malloc'ed
 * string, and there can be many references to a shared malloc'ed
 * string.)
 */

  }
  free (m);
}


static Machine *machine = NULL;	/* What machine are we building? */
static Machine *lastmachine = NULL;
static State *state = NULL;		/* What state in that machine? */
static State *laststate = NULL;

doit()
{
  int code;
  int rejecting = 0;
  char *target;
  int pair;
  Subset *s;
  
  while (code = gettoken()) {		/* until we reach end-of-file... */

  retry:
    
    switch (code) {

    case SUBSET:
      if (gettoken() != STATENAME)
	parse_error ("Subset name missing");
      s = new_subset();
      s->name = stralloc(statename);
      s->next = subsets;
      subsets = s;
      while ((code = gettoken()) == STATENAME) {
	if (s->numletters == MAXALPHABET)
	  parse_error ("Subset too large ... increase MAXALPHABET and recompile me");
	s->letters[s->numletters++] = getlettercreate(statename);
      }
      goto retry;
      
    case INCLUDE:
      if (gettoken() != STRCONST)
	parse_error ("Expecting string constant (filename) after `include'");
      include (strconst);
      break;
      
    case MACHINE:
      machine = new_machine();
      if (lastmachine)
	lastmachine->next = machine;
      else
	machines = machine;
      if (gettoken() != STRCONST)
	parse_error ("Expecting string constant after `machine'");
      machine->name = stralloc(strconst);
      state = NULL;
      laststate = NULL;
      lastmachine = machine;
      break;

    case REJECTING:
      rejecting = 1;
      if (gettoken() != STATE)
	parse_error ("Must see `state' after `rejecting'");
					/* Fall through */
    case STATE:
      if (!machine)
	semantic_error (NULL, NULL, -1, "Must be building a machine to add a state");
      if (gettoken() != STATENAME)
	parse_error ("Must see a state name after `state'");
      state = new_state();
      if (laststate) 
	laststate->next = state;
      else 
	machine->states = state;
      state->name = stralloc(statename);
      state->machine = machine;
      state->rejecting = rejecting;
      state->next = NULL;
      laststate = state;
      rejecting = 0;
      break;

    case PAIR:
      if (!(machine && state))
	semantic_error (machine, state, -1, "Must be building a state to add a pair");
      pair = getpaircreate (machine, lexical, surface);
      
      code = gettoken();
      switch (code) {
      case STATENAME: target = stralloc(statename); break;
      case KREJECT: target = REJECTSTATE; break;
      default:
	parse_error ("Must see a target state (or `reject') after a pair");
      }
      state->newstate[pair] = target;
      break;

    case OTHERS:
      if (!(machine && state))
	semantic_error (machine, state, -1, "Must be building a state to specify others");
      code = gettoken();
      switch (code) {
      case STATENAME: target = stralloc(statename); break;
      case KREJECT: target = REJECTSTATE; break;
      default:
	parse_error ("Must see a target state (or `reject') after a pair");
      }
      state->others = target;
      break;

      
    default:
      parse_error ("Expecting keyword");
      
    }
  }
}


alldone()
{
  Machine *machine;
  
  write_preamble();
  for (machine = machines; machine; machine = machine->next) 
    write_machine(machine);
  printf ("END\n");
}

    

yywrap()
{
  return 1;
}
