/*	PCKFUNCS.C - functions used in PC-KIMMO apart from user interface
 ***************************************************************************
 *
 *	void report_error(severity, err_msg, pline, filename, va_alist)
 *	int severity;
 *	struct message *err_msg;
 *	int *pline;
 *	char *filename;
 *	va_dcl
 *
 *	VOIDP myalloc(size)
 *	unsigned size;
 *
 *	VOIDP mystrdup(str)
 *	char *str;
 *
 *	VOIDP myrealloc(s,size)
 *	VOIDP s;
 *	unsigned size;
 *
 *	void myfree(s)
 *	VOIDP s;
 *
 *	int move_automata(lexChar, surfChar, config, lang)
 *	unsigned char lexChar;
 *	unsigned char surfChar;
 *	int *config;
 *	LANGUAGE *lang;
 *
 *	int final_config(config,lang)
 *	int *config;
 *	LANGUAGE *lang;
 *
 *	int valid_form(form,lang,logfp)
 *	unsigned char *form;
 *	LANGUAGE *lang;
 *	FILE *logfp;
 *
 *	RESULT *add_result(pres, pfeat, headp, nullchar, trace, logfp)
 *	unsigned char *pres;
 *	unsigned char *pfeat;
 *	RESULT *headp;
 *	unsigned char nullchar;
 *	int trace;
 *	FILE *logfp;
 *
 *	unsigned char *getline(infp, line_num, comment_char)
 *	FILE *infp;
 *	int  *line_num;
 *	unsigned char comment_char;
 *
 *	int strpos(s,c)
 *	unsigned char *s;
 *	unsigned char c;
 *
 *	unsigned char *strtok8(s1, s2)
 *	unsigned char *s1;
 *	unsigned char *s2;
 *
 *	TRIE *add_to_trie( trp, key, info, linkinfo, maxlevel )
 *	TRIE *trp;
 *	char *key;
 *	VOIDP info;
 *	(VOIDP)(*linkinfo)();
 *	int maxlevel;
 *
 *	void show_trie(trp, showinfo)
 *	TRIE *trp;
 *	void (*showinfo)();
 *
 *	void erase_trie(trp, eraseinfo)
 *	TRIE *trp;
 *	void (*eraseinfo)();
 *
 *	void free_result(resp)
 *	register RESULT *resp;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	13-Feb-87	STRTOK.C written by Steve McConnel
 *	31-Mar-87	STRPOS.C written by Steve McConnel
 *	28-Jul-88	MYALLOC.C written by Steve McConnel
 *	 1-Feb-89	FINALCON.C written by Dave Smith
 *	20-May-89	TRIE.C written by Steve McConnel (loosely based on
 *					TRISET.C and TRIACC.C from AMPLE)
 *	17-Jun-89	SH_UTIL.C written by Dave Smith
 *	15-Jul-89	GETLINE.C written by Dave Smith
 *	18-Jul-89	RECOGNIZ.C written by Dave Smith
 *	 5-Sep-89	MOVAUT.C written by Dave Smith
 *	10-Nov-88	SRMc - wrote myfree()
 *	20-May-89	SRMc - wrote mystrdup()
 *	13-Jul-89	hab  - de-"lint" MYALLOC.C and TRIE.C
 *	18-Sep-89	SRMc - regularize some comments and includes
 *			     - reorganize and label static functions
 *			     - replace malloc() with myalloc()
 *	19-Sep-89	SRMc - redefined subsets field of Language, added
 *				numsubsets field
 *			     - revised getline() to read arbitrarily long
 *				input, using internal static buffer to start
 *			     - redefined myrealloc() to follow realloc()
 *	21-Sep-89	SRMc - change char to unsigned char for 8-bit safety
 *			     - create strtok8() in STRTOK8.C (using unsigned
 *				char) from STRTOK.C
 *	23-Sep-89	SRMc - rename MORPH.H to KIMMO.H
 *	25-Sep-89	SRMc - fixed bug in chkMatch() which kept nulls in
 *				subsets from being found
 *			     - fix bug in getline() dynamic memory
 *			     - change getline()'s handling of comments
 *	26-Sep-89	SRMc - add rule_active field to Rule struct
 *			     - remove rulState field from Language struct
 *			     - wrote erase_trie()
 *	27-Sep-89	SRMc - replace typedef Rule with typedef RULE
 *				(major rewrite of moveAutomata())
 *			     - eliminate chkIfSub() and chkMatch()
 *	28-Sep-89	SRMc - make Lang global, remove "lang" arguments
 *			     - simplify tracing and logging variables
 *	30-Sep-89	SRMc - revise myrealloc() to call malloc() if it's
 *				handed a NULL pointer
 *	 2-Oct-89	SRMc - rename Language to LANGUAGE
 *	 3-Oct-89	SRMc - import code from a variety of places in
 *				previous user interface files for PC-KIMMO
 *				to form USERFUNC.C
 *	 5-Oct-89	SRMc - replace zero() with memset() in MYALLOC.C
 *	 6-Oct-89	SRMc - add comparisons to first char in ->lexchars
 *				and ->surfchars before calls to strchr()
 *				in the inner loop
 *			     - add check for ->lex_type or ->surf_type being
 *				ANY_CHAR
 *			     - add check for ->lex_type or ->surf_type being
 *				SUBSET_CHAR before calling strchr()
 *			     (note that these changes essentially restore
 *			      much of what had been done in chkMatch()
 *	 9-Oct-89	SRMc - fix bug in character matching in moveAutomata()
 *			     - define match_char() macro
 *			SRMc - write report_error() in REPORT.C
 *	10-Oct-89	SRMc - use report_error() for error reports
 *			     - change trie show function to not print anything
 *				out itself (for use of PC-KIMMO)
 *	11-Oct-89	SRMc - simplify the inner loop -- we handle issues
 *				of specificity during setup now
 *	12-Oct-89	SRMc - fix bug in matching pairs in the inner loop
 *				(life is more complicated than we thought...)
 *	14-Oct-89	SRMc - merge MOVAUT.C, FINALCON.C, part of USERFUNC.C,
 *				part of RECOGNIZ.C, and REPORT.C to form
 *				PCKFUNCS.C
 *			     - eliminate resHead and resTail as global
 *				variables
 *			     - renamed valForm() to valid_form()
 *			     - renamed recordResult() to add_result()
 *			     - revised interface to add_result()
 *			     - move getline() from GETRULES.C to GETLINE.C
 *	19-Oct-89	SRMc - finetuning of tracing output
 *			     - in getline(), skip past leading whitespace
 *				before returning the pointer to the input
 *	21-Oct-89	SRMc - import GETLINE.C, STRPOS.C, STRTOK8.C,
 *				MYALLOC.C, and TRIE.C
 *			     - make arguments to strpos() unsigned
 *			     - eliminate global variables
 *			     - rename moveAutomata() to move_automata()
 *			     - rename finalConfig() to final_config()
 *	24-Oct-89	SRMc - some delinting
 *	30-Oct-89	SRMc - move free_result() from USERFUNC.C to PCKFUNCS.C
 *	13-Dec-89	SRMc - add filename to report_error() argument list
 *	 2-Jan-90	SRMc - add function prototypes, more delinting
 *	 3-Jan-90	SRMc - will we never run out of lint?
 *	26-Jan-90	SRMc - port to SunOS 3.2
 *	 6-Jan-90	SRMc - #ifdef NOMEMSET, add memset() and memcpy()
 *				function definitions
 *			     - #ifdef BSD, use _doprnt() for vfprintf()
 *	19-Apr-90	EA   - #ifdef for THINK_C
 *	12-Jul-90	SRMc - add type casts to malloc() and realloc() calls
 *				in myalloc() and myrealloc(), as suggested by
 *				Greg Lee (lee@uhccux.uhcc.hawaii.edu) for port
 *				to AT&T UNIX PC 7300
 *			     - replace "void *" with "VOIDP", as also suggested
 *				by Greg Lee for port to ULTRIX
 ***************************************************************************
 * Copyright 1989, 1990 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */


/* marwan, 10-13-91 -- added this #define for integration with lisp */
#define NOMEMSET

#ifdef BSD
#include <strings.h>
#define vfprintf(fp,fmt,args) _doprnt(fmt,args,fp)
#ifdef __STDC__
extern VOIDP malloc(unsigned size);
extern VOIDP realloc(VOIDP ptr, unsigned size);
extern void free(char *ptr);
#else /*__STDC__*/
extern VOIDP malloc();
extern VOIDP realloc();
extern void free();
#endif /*__STDC__*/
#else /*BSD*/
#ifdef THINK_C
#include <string.h>
#include <MemoryMgr.h>    /* instead of memory.h */
#include <stdlib.h>       /* instead of malloc.h */
#else /*THINK_C*/
#endif /*THINK_C*/
#endif /*BSD*/

#ifdef __STDC__
/* standard library functions */
extern void exit(int status);
#else
/* standard library functions */
extern void exit();
#endif
/*
 *  error messages
 */
/* 800: issued by valid_form(); the format expects the whole form (%s)
 *      followed by the offending character (%c) */
static struct message static3_Bad_form =
    { 800, "Form [ %s ] contains character not in alphabet: %c" };
/* 801: issued by move_automata(); the format expects the 1-based rule
 *      number (%d), the lexical character (%c) and the surface character (%c) */
static struct message Bad_rule =
    { 801, "RULE %d is invalid - input %c:%c is not specified by any column" };
/* 900: issued by nomem() when an allocation fails; takes no arguments */
static struct message No_memory =
    { 900, "Out of memory" };

#ifdef NOMEMSET
/***************************************************************************
 * NAME
 *    memset
 * ARGUMENTS
 *    dst - pointer to block of memory
 *    val - value to set each byte of the block to
 *    num - number of bytes in the block
 * DESCRIPTION
 *    Set the first num bytes of memory starting at location dst to the value
 *    val.
 * RETURN VALUE
 *    pointer to beginning of block of memory (same as dst)
 */
char *memset(dst,val,num)
char *dst;
int val;
int num;
{
register char *cursor = dst;

/* plain byte-at-a-time fill, counting num down to zero */
while (num != 0)
    {
    *cursor = val;
    ++cursor;
    --num;
    }
return( dst );
}

/***************************************************************************
 * NAME
 *    memcpy
 * ARGUMENTS
 *    dst - pointer to destination block of memory
 *    src - pointer to source block of memory
 *    num - number of bytes in the block
 * DESCRIPTION
 *    Copy num bytes from location src to location dst.
 * RETURN VALUE
 *    pointer to beginning of destination block of memory (same as dst)
 */
char *memcpy(dst,src,num)
char *dst, *src;
int num;
{
register char *to = dst;
register char *from = src;

/* plain forward byte copy, counting num down to zero */
while (num != 0)
    {
    *to = *from;
    ++to;
    ++from;
    --num;
    }
return( dst );
}
#endif

/****************************************************************************
 * NAME
 *    report_error
 * ARGUMENTS
 *    severity - indicates severity of the error
 *    err_msg  - pointer to message structure for this error
 *    pline    - pointer to file line number, or NULL
 *    filename - pointer to file name, or NULL
 *    va_alist - zero or more arguments for error message
 * DESCRIPTION
 *    Print an error message with zero or more arguments, also printing
 *    the associated error number and an optional file line number.
 * RETURN VALUE
 *    none
 */
/*VARARGS4*/
void report_error(severity, err_msg, pline, filename, va_alist)
int severity;
struct message *err_msg;
int *pline;
char *filename;
va_dcl
{
va_list ap;
int have_line;

have_line = (pline != (int *)NULL);
/*
 *  heading: severity label and message number, then the optional
 *    line number and file name
 */
fprintf(stderr,
	(severity == FATAL) ? "\nERROR %03d" : "\nWARNING %03d",
	err_msg->number );
if (have_line)
    fprintf(stderr, " on line %d", *pline);
if (filename != (char *)NULL)
    fprintf(stderr, have_line ? " of %s" : " in %s", filename);
fprintf(stderr, ":\n    ");
/*
 *  body: the message text is written verbatim -- the variable arguments
 *    are accepted but not substituted into it (vfprintf() was replaced
 *    by fprintf() on 10-8-91), so any % conversions appear literally
 */
va_start(ap);
fprintf(stderr, "%s", err_msg->string);
va_end(ap);
/*
 *  trailer: one newline, plus an extra blank line after a fatal error
 */
putc('\n', stderr);
if (severity == FATAL)
    putc('\n', stderr);
}

/***************************************************************************
 * NAME
 *    nomem
 * ARGUMENTS
 *    none
 * DESCRIPTION
 *    Die with an appropriate error message.
 * RETURN VALUE
 *    doesn't return!
 */
static void nomem()
{
struct message *reason = &No_memory;

/* report with neither a line number nor a file name, then give up */
report_error(FATAL, reason, (int *)NULL, (char *)NULL);
exit(2);
}

/***************************************************************************
 * NAME
 *    myalloc
 * ARGUMENTS
 *    size - number of bytes to allocate
 * DESCRIPTION
 *    "Safe" interface to malloc() -- abort the program with an error message
 *    if we run out of memory.
 * RETURN VALUE
 *    pointer to beginning of area allocated
 */
VOIDP myalloc(size)
unsigned size;
{
VOIDP block;

block = (VOIDP)malloc(size);
if (!block)
    nomem();			/* reports the failure and exits */
/*
 *  callers get zero-filled memory
 */
memset((char *)block, '\0', size);
return( block );
}

/***************************************************************************
 * NAME
 *    mystrdup
 * ARGUMENTS
 *    str - pointer to character string to duplicate
 * DESCRIPTION
 *    Create a duplicate of an existing NUL-terminated character string.
 * RETURN VALUE
 *    pointer to the newly allocated and copied duplicate
 */
VOIDP mystrdup(str)
char *str;
{
return( (VOIDP)strcpy((char *)myalloc((unsigned)strlen(str)+1), str) );
}

/***************************************************************************
 * NAME
 *    myrealloc
 * ARGUMENTS
 *    s    - pointer to string in overlarge allocated buffer
 *    size - new size, either smaller or larger
 * DESCRIPTION
 *    Adjust an allocated buffer to a new size.  Abort the program with an
 *    error message if we run out of memory.
 * RETURN VALUE
 *    pointer to reallocated block
 */
VOIDP myrealloc(s,size)
VOIDP s;
unsigned size;
{
register VOIDP p;

if (s == NULL)
    p = (VOIDP)malloc(size);
else
    p = (VOIDP)realloc(s, size);
if (p == NULL)
    nomem();
return( p );
}

/***************************************************************************
 * NAME
 *    myfree
 * ARGUMENTS
 *    s - pointer to block to deallocate
 * DESCRIPTION
 *    interface to free() -- release previously allocated memory
 * RETURN VALUE
 *    none
 */
void myfree(s)
VOIDP s;
{
/* quietly ignore NULL so that callers need not check first */
if (s != NULL)
    free(s);
}

/****************************************************************************
 * NAME
 *    move_automata
 * ARGUMENTS
 *    lexChar  - character from the lexicon
 *    surfChar - character from the input string
 *    config   - vector of automata states
 *    lang     - pointer to LANGUAGE data structure
 * DESCRIPTION
 *    Move all of the automata to the next state based on the input
 *    characters and the current state.
 * RETURN VALUE
 *    0 if successful, or the number of the rule (automaton) that blocked
 *    if unsuccessful
 */
int move_automata(lexChar, surfChar, config, lang)
unsigned char lexChar, surfChar;
int *config;
LANGUAGE *lang;
{
RULE *rule;			/* automaton currently being stepped */
struct fsa_column *col;		/* column under examination */
struct fsa_column *hit;		/* column accepting lexChar:surfChar */
unsigned char *lex, *surf;
int rule_idx, col_idx;
int rule_count;

rule_count = lang->num_rules;
rule = lang->automata;
/*
 *  config advances in step with rule, so *config is always the state
 *    of the automaton being examined
 */
for ( rule_idx = 0 ; rule_idx < rule_count ; ++rule_idx, ++rule, ++config )
    {
    if (!rule->rule_active)
	continue;			/* rule is switched off */
    /*
     *  look for the first column whose pair accepts lexChar:surfChar
     */
    hit = (struct fsa_column *)NULL;
    col = rule->columns;
    for (   col_idx = 0 ;
	    (hit == (struct fsa_column *)NULL) && (col_idx < rule->num_cols) ;
	    ++col_idx, ++col )
	{
	lex  = col->lex_chars;
	surf = col->surf_chars;
	/*
	 *  simple cases: both leading characters match, or one side is
	 *    a single character (ALPHABET_CHAR|NULL_CHAR) that matches
	 *    while the other side is a set containing its character
	 */
	if ((lexChar == *lex) && (surfChar == *surf))
	    hit = col;
	else if ((col->lex_type & (ALPHABET_CHAR|NULL_CHAR)) &&
		 (lexChar == *lex) &&
		 (col->surf_type & (SUBSET_CHAR|ANY_CHAR)) &&
		 (strchr((char *)surf, surfChar) != (char *)NULL))
	    hit = col;
	else if ((col->surf_type & (ALPHABET_CHAR|NULL_CHAR)) &&
		 (surfChar == *surf) &&
		 (col->lex_type & (SUBSET_CHAR|ANY_CHAR)) &&
		 (strchr((char *)lex, lexChar) != (char *)NULL))
	    hit = col;
	if (hit != (struct fsa_column *)NULL)
	    continue;			/* loop condition ends the search */
	/*
	 *  harder case: both sides are sets (SUBSET_CHAR|ANY_CHAR), so
	 *    the two strings are walked in parallel looking for the pair
	 */
	if (!(col->lex_type & (SUBSET_CHAR|ANY_CHAR)))
	    continue;
	if (!(col->surf_type & (SUBSET_CHAR|ANY_CHAR)))
	    continue;
	while (*lex != NUL)
	    {
	    if ((lexChar == *lex) && (surfChar == *surf))
		{
		hit = col;
		break;
		}
	    ++lex;
	    ++surf;
	    }
	}
    if (hit == (struct fsa_column *)NULL)
	{
	/* no column of this rule covers the pair */
	report_error(FATAL, &Bad_rule, (int *)NULL, (char *)NULL,
					rule_idx+1, lexChar, surfChar );
	return( rule_idx + 1 );
	}
    /*
     *  take the transition; states are numbered from 1, and a new state
     *    of 0 means that this rule blocks
     */
    *config = hit->transitions[ (*config) - 1 ];
    if (*config == 0)
	return( rule_idx + 1 );
    }
return( 0 );			/* every active rule moved successfully */
}

/****************************************************************************
 * NAME
 *    final_config
 * ARGUMENTS
 *    config - final configuration states of the rules
 *    lang   - pointer to LANGUAGE data structure
 * DESCRIPTION
 *    determine if, in the final configuration of the automata, the current
 *    state of each of the rules is one of the defined final states for that
 *    rule
 * RETURN VALUE
 *    zero if okay, otherwise the number of the first automaton that fails
 */
int final_config(config,lang)
int *config;
LANGUAGE *lang;
{
RULE *rule;
int idx;
int total;

total = lang->num_rules;
rule = lang->automata;
idx = 0;
while (idx < total)
    {
    /*
     *  an active rule must be sitting in one of its final states;
     *    states are numbered from 1, hence the -1 when indexing
     */
    if (rule->rule_active && !rule->final_states[config[idx]-1])
	return( idx + 1 );	/* 1-based number of the failing rule */
    ++idx;
    ++rule;
    }
return( 0 );			/* all active rules ended in a final state */
}

/****************************************************************************
 * NAME
 *    valid_form
 * ARGUMENTS
 *    form  - pointer to word string
 *    lang  - pointer to LANGUAGE data structure
 *    logfp - FILE pointer for possible log file
 * DESCRIPTION
 *    validate a form (i.e. supposed alphabetic sequence of characters
 *    make up a word)
 * RETURN VALUE
 *    TRUE if valid, FALSE if invalid
 */
static int valid_form(form,lang,logfp)
unsigned char *form;
LANGUAGE *lang;
FILE *logfp;
{
/*
 *  Check that every character of form is either in lang->alphabet or is
 *  the language's null character (lang->null).
 *
 *  form  - NUL-terminated word to check
 *  lang  - supplies the alphabet string and the null character
 *  logfp - optional log file; may be NULL
 *
 *  Returns TRUE if all characters are acceptable.  On the first bad
 *  character the problem is reported through report_error(), echoed to
 *  logfp if one was given, and FALSE is returned.
 */
unsigned char *p;
unsigned char *alpha;
unsigned char nullchar;

alpha = lang->alphabet;
nullchar = lang->null;
for ( p = form ; *p != NUL ; ++p )
    {
    if ((strchr((char *)alpha, *p ) == (char *)NULL) && (*p != nullchar))
	{
	report_error(FATAL, &static3_Bad_form, (int *)NULL, (char *)NULL, form, *p);
	if (logfp != (FILE *)NULL)
	    {
	    /*
	     *  the message format contains both %s and %c, so the form
	     *    must be passed as well as the character; supplying only
	     *    the character made fprintf() treat it as a string pointer
	     */
	    fprintf(logfp, static3_Bad_form.string, form, *p);
	    putc('\n', logfp);
	    }
	return(FALSE);
	}
    }
return(TRUE);
}

/****************************************************************************
 * Name
 *    add_result
 * ARGUMENTS
 *    pres     - pointer to primary result string
 *    pfeat    - pointer to result feature string
 *    headp    - pointer to linked list of RESULT nodes
 *    nullchar - null character (not copied to output)
 *    trace    - flag to trace progress verbosely
 *    logfp    - FILE pointer for possible log file
 * DESCRIPTION
 *    Add this result to the end of the linked list of results.
 * RETURN VALUE
 *    pointer to the new beginning of the longer list of results
 */
RESULT *add_result(pres, pfeat, headp, nullchar, trace, logfp)
unsigned char *pres;
unsigned char *pfeat;
RESULT *headp;
unsigned char nullchar;
int trace;
FILE *logfp;
{
static char t_fmt[] = "\n    RESULT = %s   %s\n\n";
RESULT *node;			/* the node being appended */
RESULT *last;			/* current final node, or NULL if none */
unsigned char *src;
unsigned char *dst;
int len;

/*
 *  when tracing, echo the result to the log file (if any) and to stderr
 */
if ( trace )
    {
    if (logfp != (FILE *)NULL)
	fprintf(logfp, t_fmt, pres, pfeat );
    fprintf(stderr, t_fmt, pres, pfeat );
    }
/*
 *  locate the end of the existing list
 */
last = headp;
if (last != (RESULT *)NULL)
    {
    while (last->link != (RESULT *)NULL)
	last = last->link;
    }
/*
 *  build the new node with private copies of both strings
 */
node = (RESULT *)myalloc(sizeof(RESULT));
len = strlen((char *)pres);
node->str = (unsigned char *)myalloc(len+1);
len = strlen((char *)pfeat);
node->feat = (unsigned char *)myalloc(len+1);
node->link = (RESULT *)NULL;
/*
 *  hook it on: it becomes the head if the list was empty
 */
if (last != (RESULT *)NULL)
    last->link = node;
else
    headp = node;
strcpy((char *)node->str, (char *)pres);
strcpy((char *)node->feat, (char *)pfeat);
/*
 *  squeeze the null characters out of the stored result string in place
 */
dst = node->str;
for ( src = node->str ; *src != NUL ; ++src )
    {
    if (*src != nullchar)
	*dst++ = *src;
    }
*dst = NUL;

return( headp );
}

/****************************************************************************
 * NAME
 *    getline
 * ARGUMENTS
 *    infp         - input FILE pointer
 *    line_num     - pointer to line number counter
 *    comment_char - character indicating comment in input file
 * DESCRIPTION
 *    Read an arbitrarily long line from an input file.  Both leading and
 *    trailing whitespace are removed from each line.
 * RETURN VALUE
 *    address of the buffer containing the NUL-terminated line, or NULL
 *    if EOF.  This is effectively a static buffer which is overwritten
 *    each time getline() is called.
 */
unsigned char *getline(infp, line_num, comment_char)
FILE *infp;
int  *line_num;
unsigned char comment_char;
{
static unsigned char buf[MAXLINELEN];	/* fixed buffer for ordinary lines */
static unsigned char *s = buf;		/* line storage: buf or heap block */
register unsigned char *cursor;
unsigned char *mark;
int capacity;
int ch;
int had_comment;

do  {
    /*
     *  drop any heap buffer left over from a previous long line
     */
    if (s != buf)
	{
	myfree(s);
	s = buf;
	}
    *line_num += 1;			/* count this line */
    capacity = MAXLINELEN;
    cursor = buf;
    /*
     *  collect characters up to newline or end of file, switching to
     *    (and growing) a heap buffer in MAXLINELEN steps when full
     */
    for (;;)
	{
	ch = getc(infp);
	if ((ch == EOF) || (ch == '\n'))
	    break;
	*cursor++ = ch;
	if ((cursor - s) < capacity)
	    continue;			/* still room, keep reading */
	capacity += MAXLINELEN;
	if (s == buf)
	    {
	    s = (unsigned char *)myalloc(capacity);
	    memcpy(s, (char *)buf, MAXLINELEN);
	    }
	else
	    s = (unsigned char *)myrealloc( (char *)s, capacity );
	cursor = s + capacity - MAXLINELEN;	/* resume after old data */
	}
    *cursor = NUL;
    if ((ch == EOF) && (cursor == buf))
	return( (unsigned char *)NULL );	/* nothing left to read */
    /*
     *  cut the line at the comment character, if there is one
     */
    mark = (unsigned char *)strchr((char *)s, comment_char);
    if (mark != (unsigned char *)NULL)
	{
	*mark = NUL;
	had_comment = TRUE;
	}
    else
	had_comment = FALSE;
    /*
     *  trim trailing whitespace
     */
    cursor = s + strlen((char *)s);
    while ((cursor-- > s) && isascii(*cursor) && isspace(*cursor))
	*cursor = NUL;
    /*
     *  a line that held nothing but a comment is skipped entirely
     */
    } while (had_comment && (*s == NUL));
/*
 *  step over leading whitespace; the returned pointer may therefore be
 *    inside the buffer rather than at its start
 */
cursor = s;
while ((*cursor != NUL) && isascii(*cursor) && isspace(*cursor))
    ++cursor;
return( cursor );
}

/************************************************************************
 * NAME
 *    strpos
 * ARGUMENTS
 *    s - address of NUL-terminated character string
 *    c - character to search for
 * DESCRIPTION
 *    Search for the first occurrence of the character c in the string s.
 *    If the character c is found in the string, the position
 *    of the first occurrence is returned (where the first character of s
 *    is considered to be at position 0).  If the character is not found,
 *    the value -1 is returned.  The terminating NUL character is considered
 *    to be part of s for the purposes of the search, so searching for NUL
 *    returns the position of the terminated NUL (which is equal to the
 *    length of the string), not the value -1.  strpos(s,'\0') is therefore
 *    equivalent to strlen(s).
 * RETURN VALUE
 *    position of the first occurrence of c in s, or -1 if c does not
 *    occur in s
 */
int strpos(s,c)
unsigned char *s;
register unsigned char c;
{
register int idx;

if (s == (unsigned char *)NULL)
    return( -1 );
/*
 *  the match test comes before the end-of-string test so that the
 *    terminating NUL itself can be found
 */
for ( idx = 0 ; ; ++idx )
    {
    if (s[idx] == c)
	return( idx );
    if (s[idx] == NUL)
	return( -1 );
    }
}

/************************************************************************
 * NAME
 *    strtok8
 * ARGUMENTS
 *    s1 - address of NUL-terminated character string
 *    s2 - address of NUL-terminated set of separator characters
 * DESCRIPTION
 *    Split the string s1 into a sequence of zero or more text tokens
 *    separated by spans of one or more characters from s2.  Only the
 *    initial call provides a value for s1; successive calls must use
 *    a NULL pointer for the first argument.  The first separator
 *    character following the token in s1 is replaced by a NUL character.
 *    Subsequent calls to strtok8 work through s1 sequentially.  Note
 *    that s2 may change from one call to the next.
 * RETURN VALUE
 *    address of the next token, or NULL if no more tokens exist
 */
unsigned char *strtok8(s1, s2)
unsigned char *s1, *s2;
{
static unsigned char *resume = 0;	/* where the next scan starts */
unsigned char *token;
unsigned char *sep;
int is_sep;

if (s1 != (unsigned char *)NULL)
    resume = s1;			/* start on a new string */
if (resume == (unsigned char *)NULL)
    return( (unsigned char *)NULL );
if (*resume == NUL)
    return( (unsigned char *)NULL );
if (s2 == (unsigned char *)NULL)
    {
    /*
     *  no separators given: hand back everything that is left and
     *    remember that the string is used up
     */
    token = resume;
    resume = (unsigned char *)NULL;
    return( token );
    }
/*
 *  step over leading separators to reach the start of the token
 */
token = resume;
while (*token != NUL)
    {
    is_sep = 0;
    for ( sep = s2 ; *sep != NUL ; ++sep )
	{
	if (*sep == *token)
	    {
	    is_sep = 1;
	    break;
	    }
	}
    if (!is_sep)
	break;
    ++token;
    }
if (*token == NUL)
    {
    resume = token;			/* only separators were left */
    return( (unsigned char *)NULL );
    }
/*
 *  scan to the separator ending the token, overwrite it with NUL, and
 *    resume just past it next time
 */
resume = token + 1;
while (*resume != NUL)
    {
    for ( sep = s2 ; *sep != NUL ; ++sep )
	{
	if (*sep == *resume)
	    {
	    *resume = '\0';
	    ++resume;
	    return( token );
	    }
	}
    ++resume;
    }
return( token );		/* token runs to the end of the string */
}

/*
 *  File-scope state and forward declarations for the trie functions.
 *  add_to_trie() and show_trie() store their caller-supplied arguments
 *  here so that the recursive workers need not pass them down each level.
 */
static VOIDP   newinfo;			/* info being inserted; set by add_to_trie() */
static VOIDP (*link_function)();	/* merges new info into a node's existing info; set by add_to_trie() */
static void  (*show_function)();	/* displays one node's info; set by show_trie() */
static TRIE *enter_entry();		/* recursive worker for add_to_trie() */
static void show_entries();		/* recursive worker for show_trie() */

/*************************************************************************
 * NAME
 *    add_to_trie
 * ARGUMENTS
 *    trp      - pointer to head of trie
 *    key      - pointer to insertion key (character string)
 *    info     - pointer to generic information
 *    linkinfo - pointer to function for adding information to existing list
 *    maxlevel - maximum depth to which the trie can go
 * DESCRIPTION
 *    Add info to the trie, using the insertion key.  The trie is cut off at
 *    a depth of maxlevel.
 * RETURN VALUE
 *    pointer to the head of the modified trie
 */
TRIE *add_to_trie( trp, key, info, linkinfo, maxlevel )
TRIE *trp;
char *key;
VOIDP info;
VOIDP (*linkinfo)();
int maxlevel;
{
TRIE *head;

/*
 *  park the values that stay the same at every level of recursion in
 *    the file-level variables read by enter_entry()
 */
link_function = linkinfo;
newinfo = info;
head = enter_entry( trp, key, maxlevel );
return( head );
}

/*************************************************************************
 * NAME
 *    enter_entry
 * ARGUMENTS
 *    trp      - pointer to head of (sub)trie
 *    key      - pointer to insertion key (character string)
 *    levels   - number of levels deep we can still go
 * DESCRIPTION
 *    Add info to the trie, using the insertion key.
 *    This is a recursive function.
 * RETURN VALUE
 *    pointer to the head of the modified trie
 */
static TRIE *enter_entry( trp, key, levels )
TRIE *trp;
char *key;
int levels;
{
register TRIE *tp;
char *plet;
char newlet[2];		/* short string for new key letter */
int pos;
/*
 *  check for running out of key or running out of trie depth
 */
if ((*key == NUL) || (levels <= 0))
    {
    if (trp)
	{			/* link new stuff to existing information */
#ifdef THINK_C
	trp->trieinfo = (VOIDP)(*link_function)(newinfo, trp->trieinfo);
#else
	(VOIDP)trp->trieinfo = (VOIDP)(*link_function)(newinfo, trp->trieinfo);
#endif
	return( trp );
	}
    else
	{			/* this level doesn't exist, so create it */
	tp = (TRIE *)myalloc( sizeof(TRIE) );
	tp->letters  = (unsigned char *)NULL;
	tp->children = (TRIE *)NULL;
	tp->siblings = (TRIE *)NULL;
	tp->trieinfo = newinfo;
	return( tp );
	}
    }
/*
 *  more key and trie depth to go, so set up newlet[] for key letter copying
 */
newlet[0] = *key;
newlet[1] = NUL;
if (trp)
    {
    /*
     *  trie exists, so check for children
     */
    if (trp->children)
	{
	/*
	 *  child tries exist, so look for the one we want
	 */
	if ((pos = strpos(trp->letters, newlet[0])) >= 0)
	    {
	    /*
	     *  appropriate child trie exists, so scan to it and recurse
	     */
	    for ( tp = trp->children ; pos ; --pos, tp = tp->siblings )
		;
	    enter_entry( tp, key+1, levels-1 );
	    return( trp );
	    }
	else
	    {
	    /*
	     *  no appropriate child trie, so create one recursively
	     */
	    plet = strcpy( myalloc((unsigned)strlen((char *)trp->letters)+2),
			   (char *)trp->letters );
	    myfree( trp->letters );
	    trp->letters = (unsigned char *)strcat(plet, newlet);
	    for ( tp = trp->children ; tp->siblings ; tp = tp->siblings )
		;
	    tp->siblings = enter_entry((TRIE *)NULL, key+1, levels-1);
	    return( trp );
	    }
	}
    else
	{
	/*
	 *  no children, so create child trie recursively
	 */
	trp->letters  = (unsigned char *)mystrdup( newlet );
	trp->children = enter_entry((TRIE *)NULL, key+1, levels-1);
	return( trp );
	}
    }
else
    {
    /*
     *  no trie exists at this level, so create this level's node, creating
     *    the child node recursively
     */
    tp = (TRIE *)myalloc( sizeof(TRIE) );
    tp->letters  = (unsigned char *)mystrdup( newlet );
    tp->children = enter_entry((TRIE *)NULL, key+1, levels-1 );
    tp->siblings = (TRIE *)NULL;
#ifdef THINK_C
    tp->trieinfo = (VOIDP)NULL;
#else
    (VOIDP)tp->trieinfo = (VOIDP)NULL;
#endif
    return( tp );
    }
}

/*************************************************************************
 * NAME
 *    show_trie
 * ARGUMENTS
 *    trp      - pointer to the head of the trie
 *    showinfo - pointer to a function for displaying the information lists
 * DESCRIPTION
 *    Walk through a trie, displaying the information stored at each node.
 * RETURN VALUE
 *    none
 */
void show_trie(trp, showinfo)
TRIE *trp;
void (*showinfo)();
{
/*
 *  only walk the trie when there is both a trie and a display function
 */
if (trp && showinfo)
    {
    show_function = showinfo;	/* read by show_entries() at every level */
    show_entries( trp );
    }
}

/*************************************************************************
 * NAME
 *    show_entries
 * ARGUMENTS
 *    trp    - pointer to the head of the trie
 * DESCRIPTION
 *    Recursively show the information in the trie, using a function
 *    supplied by the original caller to display the information at each
 *    node.
 * RETURN VALUE
 *    none
 */
static void show_entries(trp)
TRIE *trp;
{
TRIE *child;

/*
 *  this node first (if it carries any information) ...
 */
if (trp->trieinfo)
    (*show_function)(trp->trieinfo);
/*
 *  ... then each child subtree in sibling order
 */
child = trp->children;
while (child)
    {
    show_entries(child);
    child = child->siblings;
    }
}

/*************************************************************************
 * NAME
 *    trie_indent
 * ARGUMENTS
 *    num - number of levels of indentation desired
 * DESCRIPTION
 *    Print a newline followed by num iterations of the indentation
 *    spacing.
 * RETURN VALUE
 *    none
 */
/* static void trie_indent(num) */
/* int num; */
/* { */
/* putc('\n', stdout); */
/* while (num--) */
/*     fprintf(stdout, "   "); */
/* } */

/*************************************************************************
 * NAME
 *    erase_trie
 * ARGUMENTS
 *    trp       - pointer to the head of the trie
 *    eraseinfo - pointer to a function for erasing the information lists
 * DESCRIPTION
 *    Walk through a trie, freeing all the memory allocated for it.
 * RETURN VALUE
 *    none
 */
void erase_trie(trp, eraseinfo)
TRIE *trp;
void (*eraseinfo)();
{
if (trp != (TRIE *)NULL)
    {
    /*
     *  release the key letters, then everything reachable from this
     *    node (siblings before children), and finally the node itself
     */
    if (trp->letters)
	myfree( trp->letters );
    erase_trie( trp->siblings, eraseinfo);
    erase_trie( trp->children, eraseinfo);
    /*
     *  stored information is released only through the caller's function
     */
    if (eraseinfo && trp->trieinfo)
	(*eraseinfo)(trp->trieinfo);
    myfree( (char *)trp );
    }
}

/****************************************************************************
 * NAME
 *    free_result
 * ARGUMENTS
 *    resp - pointer to linked list of RESULTs
 * DESCRIPTION
 *    Free the memory used by a linked list of RESULTs.
 * RETURN VALUE
 *    none
 */
void free_result(resp)
register RESULT *resp;
{
register RESULT *next;

/*
 *  the link is saved before each node is released
 */
for ( ; resp != (RESULT *)NULL ; resp = next )
    {
    next = resp->link;
    if (resp->str != (unsigned char *)NULL)
	myfree(resp->str);
    if (resp->feat != (unsigned char *)NULL)
	myfree(resp->feat);
    myfree(resp);
    }
}

