/*	GENERATE.C - generator function for Kimmo morphological analysis
 ***************************************************************************
 *
 *	RESULT *generator(lexform,lang,limit,trace,logfp)
 *	unsigned char *lexform;
 *	LANGUAGE *lang;
 *	int limit;
 *	int trace;
 *	FILE *logfp;
 *
 ***************************************************************************
 *	EDIT HISTORY
 *	24-Jun-89	written by Dave Smith
 *	19-Sep-89	SRMc - replace malloc() with myalloc()
 *	20-Sep-89	SRMc - regularize some comments and includes
 *	21-Sep-89	SRMc - change char to unsigned char for 8-bit safety
 *			     - revised output format for traceG()
 *	23-Sep-89	SRMc - rename MORPH.H to KIMMO.H
 *	27-Sep-89	SRMc - replace typedef Rule with typedef RULE
 *			     - rename file from GENERAT.C to GENERATE.C
 *			     - modify interface to moveAutomata()
 *	28-Sep-89	SRMc - make Lang global, remove "lang" arguments
 *			     - simplify tracing and logging variables
 *			     - change generator() inner loop into two
 *				loops
 *			     - change interface to generator(), revise old
 *				generator() to create gener()
 *	29-Sep-89	SRMc - eliminate configCpy()
 *	 2-Oct-89	SRMc - rename Language to LANGUAGE
 *			     - rename ResNode to RESULT
 *	13-Oct-89	SRMc - revise tracing output
 *			     - add SET LIMIT {ON|OFF} command
 *			     - add Lang.boundary processing
 *	14-Oct-89	SRMc - fix bug in Lang.boundary processing
 *			     - eliminate static2_resHead and resTail as global
 *				variables
 *			     - revise interface to recordResult(), rename
 *				it to add_result()
 *	16-Oct-89	SRMc - finetune tracing output
 *	17-Oct-89	SRMc - more finetuning of tracing output
 *	18-Oct-89	SRMc - even more finetuning of tracing output
 *	19-Oct-89	SRMc - yet even more finetuning of tracing output
 *	21-Oct-89	SRMc - eliminate global variables
 *			     - rename moveAutomata() to move_automata()
 *			     - rename finalConfig() to final_config()
 *	24-Oct-89	SRMc - some delinting
 *	12-Dec-89	SRMc - add TRACE() for final step before add_output()
 *	13-Dec-89	SRMc - add filename to report_error() argument list
 *	 2-Jan-90	SRMc - add function prototypes, more delinting
 *	 3-Jan-90	SRMc - will we never run out of lint?
 *	26-Jan-90	SRMc - port to 4.2BSD (SunOS 3.2)
 *	 8-Mar-90	EA   - #ifdef for THINK_C
 *	12-Jul-90	SRMc - replace "void *" with "VOIDP", as suggested
 *				by Greg Lee (lee@uhccux.uhcc.hawaii.edu) for
 *				port to ULTRIX
 *	 5-Sep-90	SRMc - fix bug discovered by Greg Lee: when the end
 *				of the lexical form occurs, we need to check
 *				rules involving Lang.null as well as those
 *				involving Lang.boundary
 ***************************************************************************
 * Copyright 1989, 1990 by the Summer Institute of Linguistics, Inc.
 * All rights reserved.
 */


#ifdef BSD
#include <strings.h>
#else

#endif

#ifndef NOMEMSET
#ifdef THINK_C
#include <MemoryMgr.h>
#else

#endif
#endif

#ifdef __STDC__

/* from pckfuncs.c */
extern RESULT *add_result(unsigned char *pres, unsigned char *pfeat,
			  RESULT *headp, unsigned char nullchar, int trace,
			  FILE *logfp);
extern int final_config(int *config, LANGUAGE *lang);
extern int move_automata(unsigned char lexChar, unsigned char surfChar,
			 int *config, LANGUAGE *lang);
/* extern VOIDP myalloc(unsigned size); */
extern void myfree(VOIDP s);
extern VOIDP myrealloc(VOIDP s, unsigned size);
/* extern int valid_form(unsigned char *form, LANGUAGE *lang, FILE *logfp); */
#ifdef NOMEMSET
extern char *memset(char *,int,int);	/* in pckfuncs.c */
#endif

#else

/* from pckfuncs.c */
extern RESULT *add_result();
extern int final_config(), move_automata() /* , valid_form() */ ;
/* extern VOIDP myalloc(); */
extern VOIDP myrealloc();
extern void myfree();
#ifdef NOMEMSET

#endif

#endif
/*
 *  local storage of invariant parameters for processing data
 */
static LANGUAGE Lang;		/* data for the current language */

/* marwan, 10-15-91 -- replaced the static 'Limit_flag's name with the */
/* name 'static_Limit_flag' to solve a kcl problem */

static int static_Limit_flag;		/* flag for limit on/off */
static int Trace_flag;		/* flag for tracing on/off */
static FILE *static2_Log_fp;		/* log FILE pointer */
/*
 *  local storage for the current result string (dynamically allocated)
 */
static RESULT *static2_resHead;		/* locally global result list pointer */
static unsigned char *static2_result = (unsigned char *)NULL;
static int static2_result_size = 0;

#define TRACE(tracelevel,genlev,dir,lc,sc,states,rule,type) \
{\
if (Trace_flag >= tracelevel)\
    {\
    if (static2_Log_fp != (FILE *)NULL)\
	traceG(static2_Log_fp, genlev, dir, lc, sc, states, rule, type);\
    traceG(stderr, genlev, dir, lc, sc, states, rule, type);\
    }\
}

#define NORMAL_T  0
#define BLOCK_T   1
#define FAIL_T    2


/****************************************************************************
 * NAME
 *    traceG
 * ARGUMENTS
 *    outfp     - output FILE pointer
 *    level     - depth of recursion (actually number of chars in result)
 *    dir       - special flag character
 *    lc        - lexical character
 *    sc        - surface character
 *    states    - current vector of rule states
 *    ruleNum   - number of the rule that blocked
 *    tracetype - type of trace output desired
 * DESCRIPTION
 *    Produce one line of tracing output
 * RETURN VALUE
 *    none
 */
static void traceG(outfp,level,dir,lc,sc,states,ruleNum,tracetype)
FILE *outfp;
int level;
unsigned char dir;
unsigned char lc;
unsigned char sc;
int *states;
int ruleNum;
int tracetype;
{
int i;

if (lc == ' ')
    fprintf( outfp, "%2d%c     ", level, dir );
else
    fprintf(outfp, "%2d%c %c:%c ", level, dir, lc, sc );

switch (tracetype)
    {
    case NORMAL_T:
	if (states != (int *)NULL)
	    {
	    for ( i = 0 ; i < Lang.num_rules ; ++i )
		fprintf(outfp, "%2d ", states[i]);
	    fprintf(outfp, "   %s\n", (static2_result!=(unsigned char *)NULL) ?
					static2_result : (unsigned char *)"" );
	    }
	else
	    putc( '\n', outfp);
	break;

    case BLOCK_T:
	if (Trace_flag >= 3)
	    {
	    if (states != (int *)NULL)
		{
		for ( i = 0 ; i < Lang.num_rules ; ++i )
		    {
		    if (i < ruleNum)
			fprintf(outfp, "%2d ", states[i]);
		    else
		    fprintf(outfp, " ? ");
		    }
		fprintf(outfp, "   %s\n        ",
			(static2_result != (unsigned char *)NULL) ?
					static2_result : (unsigned char *)"" );
		}
	    }
	fprintf( outfp, "BLOCKED BY RULE %d:  %s\n",
				ruleNum, Lang.automata[ruleNum-1].name );
	break;

    case FAIL_T:
#if 0
	if (states != (int *)NULL)
	    {
	    for ( i = 0 ; i < Lang.num_rules ; ++i )
		fprintf(outfp, "%2d ", states[i]);
	    fprintf(outfp, "   %s\n", (static2_result!=(unsigned char *)NULL) ?
					static2_result : (unsigned char *)"" );
	    }
	else
	    putc( '\n', outfp);
#endif
	fprintf( outfp, "END OF INPUT, FAILED RULE %d:  %s\n",
				ruleNum, Lang.automata[ruleNum-1].name );
	break;

    default:
	putc( '\n', outfp);
	break;
    }
}

/****************************************************************************
 * NAME
 *    gener
 * ARGUMENTS
 *    remainder - rest of the lexical form to generate from
 *    level     - depth of the process (== number of chars in result)
 *    config    - current states of the automata
 * DESCRIPTION
 *    Try to generate a word (surface form) from the provided lexical form.
 * RETURN VALUE
 *    nonzero if successful, 0 if unsuccessful
 */
static int gener(remainder, level, config)
unsigned char *remainder;
int level;
int *config;
{
unsigned char lc, sc;	/* current lexical and surface character pair */
int *newConfig;
int have_good_result;
int blockR;		/* blocked rule number (from moveAutomata) */
register unsigned char *p;
int *old, *new;
int i;


#if 9
have_good_result = 0;
newConfig = (int *)myalloc(Lang.num_rules * sizeof(newConfig));

/*****************************************************************
 *  try to use the null character for the lexical character
 */
for (	lc = Lang.null, p = Lang.lex_pair ;
	(p = (unsigned char *)strchr((char *)p,lc)) != (unsigned char *)NULL ;
	++p )
    {
    sc = Lang.surf_pair[ p - Lang.lex_pair ];
    for ( new = newConfig, old = config, i = 0 ; i < Lang.num_rules ; ++i )
	*new++ = *old++;
    TRACE(2, level, ' ', lc, sc, config, 0, NORMAL_T)
    if ((blockR = move_automata(lc, sc, newConfig, &Lang)) == 0)
	{
	if (level >= static2_result_size)
	    {
	    static2_result_size += MAXLINELEN;
	    static2_result = (unsigned char *)myrealloc(static2_result,
							static2_result_size+1);
	    }
	static2_result[level] = sc;	/* add this surface char to result */
	static2_result[level+1] = NUL;

	have_good_result = gener(remainder, level+1, newConfig);

	static2_result[level] = NUL;	/* remove old surface char for next try */
	TRACE(2, level, '<', ' ', ' ', config, 0, NORMAL_T)
	}
    else
	TRACE(2, level, '-', ' ',' ', newConfig, blockR, BLOCK_T)
    if (have_good_result && static_Limit_flag)
	{
	myfree( newConfig );
	return( have_good_result );
	}
    }
#endif

/*****************************************************************
 *  check for the end of the lexical form
 */
lc = *remainder;
if (lc == NUL)
    {
    /*****************************************************************
     *  check the boundary character if applicable
     */
    if (    (Lang.boundary != NUL) &&
	    (strchr((char *)Lang.lex_pair, Lang.boundary) != (char *)NULL) )
	{
	lc = Lang.boundary;
	sc = Lang.boundary;
	TRACE(2, level, ' ', lc, sc, config, 0, NORMAL_T)
	if ((blockR = move_automata(lc, sc, config, &Lang)) == 0)
	    TRACE(2, level, ' ',' ',' ', config, 0, NORMAL_T)
	else
	    {
	    TRACE(2, level, '-',' ',' ', config, blockR, BLOCK_T)
#if 9
	    myfree(newConfig);
#endif
	    return(0);
	    }
	}
    else
	TRACE(2, level, ' ',' ',' ', config, 0, NORMAL_T)
    i = final_config(config,&Lang);
    if (i == 0)
	{
	static2_resHead = add_result(static2_result, (unsigned char *)"", static2_resHead,
			     Lang.null, Trace_flag, static2_Log_fp);
#if 9
	myfree(newConfig);
#endif
	return(1);			/* we have a good result */
	}
    else
	{
	TRACE(2, level, '-',' ',' ', (int *)NULL, i, FAIL_T)
#if 9
	myfree(newConfig);
#endif
	return(0);			/* sorry, no result */
	}
    }

#if 9
#else
have_good_result = 0;
newConfig = (int *)myalloc(Lang.num_rules * sizeof(newConfig));
#endif

/*****************************************************************
 *  try to use the provided lexical character
 */
for (	p = Lang.lex_pair ;
	(p = (unsigned char *)strchr((char *)p,lc)) != (unsigned char *)NULL ;
	++p )
    {
    sc = Lang.surf_pair[ p - Lang.lex_pair ];
    for ( new = newConfig, old = config, i = 0 ; i < Lang.num_rules ; ++i )
	*new++ = *old++;
    TRACE(2, level, ' ', lc, sc, config, 0, NORMAL_T)
    if ((blockR = move_automata(lc, sc, newConfig, &Lang)) == 0)
	{
	if (level >= static2_result_size)
	    {
	    static2_result_size += MAXLINELEN;
	    static2_result = (unsigned char *)myrealloc(static2_result,
							static2_result_size+1);
	    }
	static2_result[level] = sc;	/* add this surface char to result */
	static2_result[level+1] = NUL;

	have_good_result = gener(remainder+1, level+1, newConfig);

	static2_result[level] = NUL;	/* remove old surface char for next try */
	TRACE(2, level, '<', ' ', ' ', config, 0, NORMAL_T)
	}
    else
	TRACE(2, level, '-', ' ',' ', newConfig, blockR, BLOCK_T)
    if (have_good_result && static_Limit_flag)
	{
	myfree( newConfig );
	return( have_good_result );
	}
    }

#if 9
#else
/*****************************************************************
 *  try to use the null character for the lexical character
 */
for (	lc = Lang.null, p = Lang.lex_pair ;
	(p = (unsigned char *)strchr((char *)p,lc)) != (unsigned char *)NULL ;
	++p )
    {
    sc = Lang.surf_pair[ p - Lang.lex_pair ];
    for ( new = newConfig, old = config, i = 0 ; i < Lang.num_rules ; ++i )
	*new++ = *old++;
    TRACE(2, level, ' ', lc, sc, config, 0, NORMAL_T)
    if ((blockR = move_automata(lc, sc, newConfig, &Lang)) == 0)
	{
	if (level >= static2_result_size)
	    {
	    static2_result_size += MAXLINELEN;
	    static2_result = (unsigned char *)myrealloc(static2_result,
							static2_result_size+1);
	    }
	static2_result[level] = sc;	/* add this surface char to result */
	static2_result[level+1] = NUL;

	have_good_result = gener(remainder, level+1, newConfig);

	static2_result[level] = NUL;	/* remove old surface char for next try */
	TRACE(2, level, '<', ' ', ' ', config, 0, NORMAL_T)
	}
    else
	TRACE(2, level, '-', ' ',' ', newConfig, blockR, BLOCK_T)
    if (have_good_result && static_Limit_flag)
	{
	myfree( newConfig );
	return( have_good_result );
	}
    }
#endif

/*****************************************************************
 *  we're through at this level, so clean up and return what happened
 */
myfree(newConfig);
return( have_good_result );
}

/****************************************************************************
 * NAME
 *    generator
 * ARGUMENTS
 *    lexform - pointer to lexical form string
 *    lang    - data for the current language
 *    limit   - flag for limit on/off
 *    trace   - flag for tracing on/off
 *    logfp   - log FILE pointer
 * DESCRIPTION
 *    Try to generate a word (surface form) from the provided lexical form.
 * RETURN VALUE
 *    pointer to list of results, or NULL if unsuccessful
 */
RESULT *generator(lexform,lang,limit,trace,logfp)
unsigned char *lexform;
LANGUAGE *lang;
int limit;
int trace;
FILE *logfp;
{
register int i;
int *config;

if (lang == (LANGUAGE *)NULL)
    return( (RESULT *)NULL );	/* don't bother if no language data */
/*
 *  copy language data into local structure (for faster access)
 */
Lang.alphabet = lang->alphabet;
Lang.null = lang->null;
Lang.any = lang->any;
Lang.boundary = lang->boundary;
Lang.subsets = lang->subsets;
Lang.numsubsets = lang->numsubsets;
Lang.automata = lang->automata;
Lang.num_rules = lang->num_rules;
Lang.lex_pair = lang->lex_pair;
Lang.surf_pair = lang->surf_pair;
Lang.num_pairs = lang->num_pairs;
Lang.alterns = lang->alterns;
Lang.num_alterns = lang->num_alterns;
Lang.lex_sections = lang->lex_sections;
Lang.initial_lex = lang->initial_lex;
Lang.num_lex_sections = lang->num_lex_sections;
/*
 *  copy other invariant parameters to local storage
 */
static_Limit_flag = limit;
Trace_flag = trace;
static2_Log_fp = logfp;

if (!valid_form(lexform,&Lang,static2_Log_fp))
    return( (RESULT *)NULL );	/* don't bother if invalid lexical form */

config = (int *)myalloc(Lang.num_rules * sizeof(int));
for ( i = 0 ; i < Lang.num_rules ; ++i )
    config[i] = 1;		/* automata all start in state 1 */

/*****************************************************************
 *  check the boundary character if applicable
 */
if (	(Lang.boundary != NUL) &&
	(strchr((char *)Lang.lex_pair, Lang.boundary) != (char *)NULL)  )
    {
    TRACE(2, 0, ' ', Lang.boundary, Lang.boundary, config, 0, NORMAL_T )
    if ((i = move_automata(Lang.boundary, Lang.boundary, config, &Lang)) != 0)
	{
	TRACE(2, 0, '-', ' ',' ', config, i, BLOCK_T )
	myfree(config);
	return( (RESULT *)NULL );
	}
    }
else
    TRACE(2, 0, ' ', ' ', ' ', config, 0, NORMAL_T)

static2_result = (unsigned char *)myalloc(MAXLINELEN+1);
memset((char *)static2_result,0,MAXLINELEN+1);
static2_result_size = MAXLINELEN;
static2_resHead = (RESULT *)NULL;

gener(lexform, 0, config);

myfree(static2_result);
static2_result = (unsigned char *)NULL;
static2_result_size = 0;
myfree(config);
return( static2_resHead );
}

