kmx git

/*
 *                     OpenBIOS - free your system! 
 *                         ( FCode tokenizer )
 *                          
 *  scanner.c - simple scanner for forth files.
 *  
 *  This program is part of a free implementation of the IEEE 1275-1994 
 *  Standard for Boot (Initialization Configuration) Firmware.
 *
 *  Copyright (C) 2001-2005 by Stefan Reinauer <stepan@openbios.org>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA
 *
 */

/* **************************************************************************
 *         Modifications made in 2005 by IBM Corporation
 *      (C) Copyright 2005 IBM Corporation.  All Rights Reserved.
 *      Modifications Author:  David L. Paktor    dlpaktor@us.ibm.com
 **************************************************************************** */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#ifdef __GLIBC__
#define __USE_XOPEN_EXTENDED
#endif
#include <string.h>
#include <time.h>
#include <ctype.h>

#include "macros.h"
#include "stack.h"
#include "stream.h"
#include "emit.h"
#include "toke.h"
#include "dictionary.h"
#include "vocabfuncts.h"
#include "scanner.h"
#include "errhandler.h"
#include "tokzesc.h"
#include "conditl.h"
#include "flowcontrol.h"
#include "usersymbols.h"
#include "clflags.h"
#include "devnode.h"
#include "tracesyms.h"
#include "nextfcode.h"

#include "parselocals.h"

/* **************************************************************************
 *
 *  Some VERY IMPORTANT global variables follow
 *
 **************************************************************************** */

u8  *statbuf=NULL;      /*  The word just read from the input stream  */
u8   base=0x0a;         /*  The numeric-interpretation base           */

/* pci data */
bool pci_is_last_image=TRUE;
u16  pci_image_rev=0x0001;  /*  Vendor's Image, NOT PCI Data Structure Rev */
u16  pci_vpd=0x0000;


/*  Having to do with the state of the tokenization  */
bool offs16       = TRUE;    /*  We are using 16-bit branch- (etc) -offsets */
bool in_tokz_esc  = FALSE;   /*  TRUE if in "Tokenizer Escape" mode   */
bool incolon      = FALSE;   /*  TRUE if inside a colon definition    */
bool haveend      = FALSE;   /*  TRUE if the "end" code was read.     */
int do_loop_depth = 0;       /*  How deep we are inside DO ... LOOP variants  */

/*  Used for error-checking of IBM-style Locals  */
int lastcolon;   /*  Location in output stream of latest colon-definition. */

/*  Used for error reporting   */
char *last_colon_defname = NULL;   /*  Name of last colon-definition        */
char *last_colon_filename = NULL;  /*  File where last colon-def'n made     */
unsigned int last_colon_lineno;    /*  Line number of last colon-def'n      */
bool report_multiline = TRUE;      /*  False to suspend multiline warning   */
unsigned int last_colon_abs_token_no;

/* **************************************************************************
 *  Local variables
 **************************************************************************** */
static u16  last_colon_fcode;  /*  FCode-number assigned to last colon-def'n  */
                               /*      Used for RECURSE  */

static bool do_not_overload = TRUE ;  /*  False to suspend dup-name-test     */
static bool got_until_eof = FALSE ;   /*  TRUE to signal "unterminated"      */

static unsigned int last_colon_do_depth = 0;

/*  State of headered-ness for name-creation  */
typedef enum headeredness_t {
       FLAG_HEADERLESS ,
       FLAG_EXTERNAL ,
       FLAG_HEADERS }  headeredness ;
static headeredness hdr_flag = FLAG_HEADERLESS ;  /*  Init'l default state  */

/*  Local variables having to do with:                                      */
/*       ...  the state of the tokenization                                 */
static bool is_instance = FALSE;        /*  Is "instance" is in effect?     */
static char *instance_filename = NULL;  /*  File where "instance" invoked   */
static unsigned int instance_lineno;    /*  Line number of "instance"       */
static bool fcode_started = FALSE ;     /*  Only 1 fcode_starter per block. */
static bool first_fc_starter = TRUE;    /*  Only once per tokenization...   */

/*       ... with the state of the input stream,                            */
static bool need_to_pop_source;

/*       ... with the use of the return stack,                              */
static int ret_stk_depth = 0;          /*  Return-Stack-Usage-Depth counter */

/*       ... and with control of error-messaging.                           */
           /*  Should a warning about a dangling "instance" 
	    *      be issued at the next device-node change?
	    */
static bool dev_change_instance_warning = TRUE;

           /*  Has a gap developed between "instance" and its application?  */
static bool instance_definer_gap = FALSE;

           /*  Shared phrases                                               */
static char *in_tkz_esc_mode = "in Tokenizer-Escape mode.\n";


/* **************************************************************************
 *
 *      Function name:  skip_ws
 *      Synopsis:       Advance the PC past all whitespace.
 *                      Protect against pointer over-runs 
 *
 *      Inputs:
 *         Parameters:                  NONE
 *         Global Variables:        
 *             pc                       Input-source Scanning pointer
 *             end                      End of input-source buffer
 *
 *      Outputs:
 *         Returned Value:      TRUE if PC reached END before non-blank char
 *         Global Variables:    
 *             pc            Advanced to first non-blank char, or to END  
 *             lineno        Incremented if encountered new-line along the way
 *
 *      Error Detection:
 *          Return a TRUE if End of input-source buffer reached before
 *              non-blank character.  Not necessarily an error; allow
 *              calling routine to decide...
 *
 **************************************************************************** */

static bool skip_ws(void)
{
    bool retval = TRUE;
    char ch_tmp;

    for (  ; pc < end; pc++ )
{
        ch_tmp = *pc;
	if ( (ch_tmp != '\t') && (ch_tmp != ' ') && (ch_tmp != '\n' ) )
	{
	    retval = FALSE;
	    break;
	}
        if ( ch_tmp == '\n')  lineno++;
    }
    return ( retval );
}

/* **************************************************************************
 *
 *      Function name:  skip_until
 *      Synopsis:       Advance the PC to the given character.
 *                      Do not copy anything into statbuf.
 *                      Protect against pointer over-runs 
 *
 *      Inputs:
 *         Parameters:
 *             lim_ch                   Limiting Character
 *         Global Variables:        
 *             pc                       Input-source Scanning pointer
 *             end                      End of input-source buffer
 *
 *      Outputs:
 *         Returned Value:      TRUE if PC reached END before finding LIM_CH
 *         Global Variables:    
 *             pc            Advanced to first occurrence of LIM_CH, or to END  
 *             lineno        Incremented if encountered new-line along the way
 *
 *      Error Detection:
 *          Return a TRUE if End of input-source buffer reached before
 *              desired character.  Not necessarily an error; allow calling
 *              routine to decide...
 *
 **************************************************************************** */

bool skip_until( char lim_ch)
{
    bool retval = TRUE;
    char ch_tmp;

    for (  ; pc < end; pc++ )
    {
        ch_tmp = *pc;
        if ( ch_tmp == lim_ch )
	{
	    retval = FALSE;
	    break;
	}
        if ( ch_tmp == '\n')  lineno++;
	}
    return ( retval );
}


/* **************************************************************************
 *
 *      Function name:  get_until
 *      Synopsis:       Return, in  statbuf, the string from  PC  to the first
 *                      occurrence of the given delimiter-character..
 *
 *      Inputs:
 *         Parameters:
 *             needle          The given delimiter-character
 *         Global Variables:
 *             pc              Input-source Scanning Pointer
 *
 *      Outputs:
 *         Returned Value:     Length of the string obtained
 *         Global Variables:
 *             statbuf         The string obtained from the input stream;
 *                                 does not include the delimiter-character.
 *             pc              Bumped past the delimiter-character, unless
 *                                 it's a new-line, in which case leave it
 *                                 to be handled by  get_word()
 *         Local Static Variables:
 *             got_until_eof   Pass this as a signal that the end of the
 *                                 buffer was reached before the delimiter;
 *                                 Testing whether PC has reached END is
 *                                 not a sufficient indication.
 *
 *      Error Detection:
 *          If string overflows  statbuf  allocation, ERROR, and 
 *              return "no string" (i.e., length = 0).
 *          Otherwise, if delimiter not found before eof, keep string.
 *              Protection against PC pointer-over-run past END is
 *              provided by  skip_until() .  Reaching END will be
 *              handled by calling routine; pass indication along
 *              via Local Static Variable.
 *
 *      Process Explanation:
 *          Skip the delimiter-character from further input, unless it's a
 *              new-line which will be skipped anyway.  Let  skip_until() 
 *              and  get_word()  handle incrementing line-number counters.
 *          If skip_until()  indicated reaching end-of-file, don't bump PC
 *
 *      Revision History:
 *          Updated Thu, 14 Jul 2005 by David L. Paktor
 *              More robust testing for when PC exceeds END 
 *                  Involved replacing  firstchar()
 *
 **************************************************************************** */
	
static signed long get_until(char needle)
{                                                                               
	u8 *safe;                                                         
	unsigned long len = 0;

	safe=pc;

	got_until_eof = skip_until(needle);

	len = pc - safe;
	if (len >= GET_BUF_MAX )
	{
	    tokenization_error( TKERROR,
		"get_until buffer overflow.  Max is %d.\n", GET_BUF_MAX-1 );
	    len = GET_BUF_MAX-1;
}

	memcpy(statbuf, safe, len);
	statbuf[len]=0;

	if ( INVERSE(got_until_eof) )
{
	    if ( needle != '\n' )  pc++;
	}
	return len;
}


/* **************************************************************************
 *
 *          We are going to use a fairly sophisticated mechanism to
 *              make a smooth transition between processing the body
 *              of a Macro, a User-defined Symbol or an FLOADed file 
 *              and the resumption of processing the source file, so
 *              that the end-of-file will only be seen at the end of
 *              the primary input file (the one from the command-line).
 *         This mechanism will be tied in with the get_word() routine
 *
 *         We are going to define a private data-structure in which
 *              we will save the state of the current source file,
 *              and from which, of course, we will recover it.  Its
 *              fields will be:
 *                   A pointer to the next structure in the list.
 *                   The saved values of  START  END  and  PC
 *                   The saved values of  INAME  and  LINENO
 *                   A flag indicating that get-word should "pause"
 *                        before popping the source-stream because
 *                        the input file will be changing.
 *                   A place from which to save and recover the state of
 *                        whether we're testing for "Multi-line" strings;
 *                        to prevent undeserved "Multi-line" warnings
 *                        during Macro processing.
 *                   A pointer to a "resumption" routine, to call
 *                        when resuming processing the source file;
 *                        the routine takes a pointer parameter
 *                        and has no return value.  The pointer
 *                        may be NULL if no routine is needed.
 *                   The pointer to pass as the parameter to the
 *                        resumption routine.
 *
 **************************************************************************** */

typedef struct source_state
    {
	struct source_state   *next;
	u8                    *old_start;
	u8                    *old_pc;
	u8                    *old_end;
	char                  *old_iname;
	unsigned int           old_lineno;
	bool                   pause_before_pop;
	bool                   sav_rep_multlin;
	void                 (*resump_func)();
	_PTR                   resump_param;
    } source_state_t ;

static source_state_t  *saved_source = NULL;


/* **************************************************************************
 *
 *      Function name:  push_source
 *      Synopsis:       Save the state of the current source file, in the
 *                          source_state data-structure LIFO linked-list.
 *
 *      Inputs:
 *         Parameters:
 *             res_func              Pointer to routine to call when resuming
 *                                       processing the saved source file.
 *             res_param             Parameter to pass to res_func.
 *                                   Either or both pointers may be NULL.
 *             file_chg              TRUE if input file is going to change.
 *         Global Variables:
 *             start                 Points to current input buffer
 *             end                   Points to end of current input buffer
 *             pc                    Input point in current buffer
 *             iname                 Name of current source file
 *             lineno                Line number in current source file
 *             report_multiline      Whether we're testing for "Multi-line"
 *         Local Static Variables:
 *             saved_source          Pointer to the source_state data-structure
 *
 *      Outputs:
 *         Returned Value:           NONE
 *         Local Static Variables:
 *             saved_source          Points to new source_state entry
 *         Memory Allocated
 *             for the new source_state entry
 *         When Freed?
 *             When resuming processing the source file, by drop_source().
 *
 *      Process Explanation:
 *          The calling routine will establish the new input buffer via
 *              a call to init_inbuf() or the like.
 *
 **************************************************************************** */

void push_source( void (*res_func)(), _PTR res_parm, bool file_chg )
{
    source_state_t  *new_sav_src;

    new_sav_src = safe_malloc( sizeof(source_state_t), "pushing Source state");

    new_sav_src->next = saved_source;
    new_sav_src->old_start = start;
    new_sav_src->old_pc = pc;
    new_sav_src->old_end = end;
    new_sav_src->old_iname = iname;
    new_sav_src->old_lineno = lineno;
    new_sav_src->pause_before_pop = file_chg;
    new_sav_src->sav_rep_multlin = report_multiline;
    new_sav_src->resump_func = res_func;
    new_sav_src->resump_param = res_parm;

    saved_source = new_sav_src;
}

/* **************************************************************************
 *
 *      Function name:  drop_source
 *      Synopsis:       Remove last saved state of source processing
 *                          from the source_state LIFO linked-list,
 *                          without (or after) restoring.
 *
 *      Inputs:
 *         Parameters:               NONE
 *         Local Static Variables:
 *             saved_source          Pointer to the source_state data-structure
 *
 *      Outputs:
 *         Returned Value:           NONE
 *         Local Static Variables:
 *             saved_source          Points to previous source_state entry
 *         Memory Freed
 *             Saved source_state entry that was just "dropped"
 *
 *      Error Detection:
 *          None.  Called only when linked-list is known not to be at end.  
 *
 **************************************************************************** */

static void drop_source( void)
{
    source_state_t  *former_sav_src = saved_source;

    saved_source = saved_source->next ;
    free( former_sav_src);
}

/* **************************************************************************
 *
 *      Function name:  pop_source
 *      Synopsis:       Restore the state of source processing as it was
 *                          last saved in the source_state linked-list.
 *
 *      Inputs:
 *         Parameters:               NONE
 *         Local Static Variables:
 *             saved_source          Pointer to the source_state data-structure
 *             need_to_pop_source    If TRUE, don't check before popping.
 *
 *      Outputs:
 *         Returned Value:           TRUE if reached end of linked-list
 *         Global Variables:
 *             start                 Points to restored input buffer
 *             end                   Points to end of restored input buffer
 *             pc                    Input point in restored buffer
 *             iname                 Name of restored source file
 *             lineno                Line number in restored source file
 *             report_multiline      Restored to saved value.
 *         Local Static Variables:
 *             saved_source          Points to previous source_state entry
 *             need_to_pop_source    TRUE if postponed popping till next time
 *         Memory Freed
 *             Saved source-state entry that was just "popped"
 *
 *      Process Explanation:
 *          First check the need_to_pop_source flag.
 *          If it is set, we will clear it and go ahead and pop.
 *          If it is not set, we will check the  pause_before_pop  field
 *                  of the top entry in the source_state linked-list.
 *              If the  pause_before_pop  field is set, we will set the
 *                  need_to_pop_source flag and return.
 *              If it is not, we will go ahead and pop.
 *          If we are going to go ahead and pop, we will call the
 *              "Resume-Processing" routine (if it's not NULL) before
 *              we restore the saved source state.
 *
 **************************************************************************** */

static bool pop_source( void )
{
    bool retval = TRUE;

    if ( saved_source != NULL )
    {
	retval = FALSE;
	if ( need_to_pop_source )
	{
	    need_to_pop_source = FALSE;
	}else{
	    if ( saved_source->pause_before_pop )
	    {
	        need_to_pop_source = TRUE;
		return( retval);
	    }
	}

	if ( saved_source->resump_func != NULL )
	{
	    saved_source->resump_func( saved_source->resump_param);
	}
	report_multiline = saved_source->sav_rep_multlin;
	lineno = saved_source->old_lineno ;
	iname = saved_source->old_iname ;
	end = saved_source->old_end ;
	pc = saved_source->old_pc ;
	start = saved_source->old_start ;

	drop_source();
    }
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  get_word
 *      Synopsis:       Gather the next "word" (aka Forth Token) from the
 *                          input stream.
 *                      A Forth Token is, of course, a string of characters
 *                          delimited by white-space (blank, tab or new-line).
 *                      Do not increment line-number counters here; leave
 *                          the delimiter after the word unconsumed.
 *
 *      Inputs:
 *         Parameters:                 NONE
 *         Global Variables:
 *             pc                      Input-stream Scanning Pointer
 *         Local Static Variables:
 *             need_to_pop_source      If TRUE, pop_source() as first step
 *
 *      Outputs:
 *         Returned Value:             Length of "word" gotten;
 *                                     0 if  reached end of file.
 *                                     -1 if reached end of primary input
 *                                         (I.e., end of all source)
 *         Global Variables:
 *             statbuf                 Copy of "gotten" word
 *             pc                      Advanced to end of "gotten" word,
 *                                         (i.e., the next word is "consumed")
 *                                         unless returning zero.
 *             abs_token_no            Incremented, if valid "word" (token)
 *                                         was gotten.
 *
 *      Process Explanation:
 *          Skip whitespace to the start of the token, 
 *             then skip printable characters to the end of the token.
 *          That part's easy, but what about when skipping whitespace
 *              brings you to the end of the input stream?
 *          First, look at the  need_to_pop_source  flag.  If it's set,
 *              we came to the end of the input stream the last time
 *              through.  Now we need to  pop_source()  first.
 *          Next, we start skipping whitespace; this detects when we've
 *                  reached the end of the input stream.  If we have,
 *                  then we need to  pop_source()  again.
 *              If  pop_source()  returned a TRUE, we've reached the end
 *                  of the primary input file.  Return -1.
 *              If  pop_source()  turned the  need_to_pop_source  flag
 *                  to TRUE again, then we need to "pause" until the
 *                  next time through; return zero.
 *          Otherwise, we proceed with collecting the token as described.
 *
 *      Revision History:
 *          Updated Thu, 23 Feb 2006 by David L. Paktor
 *              Tied this routine in with a more sophisticated mechanism that
 *                  makes a smooth transition between processing the body of
 *                  a Macro, a User-defined Symbol or an FLOADed file, and 
 *                  the resumption of processing the source file, so that the
 *                  end-of-file will only be seen at the end of the primary
 *                  input file (the one that came from the command-line)
 *          Updated Fri, 24 Feb 2006 by David L. Paktor
 *              This is trickier than I thought.  Added a global indicator
 *                  of whether a file-boundary was crossed while getting
 *                  the word; previously, that was indicated by a return
 *                  value of zero, which now means something else...
 *              The flag,  closed_stream , will be cleared every time this
 *                  routine is entered, and set whenever close_stream() is
 *                  entered.
 *         Updated Tue, 28 Feb 2006 at 10:13 PST by David L. Paktor
 *              Trickier still.  On crossing a file-boundary, must not
 *                  consume the first word in the resumed file, for one
 *                  call; instead, return zero.  Consume it on the next
 *                  call.  The  closed_stream  flag is now irrelevant and
 *                  has gone away.
 *
 **************************************************************************** */

signed long get_word( void)
{
	size_t len;
	u8 *str;
	bool keep_skipping;
	bool pop_result;

	if ( need_to_pop_source )
	{
	    pop_result = pop_source();
	}

	do {
	    keep_skipping = skip_ws();
	    if ( keep_skipping )
	    {
		pop_result = pop_source();
		if ( pop_result || need_to_pop_source )
		{
		    statbuf[0] = 0;
		    if ( pop_result )
		    {
			return -1;
		    }
		return 0;
		}
	    }
	} while ( keep_skipping );

	str=pc;
	while ( (str < end) && *str && *str!='\n' && *str!='\t' && *str!=' ')
		str++;

	len=(size_t)(str-pc);
	if (len >= GET_BUF_MAX )
	{
	    tokenization_error ( FATAL,
		"get_word buffer overflow.  Max is %d.", GET_BUF_MAX-1 );
	}

	memcpy(statbuf, pc, len); 
	statbuf[len]=0;

#ifdef DEBUG_SCANNER
	printf("%s:%d: debug: read token '%s', length=%ld\n",
			iname, lineno, statbuf, len);
#endif
	pc+=len;
	abs_token_no++;
	return len;
}


/* **************************************************************************
 *
 *      Function name:  get_word_in_line
 *      Synopsis:       Get the next word on the same line as the current
 *                      line of input.  If the end of line was reached
 *                      before a word was found, print an error message
 *                      and return an indication.
 *
 *      Inputs:
 *         Parameters:
 *             func_nam        Name of the function expecting the same-line
 *                                 input; for use in the Error Message.
 *                                 If NULL, do not issue Error Message
 *         Global Variables:
 *             pc              Input character pointer.  Saved for comparison
 *             lineno          Current input line number.  Saved for comparison
 *
 *      Outputs:
 *         Returned Value:     TRUE = success.  Word was acquired on same line.
 *         Global Variables:
 *             statbuf         Advanced to the next word in the input stream.
 *             pc              Advanced if no error; restored otherwise.
 *
 *      Error Detection:
 *          If no next word is gotten (i.e., we're at end-of-file), or if
 *              one is gotten but not on the same line, the routine will
 *              return FALSE; if  func_nam  is not NULL, an ERROR Message
 *              will be issued.
 *          Also, the values of  PC  LINENO  and  ABS_TOKEN_NO  will be reset
 *              to the positions they had when this routine was entered.
 *
 **************************************************************************** */

bool get_word_in_line( char *func_nam)
{                                                                               
    signed long wlen;
    bool retval = TRUE;
    u8 *save_pc = pc;
    unsigned int save_lineno = lineno;
    unsigned int save_abs_token_no = abs_token_no;

    /*  Copy of function name, for error message  */
    char func_cpy[FUNC_CPY_BUF_SIZE+1];

    /*  Do this first, in the likely event that  func_nam  was  statbuf   */
    if ( func_nam != NULL )
    {
	strncpy( func_cpy, func_nam, FUNC_CPY_BUF_SIZE);
	func_cpy[FUNC_CPY_BUF_SIZE] = 0;  /*  Guarantee a null terminator  */
    }

    wlen = get_word();
    if ( ( lineno != save_lineno ) || ( wlen <= 0 ) )
    {
	abs_token_no = save_abs_token_no;
	lineno = save_lineno;
	pc = save_pc;
	retval = FALSE;
	if ( func_nam != NULL )
	{
	    tokenization_error ( TKERROR,
	       "Operator %s expects its target on the same line\n",
		   strupr(func_cpy));
	}
    }
    return ( retval );
}


/* **************************************************************************
 *
 *      Function name:  get_rest_of_line
 *      Synopsis:       Get all the remaining text on the same line as
 *                      the current line of input.  If there is no text
 *                      (not counting whitespace) before the end of line,
 *                      return an indication.
 *
 *      Inputs:
 *         Parameters:         NONE
 *         Global Variables:
 *             pc              Input character pointer.  Saved for restoration
 *             lineno          Current input line number.  Saved for comparison
 *
 *      Outputs:
 *         Returned Value:     TRUE = success.  Text was acquired on same line.
 *         Global Variables:
 *             statbuf         Contains the text found in the input stream.
 *             pc              Advanced to end of line or of whitespace, if
 *                                 no error; restored otherwise.
 *             lineno          Preserved if no error; otherwise, restored.
 *             abs_token_no    Restored if error; otherwise, advanced as normal.
 *
 *      Error Detection:
 *          Routine will return FALSE if no text is gotten on the same line.
 *
 **************************************************************************** */

bool get_rest_of_line( void)
{
    bool retval = FALSE;
    u8 *save_pc = pc;
    unsigned int save_lineno = lineno;
    unsigned int save_abs_token_no = abs_token_no;

    if ( INVERSE( skip_ws() ) )
    {
        if ( lineno == save_lineno )
	{
	    signed long wlen = get_until('\n');
	    if ( wlen > 0 ) retval = TRUE;
	}else{
	    abs_token_no = save_abs_token_no;
	    lineno = save_lineno;
	    pc = save_pc;
	}
    }
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  warn_unterm
 *      Synopsis:       Message for "Unterminated ..." something
 *                      Show saved line-number, where the "something" started,
 *                      and the definition, if any, in which it occurred.
 *
 *      Inputs:
 *         Parameters:
 *             severity              Type of error/warning message to display
 *                                       usually either WARNING or TKERROR
 *             something             String to print after "Unterminated"
 *             saved_lineno          Line-Number where the "something" started
 *         Global Variables:
 *             lineno                Saved, then restored.
 *             last_colon_defname    Used only if unterm_is_colon is TRUE;
 *         Local Static Variables:
 *             unterm_is_colon       See 07 Mar 2006 entry under Rev'n History
 *
 *      Outputs:
 *         Returned Value:           NONE
 *         Global Variables:
 *             lineno                Saved, then restored.
 *         Local Static Variables:
 *             unterm_is_colon       Reset to FALSE
 *         Printout:
 *             Warning or Error message
 *
 *      Revision History:
 *          Updated Mon, 06 Mar 2006 by David L. Paktor
 *              Added call to in_last_colon()
 *          Updated Tue, 07 Mar 2006 by David L. Paktor
 *              Call to in_last_colon() works okay in most cases except for
 *                  when the "something" is a Colon Definition; there, it
 *                  results in the phrase: ... Definition in definition of ...
 *                  which is awkward.  To eliminate that, I am introducing 
 *                  a Local Static Variable flag called  unterm_is_colon
 *                  which will be set only in the appropriate place and
 *                  re-cleared here.  It's a retro-fit, of course; it could
 *                  have been a parameter had the need for it occurred when
 *                  this routine was first constructed... 
 *
 **************************************************************************** */

static bool unterm_is_colon = FALSE;
void warn_unterm( int severity, char *something, unsigned int saved_lineno)
{
    unsigned int tmp = lineno;
    lineno = saved_lineno;
    if ( unterm_is_colon )
    {
	tokenization_error( severity, "Unterminated %s of %s\n",
	    something, strupr( last_colon_defname) );
	unterm_is_colon = FALSE;
    }else{
	tokenization_error( severity, "Unterminated %s", something);
	in_last_colon();
    }
    lineno = tmp;
}

/* **************************************************************************
 *
 *      Function name:  warn_if_multiline
 *      Synopsis:       Test for "Multi-line ..." something and issue WARNING
 *                      Show saved line-number, where the "something" started
 *
 *      Inputs:
 *         Parameters:
 *             something          String to print after "Unterminated"
 *             start_lineno       Line-Number where the "something" started
 *         Global Variables:
 *             lineno             Line-Number where we are now
 *             iname              Input file name, to satisfy ...where_started()
 *                                    (Not crossing any actual file boundary.)
 *             report_multiline   TRUE = go ahead with the message
 *
 *      Outputs:
 *         Returned Value:        NONE
 *         Global Variables:
 *             report_multiline   Restored to TRUE.
 *
 *      Error Detection:
 *          Only issue message if the current  lineno  doesn't equal
 *              the start_lineno  
 *
 *      Process Explanation:
 *          The directive "multi-line" allows the user to specify that
 *              the next "Multi-line ..." something is intentional, and
 *              will cause its warning to be suppressed.  It remains in
 *              effect until it's "used"; afterwards, it's reset.
 *
 **************************************************************************** */

void warn_if_multiline( char *something, unsigned int start_lineno )
{
    if ( report_multiline && ( start_lineno != lineno ) )
    {
	tokenization_error( WARNING, "Multi-line %s, started", something);
	where_started( iname, start_lineno);
    }
    report_multiline = TRUE;
}


/* **************************************************************************
 *
 *      Function name:  string_remark
 *      Synopsis:       Suspend string parsing past end of line and
 *                      whitespace at start of the new line.
 *
 *      Inputs:
 *         Parameters:
 *             errmsg_txt            Text to be used for error-message.
 *         Global Variables:
 *             pc                    Input-source Scanning pointer
 *
 *      Outputs:
 *         Returned Value:           NONE
 *         Global Variables:
 *             pc                    Will point to first non-blank in new line
 *
 *      Error Detection:
 *          The return value of the skip_until() or skip_ws() routine
 *             will indicate if PC goes past END.  Issue a WARNING.
 *             The calling routine will handle things from there.
 *
 **************************************************************************** */

static void string_remark(char *errmsg_txt)
{
    unsigned int sav_lineno = lineno;
    bool eof = skip_until('\n');
    if ( ! eof )
    {
	eof = skip_ws();
    }
    if ( eof )
    {
	warn_unterm(WARNING, errmsg_txt, sav_lineno);
	}
	
}


/*  Convert the given string to a number in the supplied base   */
/*  Allow -- and ignore -- embedded periods.    */
/*  The  endptr  param represents a pointer that will be updated
 *      with the address of the first non-numeric character encountered,
 *      (unless it is a NULL, in which case it is ignored).
 */
/*  There is no test for a completely invalid string;
 *  the calling routine is responsible for ascertaining
 *  the validity of the string being passed.
 */
static long parse_number(u8 *start, u8 **endptr, int lbase) 
{
	long val = 0;
	bool negative = FALSE ;
	int  curr;
	u8 *nptr=start;

	curr = *nptr;
	if (curr == '-')
	{
		negative = TRUE ;
		nptr++;
	}
	
	for (curr = *nptr; (curr = *nptr); nptr++) {
		if ( curr == '.' )
			continue;
		if ( curr >= '0' && curr <= '9')
			curr -= '0';
		else if (curr >= 'a' && curr <= 'f')
			curr += 10 - 'a';
		else if (curr >= 'A' && curr <= 'F')
			curr += 10 - 'A';
		else
			break;
		
		if (curr >= lbase)
			break;
		
		val *= lbase;
		val += curr;
	}

#ifdef DEBUG_SCANNER
	if (curr)
		printf( "%s:%d: warning: couldn't parse number '%s' (%d/%d)\n",
				iname, lineno, start,curr,lbase);
#endif

	if (endptr)
		*endptr=nptr;

	if (negative)
	{
		val = -val;
	}
	return val;
}

/* **************************************************************************
 *
 *      Function name:  add_byte_to_string
 *      Synopsis:       Add the given byte (or character) to the string
 *                          being accumulated in statbuf, but protect
 *                          against a buffer overflow.
 *
 *      Inputs:
 *         Parameters:
 *             nu_byte           The given character to be added
 *             walk              Pointer to pointer to the position
 *                                   in  statbuf  where the character
 *                                   is to be placed
 *         Global Variables:
 *             statbuf           Buffer where the string is accumulated
 *         Macros:
 *             GET_BUF_MAX       Size of the buffer
 *
 *      Outputs:
 *         Returned Value:       NONE
 *         Supplied Pointers:
 *             **walk            Given character is placed here
 *             *walk             Incremented in any case
 *
 *      Error Detection:
 *          If  walk  has reached end of string buffer, do not place
 *              the character, but continue to increment  walk .
 *              Calling routine will detect overflow.
 *
 **************************************************************************** */
				
static void add_byte_to_string( u8 nu_byte, u8 **walk )
{
    if ( *walk - statbuf < GET_BUF_MAX )
    {
	**walk = nu_byte;
	}
    (*walk)++;
}

/* **************************************************************************
 *
 *      Function name:  c_string_escape
 *      Synopsis:       Process C-style escape syntax in strings
 *
 *      Inputs:
 *         Parameters:
 *             walk                    Pointer to pointer to area into
 *                                         which to put acquired values
 *         Global Variables:
 *             pc                      Input-source Scanning pointer
 *
 *      Outputs:
 *         Returned Value:             NONE
 *         Global Variables:
 *             pc                      Point to last character processed.
 *         Supplied Pointers:
 *             *walk                   Advanced by number of bytes acquired
 *
 *      Error Detection:
 *          WARNING conditions.  See under "Process Explanation" below.
 *
 *      Process Explanation:
 *          Start with  PC  pointing to the first character to process
 *              i.e., after the backslash.
 *          We recognize newline, tab and numbers
 *          A digit-string in the current base can be converted to a number.
 *          The first non-numeric character ends the numeric sequence
 *              and gets swallowed up.
 *          If the number exceeds the size of a byte, use the truncated
 *          	value and issue a WARNING.
 *          If the first character in the "digit"-string was non-numeric,
 *              use the character literally and issue a WARNING.
 *          If the character that ended the numeric sequence is a quote,
 *              it might be the end of the string, or the start of a
 *              special-character or even of an "( ... ) hex-sequence,
 *              so don't swallow it up.
 *
 *      Still to be done:
 *          Better protection against PC pointer-over-run past END.
 *              Currently, this works, but it's held together by threads:
 *              Because  init_stream  forces a null-byte at the end of
 *              the input buffer, parse_number() exits immediately upon
 *              encountering it.  This situation could be covered more
 *              robustly...
 *
 **************************************************************************** */

static void c_string_escape( u8 **walk)
{
    char c = *pc;
    u8 val;
    /*  We will come out of this "switch" statement
     *      with a value for  val  and a decision
     *      as to whether to write it.
     */
    bool write_val = TRUE;
	
    switch (c)
    {
			case 'n':
				/* newline */
	    val = '\n';
				break;
			case 't':
				/* tab */
	    val = '\t';
				break;
			default:

	    /*  Digit-string?  Convert it to a number, using the current base.
	     *  The first non-numeric character ends the numeric sequence
	     *      and gets swallowed up.
	     *  If the number exceeds the size of a byte, use the truncated
	     *      value and issue a WARNING.
	     *  If the first character in the "digit"-string was non-numeric,
	     *      use the character literally and issue a WARNING.
	     */

	     /*
	     *  If the sequence ender is a quote, it might be the end of
	     *      the string, or the start of a special-character or even
	     *      of an "( ... ) hex-sequence, so don't swallow it up.
	     */
	    {
		long lval;
		u8 *sav_pc = pc;
		lval=parse_number(pc, &pc, base);
		val = (u8)lval;
#ifdef DEBUG_SCANNER
				if (verbose)
					printf( "%s:%d: debug: escape code "
						"0x%x\n",iname, lineno, val);
#endif
		if ( lval > 0x0ff )
		{
		    tokenization_error ( WARNING,
			"Numeric String after \\ overflows byte.  "
			    "Using 0x%02x.\n", val);
			}

		if ( pc == sav_pc )
		{
		    /*  NOTE:  Here, PC hasn't been advanced past its
		     *      saved value, so we can count on  C  remaining
		     *      unchanged since the start of the routine.
		     */ 
		    /*  Don't use the null-byte at the end of the buffer  */
		    if ( ( pc >= end ) 
		    /*        or a sequence-ending quote.                 */
			 || ( c == '"' ) )
		    {
			write_val = FALSE;
		    }else{
			/*  In the WARNING message, print the character
			 *      if it's printable or show it in hex
			 *      if it's not.
			 */
			if ( (c > 0x20 ) && ( c <= 0x7e) )
			{
			    tokenization_error ( WARNING,
				"Unrecognized character, %c, "
				    "after \\ in string.  "
					"Using it literally.\n", c);
			}else{
			    tokenization_error ( WARNING,
				"Unrecognized character, 0x%02x, "
				    "after \\ in string.  "
					"Using it literally.\n", c);
			}
			val = c;
		    }
		}
		/*  NOTE:  Here, however, PC may have been advanced...  */
		/*  Don't swallow the sequence-ender if it's a quote.   */
		if ( *pc == '"' )
		{
		    pc--;
		}

	    }   /*  End of the  "default"  clause  */
    }    /*  End of the  "switch"  statement  */

    if ( write_val ) add_byte_to_string( val, walk );

}

/* **************************************************************************
 *
 *      Function name:  get_sequence
 *      Synopsis:       Process the Hex-Number option in strings
 *                      Protect against PC pointer-over-run past END.
 *
 *      Inputs:
 *         Parameters:
 *             **walk           Pointer to pointer to area into which
 *                                  to put acquired values
 *         Global Variables:
 *             pc               Input-source Scanning pointer
 *             end              End of input-source buffer
 *
 *      Outputs:
 *         Returned Value:      TRUE = "Normal Completion" (I.e., not EOF)
 *         Global Variables:
 *             pc               Points at terminating close-paren, or END
 *             lineno           Input File Line-Number Counter, may be incr'd
 *         Supplied Pointers:
 *             *walk            Advanced by number of values acquired
 *
 *      Error Detection:
 *          End-of-file encountered before end of hex-sequence:
 *              Issue a Warning, conclude processing, return FALSE.
 *
 *      Process Explanation:
 *          SETUP and RULES:
 *              Start with  PC  pointing to the first character
 *                  after the '('  (Open-Paren)     
 *              Bytes are gathered from digits in pairs, except
 *                  when separated they are treated singly.
 *              Allow a backslash in the middle of the sequence
 *                  to skip to the end of the line and past the
 *                  whitespace at the start of the next line,
 *                  i.e., it acts as a comment-escape.
 *
 *          INITIALIZE:
 *              PV_indx = 0
 *              Set return-indicator to "Abnormal Completion"
 *              Ready_to_Parse = FALSE
 *              Stuff NULL into PVAL[2]
 *          WHILE PC is less than END
 *              Pick up character at PC into Next_Ch
 *              IF  Next_Ch  is close-paren :
 *                  Set return-indicator to "Normal Completion".
 *                  Done!  Break out of loop.
 *              ENDIF
 *              IF comment-escape behavior (controlled by means of a
 *                      command-line switch) is allowed
 *                  IF  Next_Ch  is backslash :
 *                      Skip to end-of line, skip whitespace.
 *                      If that makes PC reach END :  WARNING message.
 *                          (Don't need to break out of loop;
 *                               normal test will terminate.)
 *                      CONTINUE Loop.
 *                          (Don't increment PC; PC is already at right place).
 *                  ENDIF
 *              ENDIF
 *              IF  Next_Ch  is a valid Hex-Digit character :
 *                  Stuff it into  PVAL[PV_indx]
 *                  IF (PV_indx is 0) :
 *                      Increment PV_indx
 *                  ELSE
 *                      Set Ready_to_Parse to TRUE
 *          	    ENDIF
 *              ELSE
 *                  IF  Next_Ch  is a New-Line, increment Line Number counter
 *                  IF (PV_indx is 1) :
 *                      Stuff NULL into PVAL[1]
 *                      Set Ready_to_Parse to TRUE
 *          	    ENDIF
 *              ENDIF
 *              IF Ready_to_Parse
 *                  Parse PVAL
 *                  Stuff into WALK
 *                  Reset PV_indx to zero
 *                  Reset Ready_to_Parse to FALSE
 *              ENDIF
 *              Increment PC
 *          REPEAT
 *          Return with Normal/Abnormal completion indicator
 *
 **************************************************************************** */

static bool get_sequence(u8 **walk)
{
    int pv_indx = 0;
    bool retval = FALSE;   /*  "Abnormal Completion" indicator  */
    bool ready_to_parse = FALSE;
    char next_ch;
    char pval[3];

#ifdef DEBUG_SCANNER
	printf("%s:%d: debug: hex field:", iname, lineno);
#endif
    pval[2]=0;

    while ( pc < end )
    {
	next_ch = *pc;
	if ( next_ch == ')' )
	{
	    retval = TRUE;
				break;
	}
	if ( hex_remark_escape )
	{
	    if ( next_ch == '\\' )
	    {
		string_remark("string hex-sequence remark");
		continue;
	    }
	}
	if ( isxdigit(next_ch) )
	{
	    pval[pv_indx] = next_ch;
	    if ( pv_indx == 0 )
	    {
		pv_indx++;
	    }else{
		ready_to_parse = TRUE;
	    }
	}else{
	    if ( next_ch == '\n' )  lineno++ ;
	    if ( pv_indx != 0 )
	    {
		pval[1] = 0;
		ready_to_parse = TRUE;
	    }
	}
	if ( ready_to_parse )
	{
	    u8 val = parse_number(pval, NULL, 16);
	    *((*walk)++)=val;
#ifdef DEBUG_SCANNER
		printf(" %02x",val);
#endif
	    pv_indx = 0;
	    ready_to_parse = FALSE;
	}
	pc++;
    }
#ifdef DEBUG_SCANNER
	printf("\n");
#endif
    return ( retval );
}

/* **************************************************************************
 *
 *    Return the length of the string.
 *    Pack the string, without the terminating '"' (Quote), into statbuf
 *    Protect against PC pointer-over-run past END.
 *    Enable Quote-Backslash as a String-Remark Escape.
 *    Allowability of Quote-Backslash as a String-Remark is under control
 *        of a command-line switch (string_remark_escape ).
 *    Allowability of C-style escape characters is under control
 *        of a command-line switch ( c_style_string_escape ).
 *
 *    Truncate string to size of Forth Packed-String (i.e., uses leading
 *        count-byte, so limited to 255, number that one byte can express)
 *        unless the string is being gathered for a Message or is being
 *        consumed for purposes of ignoring it, in either of which case
 *        that limit need not be enforced.  Parameter "pack_str" controls
 *        this:  TRUE  if limit needs to be enforced.
 *
 *    Issue WARNING if string length gets truncated.
 *    Issue WARNING if string crosses line.
 *        The issuance of the Multi-line WARNING is under control of a
 *           one-shot directive similar to OVERLOAD , called  MULTI-LINE
 *
 *    Still to be decided:
 *        Do we want to bring the allowability of strings crossing
 *            lines under control of a command-line switch?
 *
 ************************************************************************** */

static signed long get_string( bool pack_str)
{
	u8 *walk;
	unsigned long len;
	char c;
	bool run = TRUE;
	unsigned long start_lineno = lineno;    /*  For warning message  */
	
	/*
	 *  Bump past the single whitespace character that delimits
	 *      the command -- e.g.,  ."  or  "  or suchlike -- that
	 *      starts the string.  Allow new-line to be a command-
	 *      -delimiting whitespace character.  Regard any sub-
	 *      sequent whitespace characters as part of the string
	 */
	if (*pc++=='\n') lineno++;

	got_until_eof = TRUE ;

	walk=statbuf;
	while (run) {
		switch ((c=*pc))
		{
		    /*  Standard use of '"' (Quote)  for special-char escape  */
		    case '\"':
			/*  Skip the '"' (Quote) */
				pc++;
			/*  End of the buffer also ends the string cleanly  */
			if ( pc >= end )
			{
			    run = FALSE;
			    got_until_eof = FALSE ;
				break;
			}
			/*  Pick up the next char after the '"' (Quote) */
			c=*pc;
			switch (c)
			{
			    case '(':
				pc++; /* skip the '(' */
				run = get_sequence(&walk);
				break;

			case 'n':
				add_byte_to_string( '\n', &walk);
				break;
			case 'r':
				add_byte_to_string( '\r', &walk);
				break;
			case 't':
				add_byte_to_string( '\t', &walk);
				break;
			case 'f':
				add_byte_to_string( '\f', &walk);
				break;
			case 'l':
				add_byte_to_string( '\n', &walk);
				break;
			case 'b':
				add_byte_to_string( 0x08, &walk);
				break;
			case '!':
				add_byte_to_string( 0x07, &walk);
				break;
			case '^':
				pc++;    /*   Skip the up-arrow (Caret) */
				add_byte_to_string( *pc & 0x1f , &walk);
				break;
			    /*  We're done after any of the whitespace
			     *     characters follows a quote.
			     */
			case ' ':
			case '\t':
				/*  Advance past the terminating whitespace
				 *       character, except newline.
				 *  Let  get_word()  handle that.
				 */
				pc++;
			    case '\n':
				run=FALSE;
				got_until_eof = FALSE ;
				break;
			default:
				/*  Control allowability of Quote-Backslash
				 *      as a String-Remark by means of a
				 *      command-line switch.
				 */
				if ( string_remark_escape )
				{
				    if ( c == '\\' )
				    {
					string_remark("string-escape remark");
					/* The first non-blank in the new line
					 *     has not been processed yet.
					 *     Back up to allow it to be.
					 */
					pc--;
				break;
			}
				}
				add_byte_to_string( c, &walk);
			}
			break;
		    case '\n':
			/*  Allow strings to cross lines.  Include the
			 *      newline in the string.  Account for it.
			 */
			lineno++;
		default:
			/*  Control allowability of C-style escape-character
			 *      syntax by means of a command-line switch.
			 */
			if ( c_style_string_escape )
			{
			    if ( c == '\\' )
			    {
				pc++;
				c_string_escape(&walk );
				break;
			    }
			}
			add_byte_to_string( c, &walk);
		}
		/*  Advance past the char processed, unless we're done.     */
		if ( run ) pc++;
		/*  Done if we hit end of file before string was concluded  */
		if ( pc >= end )
		{
		    run = FALSE;
		    if ( got_until_eof )
		    {
			warn_unterm( WARNING, "string", start_lineno);
			/*  Prevent multiple messages for one error  */
			got_until_eof = FALSE;
		    }
		}
	}
	
	warn_if_multiline( "string", start_lineno);

	len = walk - statbuf;
	if (len >= GET_BUF_MAX )
	{
	    tokenization_error ( TKERROR,
		"get_string buffer overflow.  Max is %d\n.", GET_BUF_MAX-1 );
	    len = GET_BUF_MAX-1;
	}
#ifdef DEBUG_SCANNER
	if (verbose)
		printf("%s:%d: debug: scanned string: '%s'\n", 
					iname, lineno, statbuf);
#endif
	if ( pack_str && (len > STRING_LEN_MAX) )
	{
	    tokenization_error ( WARNING,
		"String length being truncated to %d.\n", STRING_LEN_MAX );
	    len = STRING_LEN_MAX;
	}
	statbuf[len] = 0;

	return len ;
}


/* **************************************************************************
 *
 *      Function name:  handle_user_message
 *      Synopsis:       Collect a user-generated tokenization-time message;
 *                          either print it or discard it.  Shared code
 *                          for  user_message()  and  skip_user_message()
 *
 *      Inputs:
 *         Parameters:
 *             delim                End-of-string delimiter character.
 *                                  If it's a double-quote ("), we will use
 *                                      the get-string() routine, with all
 *                                      its options, to collect the message.
 *                                  Otherwise, we'll capture plain text from
 *                                      the input stream.
 *             print_it             TRUE if we should print the message 
 *         Local Static Variables:
 *             got_until_eof        TRUE if reached end of buffer before delim.
 *         Global Variables:
 *             lineno               Save, then restore.
 *
 *      Outputs:
 *         Returned Value:          NONE
 *         Global Variables:
 *             statbuf              The string will be collected in here
 *             
 *         Printout (if  print_it  is TRUE):
 *            The string, with new-line tacked on, will be printed from
 *                the  tokenization_error()  routine as a MESSAGE.
 *            The line-number will be shown as of the origin of the message
 *
 *      Error Detection:
 *          Error-reports will be printed regardless of  print_it  param.
 *          If delimiter was not found, show "Unterminated" warning message.
 *          If delimiter was " (double-quote), the get_string() routine
 *              already checked for a multi-line construct; if delimiter is
 *              a new-line, then a multi-line construct is impossible.
 *              otherwise, we will do the multi-line check here.
 *
 **************************************************************************** */

static void handle_user_message( char delim, bool print_it )
{
    signed long wlen;
    unsigned int start_lineno = lineno;
    unsigned int multiline_start = lineno;    /*  For warning message  */
    bool check_multiline = FALSE;
    const char *ug_msg = "user-generated message";

    if ( delim == '"' )
    {
	wlen = get_string( FALSE);
    }else{
	/*
	 *  When the message-delimiter is a new-line, and the
	 *      command-delimiter was a new-line, it means the
	 *      string length is zero; we won't bump the PC.
	 *  Otherwise, we will honor the convention we extend
	 *      to  .(  whereby, if the command is delimited
	 *      by a new-line, we allow the string to begin
	 *      on the next line.
	 */
	if ( delim == '\n' )
	{
	    if ( *pc != '\n') pc++;
	}else{
		if (*pc++=='\n') lineno++;
	    multiline_start = lineno;
	    check_multiline = TRUE;
	}
	wlen = get_until( delim );
    }

    if ( print_it )
    {
	unsigned int tmp_lineno = lineno;
	lineno = start_lineno;
	/*  Don't add a new-line to body of the message.
	 *  Routine already takes care of that.
	 *  Besides, buffer might be full...
	 */
	tokenization_error( MESSAGE, statbuf);
	lineno = tmp_lineno;
    }

    if ( got_until_eof )   /*  Crude but effective retrofit... */
    {
	warn_unterm(WARNING, (char *)ug_msg, start_lineno);
    }else{
	if ( check_multiline )
	{
	    warn_if_multiline( (char *)ug_msg, multiline_start);
	}
    }
}

/* **************************************************************************
 *
 *      Function name:  user_message
 *      Synopsis:       Collect a user-generated message and
 *                          print it at tokenization-time.
 *
 *      Tokenizer directive (either mode):
 *          Synonyms                              String Delimiter
 *             [MESSAGE]  #MESSAGE  [#MESSAGE]        end-of-line
 *             #MESSAGE"                                  "  
 *      "Tokenizer-Escape" mode directive         String Delimiter
 *          .(                                            )
 *          ."                                            "
 *
 *      Inputs:
 *         Parameter is the "parameter field" of the TIC entry, which
 *             was initialized to the end-of-string delimiter character.
 *
 *      Outputs:
 *         Returned Value:          NONE
 *         Printout:                User-message, parsed from input.
 *
 *      Extraneous Remarks:
 *          We would have preferred to simply use the "character value"
 *              aspect of the union, but we found portability issues
 *              between big- and little- -endian processors, so we still
 *              have to recast its type here.
 *
 **************************************************************************** */

void user_message( tic_param_t pfield )
{
    char delim = (char)pfield.deflt_elem ;
    handle_user_message( delim, TRUE);
}

/* **************************************************************************
 *
 *      Function name:  skip_user_message
 *      Synopsis:       Collect a user-generated message and discard it.
 *                          Used when ignoring a Conditional section.
 *
 *      Tokenizer directive (either mode):
 *          Synonyms                              String Delimiter
 *             [MESSAGE]  #MESSAGE  [#MESSAGE]        end-of-line
 *             #MESSAGE"                                  "  
 *      "Tokenizer-Escape" mode directive         String Delimiter
 *          .(                                            )
 *          ."                                            "
 *
 *      Inputs:
 *         Parameters:
 *             pfield               "Parameter field" of the TIC entry, which
 *                                      was initialized to the delimiter.
 *
 *      Outputs:
 *         Returned Value:          NONE
 *         Printout:                NONE
 *
 **************************************************************************** */

void skip_user_message( tic_param_t pfield )
{
    char delim = (char)pfield.deflt_elem ;
    handle_user_message( delim, FALSE);
}



/* **************************************************************************
 *
 *      Function name:  get_number
 *      Synopsis:       If the word retrieved from the input stream is a
 *                      valid number (under the current base) convert it.
 *                      Return an indication if it was not.
 *
 *      Inputs:
 *         Parameters:
 *             *result             Pointer to place to return the number
 *         Global Variables:
 *             statbuf             The word just read that is to be converted.
 *             base                The current numeric-interpretation base.
 *
 *      Outputs:
 *         Returned Value:         TRUE = Input was a valid number
 *         Supplied Pointers:
 *             *result             The converted number, if valid
 *                                     otherwise undefined
 *
 *      Revision History:
 *          Updated Mon, 28 Mar 2005 by David L. Paktor
 *              Always use the current base.
 *              Reversed sense of return-flag.
 *
 **************************************************************************** */

bool get_number( long *result)
{
    u8 *until;
    long val;
    bool retval = FALSE ;

    val = parse_number(statbuf, &until, base);
	
#ifdef DEBUG_SCANNER
    printf("%s:%d: debug: parsing number: base 0x%x, val 0x%lx, "
		"processed %ld of %ld bytes\n", iname, lineno, 
		 base, val,(size_t)(until-statbuf), strlen((char *)statbuf));
#endif

    /*  If number-parsing ended before the end of the input word,
     *      then the input word was not a valid number.
     */
    if (until==(statbuf+strlen((char *)statbuf)))
    {
	*result=val;
	retval = TRUE;
    }

    return ( retval );
}

/* **************************************************************************
 *
 *      Function name:  deliver_number
 *      Synopsis:       Deliver the supplied number according to the
 *                              state of the tokenizer:
 *                          In normal tokenization mode, emit it as an
 *                              FCode literal.
 *                          In  "Tokenizer-Escape" mode, push it onto
 *                              the Data Stack.
 *
 *      Inputs:
 *         Parameters:
 *             numval                  The number, verified to be valid.
 *         Global Variables:
 *             in_tokz_esc   TRUE if tokenizer is in "Tokenizer Escape" mode.
 *
 *      Outputs:
 *         Returned Value:             NONE 
 *         Items Pushed onto Data-Stack:
 *             Top:                    The number, if  in_tokz_esc  was TRUE
 *         FCode Output buffer:
 *             If  in_tokz_esc  was FALSE, a  b(lit)  token will be written,
 *                 followed by the number.
 *
 **************************************************************************** */

static void deliver_number( long numval)
{
    if ( in_tokz_esc )
    {
        dpush( numval );
    } else {
        emit_literal(numval);
    }
}
/* **************************************************************************
 *
 *      Function name:  handle_number
 *      Synopsis:       Convert the word just retrieved from the input stream
 *                              to a number.
 *                      Indicate whether the string was a valid number and
 *                              deliver it, as appropriate.
 *
 *      Inputs:
 *         Parameters:                 NONE
 *         Global Variables:
 *             statbuf       The word that was just read, and to be converted.
 *
 *      Outputs:
 *         Returned Value:    TRUE = Input string was a valid number
 *         If input string was a valid number, the converted number will
 *             be delivered, as appropriate, by  deliver_number(). 
 *
 **************************************************************************** */

static bool handle_number( void )
{
    bool retval ;
    long numval;

    retval = get_number( &numval );
    if ( retval )
    {
        deliver_number( numval );
    }

    return ( retval );
}

/* **************************************************************************
 *
 *      Function name:  ascii_right_number
 *      Synopsis:       Convert a character sequence to a number, justified
 *                          toward the right (i.e., the low-order bytes) and
 *                          deliver it, as appropriate.
 *
 *      Inputs:
 *         Parameters:
 *             in_str                  The input string
 *
 *      Outputs:
 *         Returned Value:             NONE
 *         The converted number will be delivered by  deliver_number(). 
 *
 *      Process Explanation:
 *          The last four characters in the sequence will become the number.
 *          If there are fewer than four, they will fill the low-order part
 *              of the number.
 *          Example:  PCIR            is converted to  h# 50434952
 *                    CPU             is converted to  h# 00435055
 *             and
 *          	      LotsOfStuff     is equivalent to  a# tuff 
 *                                    and is converted to  h# 74756666
 *
 **************************************************************************** */

static void ascii_right_number( char *in_str)
{
    u8 nxt_ch;
    char *str_ptr = in_str;
    long numval = 0;

    for ( nxt_ch = (u8)*str_ptr ;
	    ( nxt_ch = (u8)*str_ptr ) != 0 ;
        	str_ptr++ )
    {
        numval = ( numval << 8 ) + nxt_ch ;
    }
    deliver_number( numval );
}


/* **************************************************************************
 *
 *      Function name:  ascii_left_number
 *      Synopsis:       Similar to  ascii_right_number()  except justified
 *                          toward the left (i.e., the high-order bytes).
 *                      
 *
 *      Inputs:
 *         Parameters:
 *             in_str                  The input string
 *
 *      Outputs:
 *         Returned Value:            NONE
 *         The converted number will be delivered by  deliver_number().
 *
 *      Process Explanation:
 *          If there are fewer than four characters in the sequence, they
 *              will fill the high-order part of the number.
 *                    CPU             is converted to  h# 43505500
 *          In all other respects, similar to  ascii_right_number()
 *
 **************************************************************************** */

static void ascii_left_number( char *in_str)
{
    u8 nxt_ch;
    char *str_ptr = in_str;
    long numval = 0;
    int shift_amt = 24;
    bool shift_over = FALSE ;

    for ( nxt_ch = (u8)*str_ptr ;
	    ( nxt_ch = (u8)*str_ptr ) != 0 ;
        	str_ptr++ )
    {
        if ( shift_over )  numval <<= 8;
	if ( shift_amt == 0 )  shift_over = TRUE ;
	numval += ( nxt_ch << shift_amt );
	if ( shift_amt > 0 ) shift_amt -= 8;
    }
    deliver_number( numval );

}

/* **************************************************************************
 *
 *      Function name:  init_scanner
 *      Synopsis:       Allocate memory the Scanner will need.
 *                          Only need to call once per program run.
 *
 **************************************************************************** */

void init_scanner(void)
{
	statbuf=safe_malloc(GET_BUF_MAX, "initting scanner");
}

/* **************************************************************************
 *
 *      Function name:  exit_scanner
 *      Synopsis:       Free up memory the Scanner used
 *
 **************************************************************************** */

void exit_scanner(void)
{
	free(statbuf);
}

/* **************************************************************************
 *
 *      Function name:  set_hdr_flag
 *      Synopsis:       Set the state of the "headered-ness" flag to the
 *                          value given, unless over-ridden by one or both
 *                          of the "always-..." Command-Line Flags
 *
 *      Inputs:
 *         Parameters:
 *             new_flag                  New setting
 *         Global Variables:
 *             always_headers            Override HEADERLESS and make HEADERS
 *             always_external           Override HEADERLESS and HEADERS;
 *                                           make EXTERNAL
 *
 *      Outputs:
 *         Returned Value:               None
 *         Local Static Variables:
 *             hdr_flag                  Adjusted to new setting
 *
 *      Process Explanation:
 *          If  always_headers  is TRUE, and  new_flag  is not FLAG_EXTERNAL
 *              then set to FLAG_HEADERS
 *          If  always_external  is TRUE, set to FLAG_EXTERNAL, regardless.
 *              (Note:  always_external  over-rides  always_headers).
 *          Otherwise, set to  new_flag
 *
 **************************************************************************** */

static void set_hdr_flag( headeredness new_flag)
{
    headeredness new_state = new_flag;
    switch ( new_flag)
    {
	case FLAG_HEADERLESS:
	    {
		if ( always_headers )
		{   new_state = FLAG_HEADERS;
		}
	    /*  No  break.  Intentional...   */
	    }
	case FLAG_HEADERS:
	    {
		if ( always_external )
		{   new_state = FLAG_EXTERNAL;
		}
	    /*  No  break.  Intentional...   */
	    }
	case FLAG_EXTERNAL:
	    break;  /*  Satisfy compiler's error-checking...   */
	/*  No default needed here...   */
    }

    hdr_flag = new_state;

}


/* **************************************************************************
 *
 *      Function name:  init_scan_state
 *      Synopsis:       Initialize various state variables for each time
 *                          a new tokenization scan is started.
 *
 *      Inputs:
 *         Parameters:             NONE
 *
 *      Outputs:
 *         Returned Value:         NONE
 *         Global Variables:   Initialized to:
 *             base                            0x0a (I.e., base = "decimal")
 *             nextfcode                       By  reset_fcode_ranges()
 *             pci_is_last_image               TRUE
 *             incolon                         FALSE
 *         Local Static Variables:
 *             hdr_flag                  FLAG_HEADERLESS (unless over-ridden)
 *             is_instance                     FALSE
 *             last_colon_filename             NULL
 *             instance_filename               NULL
 *             dev_change_instance_warning     TRUE
 *             instance_definer_gap            FALSE
 *             need_to_pop_source              FALSE
 *             first_fc_starter                TRUE
 *             ret_stk_depth                   0
 *         Memory Freed
 *             Copies of input-file name in  last_colon_filename  and
 *                 instance_filename , if allocated.
 *
 **************************************************************************** */

void init_scan_state( void)
{
    base = 0x0a;
    pci_is_last_image = TRUE;
    incolon = FALSE;
    is_instance = FALSE;
    set_hdr_flag( FLAG_HEADERLESS);
    reset_fcode_ranges();
    first_fc_starter = TRUE;
    if ( last_colon_filename != NULL ) free( last_colon_filename);
    if ( instance_filename != NULL ) free( instance_filename);
    last_colon_filename = NULL;
    instance_filename = NULL;
    dev_change_instance_warning = TRUE;
    instance_definer_gap = FALSE;
    need_to_pop_source = FALSE;
    ret_stk_depth = 0;
}


/* **************************************************************************
 *
 *      Function name:  collect_input_filename
 *      Synopsis:       Save a copy of the current input file name in the
 *                          given variable, for error-reporting purposes
 *
 *      Inputs:
 *         Parameters:
 *             saved_nam                    Pointer to pointer for copy of name
 *         Global Variables:
 *             iname                        Current input file name
 *         Local Static Variables:
 *
 *      Outputs:
 *         Returned Value:                  NONE
 *         Supplied Pointers:
 *             *saved_nam                   Copy of name
 *         Memory Allocated
 *             For copy of input file name
 *         When Freed?
 *             Subsequent call to this routine with same pointer
 *             (Last copy made will be freed if starting a new tokenization,
 *                 otherwise, will persist until end of program.) 
 *         Memory Freed
 *             Previous copy in same pointer.
 *
 *      Process Explanation:
 *          If there is a previous copy, and it still matches the current
 *              input-file name, we don't need to free or re-allocate.
 *
 **************************************************************************** */

static void collect_input_filename( char **saved_nam)
{
    bool update_lcfn = TRUE;    /*  Need to re-allocate?  */
    if ( *saved_nam != NULL )
    {
	if ( strcmp( *saved_nam, iname) == 0 )
	{
	    /*  Last collected filename unchanged from iname  */
	    update_lcfn = FALSE;
	}else{
	    free( *saved_nam);
	}
    }
    if ( update_lcfn )
    {
	*saved_nam = strdup(iname);
    }
} 

/* **************************************************************************
 *
 *      Function name:  test_in_colon
 *      Synopsis:       Error-check whether a word is being used in the
 *                      correct state, relative to being inside a colon
 *                      definition; issue a message if it's not.
 *      
 *      Inputs:
 *         Parameters:
 *             wname            The name of the word in question
 *             sb_in_colon      TRUE if the name should be used inside
 *                                  a colon-definition only; FALSE if
 *                                  it may only be used outside of a
 *                                  colon-definition.
 *             severity         Type of error/warning message to call.
 *                                  usually either WARNING or TKERROR
 *             use_instead      Word the error-message should suggest be
 *                                  used "instead".  This may be a NULL,
 *                                  in which case the "suggestion" part
 *                                  of the message will simply be omitted.
 *         Global Variables:
 *             incolon          State of the tokenization; TRUE if inside
 *                                  a colon definition
 *
 *      Outputs:
 *         Returned Value:     TRUE if no error.
 *         Printout:           Error messages as indicated.
 *
 *      Error Detection:
 *          If the state, relative to being inside a colon-definition,
 *              is not what the parameter says it should be, issue a
 *              message of the indicated severity, and return FALSE.
 *
 **************************************************************************** */

static bool test_in_colon ( char *wname,
                           bool sb_in_colon,    /*  "Should Be IN colon"  */
			        int severity,
				     char *use_instead)
{
    bool is_wrong;
    bool retval = TRUE ;

    is_wrong = BOOLVAL(( sb_in_colon != FALSE ) != ( incolon != FALSE )) ;
    if ( is_wrong )
    {  
        char *ui_pt1 = "";
        char *ui_pt2 = "";
        char *ui_pt3 = "";
	retval = FALSE;
	if ( use_instead != NULL )
	{
	    ui_pt1 = "  Use  ";
	    ui_pt2 = use_instead;
	    ui_pt3 = "  instead.";
	}
	tokenization_error ( severity, "The word  %s  should not be used "
	    "%sside of a colon definition.%s%s%s\n", strupr(wname),
	        sb_in_colon ? "out" : "in", ui_pt1, ui_pt2, ui_pt3 );
    }
    return ( retval );
}

/* **************************************************************************
 *
 *      Function name:  must_be_deep_in_do
 *      Synopsis:       Check that the statement in question is called 
 *                          from inside the given depth of structures
 *                          of the  DO ... LOOP -type (i.e., any combination
 *                          of DO  or ?DO  and  LOOP  or  +LOOP ).
 *                      Show an error if it is not.
 *
 **************************************************************************** */

static void must_be_deep_in_do( int how_deep )
{
    int functional_depth = do_loop_depth;
    if ( incolon )
    {
        functional_depth -= last_colon_do_depth;
    }
    if ( functional_depth < how_deep )
    {
	char deep_do[64] = "";
	int indx;
	bool prefix = FALSE;

	for ( indx = 0; indx < how_deep ; indx ++ )
	{
	    strcat( deep_do, "DO ... ");
	}
	for ( indx = 0; indx < how_deep ; indx ++ )
	{
	    if ( prefix )
	    {
		strcat( deep_do, " ... ");
	    }
	    strcat( deep_do, "LOOP");
	    prefix = TRUE;
	}

	tokenization_error( TKERROR,
	    "%s outside of  %s  structure", strupr(statbuf), deep_do);
	in_last_colon();
    }

}

/* **************************************************************************
 *
 *      Function name:  bump_ret_stk_depth
 *      Synopsis:       Increment or decrement the Return-Stack-Usage-Depth
 *                          counter.
 *
 *      Inputs:
 *         Parameters:
 *             bump              Amount by which to increment;
 *                                   negative number to decrement.
 *         Local Static Variables:
 *             ret_stk_depth     The Return-Stack-Usage-Depth counter
 *
 *      Outputs:
 *         Returned Value:        NONE
 *         Local Static Variables:
 *             ret_stk_depth     Incremented or decremented
 *
 *      Process Explanation:
 *          This simple-seeming function is actually a place-holder
 *             for future expansion.  Proper error-detection of
 *             Return-Stack usage is considerably more complex than
 *             what we are implementing here, and is deferred for a
 *             later revision.
 *
 *      Still to be done:
 *          Full detection of whether the Return-Stack has been cleared
 *              when required, including analysis of Return-Stack usage
 *              within Flow-Control constructs, and before Loop elements...
 *
 *      Extraneous Remarks:
 *          Some FORTHs use a Loop-Control stack separate from the Return-
 *              -Stack, but others use the Return-Stack to keep LOOP-control
 *              elements.  An FCode program must be portable between different
 *              environments, and so must adhere to the restrictions listed
 *              in the ANSI Spec:
 *
 *       3.2.3.3   Return stack  
 *        . . . . . .
 *       A program may use the return stack for temporary storage during the
 *          execution of a definition subject to the following restrictions:
 *       	A program shall not access values on the return stack (using R@,
 *       	    R>, 2R@ or 2R>) that it did not place there using >R or 2>R;
 *       	A program shall not access from within a do-loop values placed
 *       	    on the return stack before the loop was entered;
 *       	All values placed on the return stack within a do-loop shall
 *       	    be removed before I, J, LOOP, +LOOP, UNLOOP, or LEAVE is
 *       	    executed;
 *       	All values placed on the return stack within a definition
 *       	    shall be removed before the definition is terminated
 *       	    or before EXIT is executed.
 *
 **************************************************************************** */

static void bump_ret_stk_depth( int bump)
{
    ret_stk_depth += bump;
}


/* **************************************************************************
 *
 *      Function name:  ret_stk_balance_rpt
 *      Synopsis:       Display a Message if usage of the Return-Stack
 *                          appears to be out of balance.
 *
 *      Inputs:
 *         Parameters:
 *             before_what         Phrase to use in Message;
 *                                     if NULL, use statbuf...
 *             clear_it            TRUE if this call should also clear the
 *                                     Return-Stack-Usage-Depth counter
 *         Global Variables:
 *             statbuf             Word currently being processed
 *         Local Static Variables:
 *             ret_stk_depth       The Return-Stack-Usage-Depth counter
 *
 *      Outputs:
 *         Returned Value:         NONE
 *         Local Static Variables:
 *             ret_stk_depth       May be cleared
 *
 *      Error Detection:
 *          Based simply on whether the Return-Stack-Usage-Depth counter
 *              is zero.  This is a weak and uncertain implementation;
 *              therefore, the Message will be a WARNING phrased with
 *              some equivocation.
 *
 *      Process Explanation:
 *          Proper detection of Return-Stack usage errors is considerably
 *              more complex, and is deferred for a future revision.
 *
 *      Still to be done:
 *          Correct analysis of Return-Stack usage around Flow-Control
 *              constructs.  Consider, for instance, the following:
 * 
 *          blablabla >R  yadayada IF  R> gubble ELSE flubble R>  THEN
 * 
 *              It is, in fact, correct, but the present scheme would
 *              tag it as a possible error.  Conversely, something like:
 * 
 *          blablabla >R  yadayada IF  R> gubble THEN
 * 
 *              would not get tagged, even though it is actually an error.
 * 
 *          The current simple scheme also does not cover Return-Stack
 *              usage within Do-Loops or before Loop elements like I and
 *              J or UNLOOP or LEAVE.  Implementing something like that
 *              would probably need to be integrated in with Flow-Control
 *              constructs, and will be noted in  flowcontrol.c
 *
 **************************************************************************** */

static void ret_stk_balance_rpt( char *before_what, bool clear_it)
{
    if ( ret_stk_depth != 0 )
    {
	char *what_flow = ret_stk_depth < 0 ? "deficit" : "excess" ;
	char *what_phr =  before_what != NULL ? before_what : strupr(statbuf);

	tokenization_error( WARNING,
	    "Possible Return-Stack %s before %s", what_flow, what_phr);
	in_last_colon();

	if ( clear_it )
	{
	    ret_stk_depth = 0;
	}
    }
}


/* **************************************************************************
 *
 *      Function name:  ret_stk_access_rpt
 *      Synopsis:       Display a Message if an attempt to access a value
 *                          on the Return-Stack appears to occur before
 *                          one was placed there.
 *
 *      Inputs:
 *         Parameters:                NONE
 *         Global Variables:
 *             statbuf                Word currently being processed
 *         Local Static Variables:
 *             ret_stk_depth          The Return-Stack-Usage-Depth counter
 *
 *      Outputs:
 *         Returned Value:             NONE
 *
 *      Error Detection:
 *          Equivocal WARNING, based simply on whether the Return-Stack-
 *              -Usage-Depth counter not positive.
 *
 *      Process Explanation:
 *          Proper detection is deferred...
 *
 *      Still to be done:
 *          Correct analysis of Return-Stack usage...
 *
 **************************************************************************** */

static void ret_stk_access_rpt( void)
{
    if ( ret_stk_depth <= 0 )
    {
	tokenization_error( WARNING,
	    "Possible Return-Stack access attempt by %s "
		"without value having been placed there",
		strupr(statbuf) );
	in_last_colon();
    }
}



/* **************************************************************************
 *
 *      Function name:  encode_file
 *      Synopsis:       Input a (presumably binary) file and encode it
 *                      as a series of strings which will be accumulated
 *                      and encoded in a manner appropriate for a property.
 *
 *      Associated Tokenizer directive:        encode-file        
 *
 *      Error Detection:
 *          Handled by support routines.
 *
 **************************************************************************** */

static void encode_file( const char *filename )
{
	FILE *f;
	size_t s;
	int num_encoded=0;
	
	tokenization_error( INFO, "ENCODing File %s\n", filename );

	f = open_expanded_file( filename, "rb", "encoding");
	if( f != NULL )
	{
	    while( (s=fread(statbuf, 1, STRING_LEN_MAX, f)) )
	    {
		    emit_token("b(\")");
		    emit_string(statbuf, s);
		    emit_token("encode-bytes");
		    if( num_encoded )
			    emit_token("encode+");
		    num_encoded += s;
	    }
	    fclose( f );
	    tokenization_error ( INFO, "ENCODed %d bytes.\n", num_encoded);
	}
}

/* **************************************************************************
 *
 *      Function name:  check_name_length
 *      Synopsis:       If the length of a user-defined name exceeds the
 *                          ANSI-specified maximum of 31 characters, issue
 *                          a message.  This is a hard-coded limit.
 *                      Although our Tokenizer can handle longer names,
 *                          they will cause big problems when encountered
 *                          by an FCode interpreter.
 *                      If the name is going to be included in the binary
 *                          output, the message severity must be an ERROR.
 *                      Otherwise, if the name is HEADERLESS, the severity
 *                          can be reduced to a Warning; if the name is only
 *                          defined in "Tokenizer Escape" mode the message
 *                          severity can be further reduced to an Advisory.
 *
 *      Inputs:
 *         Parameters:
 *             wlen                 Length of the newly-created word
 *         Global Variables: 
 *             in_tokz_esc          TRUE if in "Tokenizer Escape" mode.
 *         Local Static Variables:
 *             hdr_flag             State of headered-ness for name-creation
 *
 *      Outputs:
 *         Returned Value:          NONE
 *         Global Variables:        
 *         Printout:                ERROR message if applicable.
 *
 *      Error Detection:
 *             The whole point of this routine.  
 *
 *      Revision History:
 *          Updated Wed, 20 Jul 2005 by David L. Paktor
 *               Escalated from merely an informative warning to a TKERROR 
 *          Updated Fri, 21 Oct 2005 by David L. Paktor
 *               Adjust severity if name doesn't go into the FCode anyway...
 *
 **************************************************************************** */

void check_name_length( signed long wlen )
{
    if ( wlen > 31 )
    {
	int severity = TKERROR;
	if ( in_tokz_esc )
	{   severity = INFO;
	}else{
	    if (hdr_flag == FLAG_HEADERLESS)
	    {   severity = WARNING;
	    }
	}
	tokenization_error( severity,
	    "ANSI Forth does not permit definition of names "
		"longer than 31 characters.\n" );
    }

}


/* **************************************************************************
 *
 *      Function name:  definer_name
 *      Synopsis:       Given a defining-word internal token, return
 *                      a printable string for the definer, for use
 *                      in an error-message.
 *
 *      Inputs:
 *         Parameters:
 *             definer             Internal token for the defining-word
 *             reslt_ptr           Pointer to string-pointer that takes
 *                                     the result, if successful
 *
 *      Outputs:
 *         Returned Value:         TRUE if definer was recognized
 *         Supplied Pointers:
 *             *reslt_ptr          If successful, points to printable string;
 *                                     otherwise, left unchanged.
 *
 *
 **************************************************************************** */

static bool definer_name(fwtoken definer, char **reslt_ptr)
{
    bool retval = TRUE;
    switch (definer)
    {
	case VARIABLE:
	    *reslt_ptr = "VARIABLE";
	    break;
	case DEFER:
	    *reslt_ptr = "DEFER";
	    break;
	case VALUE:
	    *reslt_ptr = "VALUE";
	    break;
	case BUFFER:
	    *reslt_ptr = "BUFFER";
	    break;
	case CONST:
	    *reslt_ptr = "CONSTANT";
	    break;
	case COLON:
	    *reslt_ptr = "COLON";
	    break;
	case CREATE:
	    *reslt_ptr = "CREATE";
	    break;
	case FIELD:
	    *reslt_ptr = "FIELD";
	    break;
	case MACRO_DEF:
	    *reslt_ptr = "MACRO";
	    break;
	case ALIAS:
	    *reslt_ptr = "ALIAS";
	    break;
	case LOCAL_VAL:
	    *reslt_ptr = "Local Value name";
	    break;
	default:
	    retval = FALSE;
    }

    return ( retval);
}


/* **************************************************************************
 *
 *      Function name:  as_a_what
 *      Synopsis:       Add the phrase "as a[n] <DEF'N_TYPE>" for the given
 *                          definition-type to the given string buffer.
 *
 *      Inputs:
 *         Parameters:
 *             definer                 Internal token for the defining-word
 *             as_what                 The string buffer to which to add.
 *
 *      Outputs:
 *         Returned Value:             TRUE if an assigned name was found
 *                                         for the given definer and text
 *                                         was added to the buffer.
 *         Supplied Pointers:
 *             *as_what                Text is added to this buffer.
 *
 *      Process Explanation:
 *          The calling routine is responsible to make sure the size of
 *              the buffer is adequate.  Allow 25 for this routine.
 *          The added text will not have spaces before or after; if any
 *              are needed, they, too, are the responsibility of the
 *              calling routine.  The return value gives a helpful clue.
 *
 **************************************************************************** */

bool as_a_what( fwtoken definer, char *as_what)
{
    char *defn_type_name;
    bool retval = definer_name(definer, &defn_type_name);
    if ( retval )
    {
	strcat( as_what, "as a");
	/*  Handle article preceding definer name
	 *      that starts with a vowel.
	 */
	/*  HACK:  Only one definer name -- ALIAS --
	 *      begins with a vowel.  Take advantage
	 *      of that...
	 *  Otherwise, we'd need to do something involving
	 *      strchr( "AEIOU", defn_type_name[0] )
	 */
	if ( definer == ALIAS ) strcat( as_what, "n" );

	strcat( as_what, " ");
	strcat( as_what, defn_type_name);
    }
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  lookup_word
 *      Synopsis:       Find the TIC-entry for the given word in the Current
 *                          mode -- relative to "Tokenizer-Escape" -- and
 *                          Scope into which definitions are being entered.
 *                      Optionally, prepare text for various Message types.
 *
 *      Inputs:
 *         Parameters:
 *             stat_name               Word to look up
 *             where_pt1               Pointer to result-display string, part 1
 *                                         NULL if not preparing text
 *             where_pt2               Pointer to result-display string, part 2
 *                                         NULL if not preparing text
 *         Global Variables:
 *             in_tokz_esc             TRUE if in "Tokenizer Escape" mode.
 *             scope_is_global         TRUE if "global" scope is in effect
 *             current_device_node     Current dev-node data-struct
 *             ibm_locals              TRUE if IBM-style Locals are enabled
 *
 *      Outputs:
 *         Returned Value:             Pointer to TIC-entry; NULL if not found
 *         Supplied Pointers:
 *             *where_pt1              Result display string, part 1 of 2
 *             *where_pt2              Result display string, part 2 of 2
 *
 *      Process Explanation:
 *          We will set the two-part result-display string in this routine
 *              because only here do we know in which vocabulary the word
 *              was found.
 *          Pre-load the two parts of the result-display string.
 *          If we are in "Tokenizer Escape" mode, look up the word:  first,
 *              in the "Tokenizer Escape" Vocabulary, or, if not found,
 *              among the "Shared" words.
 *          Otherwise, we're in Normal" mode.  Look it up:  first, among the
 *              Locals, if IBM-style Locals are enabled (it can possibly be
 *              one if "Tokenizer Escape" mode was entered during a colon-
 *              -definition); then, if it was not found and if "Device"
 *              scope is in effect, look in the current device-node; then,
 *              if not found, in the "core" vocabulary.
 *          Load the second part of the result-display string with the
 *               appropriate phrase for whereever it was found.
 *          Then adjust the first part of the result-display string with
 *               the definer, if known.
 *
 *          The two strings will be formatted to be printed adjacently,
 *              without any additional spaces in the printf() format.
 *          The first part of the result-display string will not start with
 *              a space, but will have an intermediate space if necessary.
 *          The second part of the result-display string will not start
 *              with a space, and will contain the terminating new-line
 *              if appropriate.  It might or might not have been built
 *              with a call to  in_what_node().
 *
 *          If the calling routine displays the result-display strings,
 *              it should follow-up with a call to  show_node_start()
 *              This will be harmless if  in_what_node()  was not used
 *              in the construction of the result-display string.
 *          If the calling routine is NOT going to display the result strings,
 *              it should pass NULLs for the string-pointer pointers.
 *
 *          The second part of the string consists of pre-coded phrases;
 *              therefore, we can directly assign the pointer.
 *          The first part of the string, however, has developed into
 *              something constructed "on the fly".  Earlier, it, too,
 *              had been a directly-assignable pointer; all the callers
 *              to this routine expect that.  Rather than change all the
 *              callers, we will assign a local buffer for it.
 *
 *      Extraneous Remarks:
 *          We had to add the rule allowing where_pt1 or where_pt2 to be
 *              NULL after we introduced the  in_what_node()  function.
 *              We had cases where residue from a lookup for processing
 *              showed up later in an unrelated Message.  The NULL rule
 *              should prevent that.
 *
 **************************************************************************** */

static char lookup_where_pt1_buf[32];

tic_hdr_t *lookup_word( char *stat_name, char **where_pt1, char **where_pt2 )
{
    tic_hdr_t *found = NULL;
    bool trail_space = TRUE;
    bool doing_lookup = BOOLVAL( ( where_pt1 != NULL )
			      && ( where_pt2 != NULL ) );
    char *temp_where_pt2 = "in the core vocabulary.\n";

    lookup_where_pt1_buf[0] = 0;             /*  Init'lz part-1 buffer  */

    /*  "Core vocab" refers both to shared fwords and built-in tokens.  */

    /*  Distinguish between "Normal" and "Tokenizer Escape" mode  */
    if ( in_tokz_esc )
    {   /*  "Tokenizer Escape" mode.  */
	found = lookup_tokz_esc( stat_name);
	if ( found != NULL )
	{
	    temp_where_pt2 = in_tkz_esc_mode;
	}else{
	    /*  "Core vocabulary".  */
	    found = lookup_shared_word( stat_name);
	}
    }else{
	/*  "Normal" tokenization mode  */
	if ( ibm_locals )
	{
	    found = lookup_local( stat_name);
	    if ( doing_lookup && ( found != NULL ) )
	    {
		trail_space = FALSE;
		temp_where_pt2 = ".\n";
	    }
	}

	if ( found == NULL )
	{
	    found = lookup_in_dev_node( stat_name);
	    if ( found != NULL )
	    {
		if ( doing_lookup )
		{
		    temp_where_pt2 = in_what_node( current_device_node);
		}
	    }else{
		/*  "Core vocabulary".  */
		found = lookup_core_word( stat_name);
	    }
	}
    }

    if ( ( doing_lookup ) && ( found != NULL ) )
    {
	if ( as_a_what( found->fword_defr, lookup_where_pt1_buf) )
	{
	    if ( trail_space )
	    {
		strcat(lookup_where_pt1_buf, " ");
	    }
	}
	*where_pt1 = lookup_where_pt1_buf;
	*where_pt2 = temp_where_pt2;
    }
    return( found);
}

/* **************************************************************************
 *
 *      Function name:  word_exists
 *      Synopsis:       Check whether a given word is already defined in the
 *                          Current mode -- relative to "Tokenizer-Escape" --
 *                          and Scope into which definitions are being entered. 
 *                      Used for error-reporting.
 *
 *      Inputs:
 *         Parameters:
 *             stat_name                 Word to look up
 *             where_pt1                 Pointer to string, part 1 of 2,
 *                                          to display in result
 *             where_pt2                 Pointer to string, part 2 of 2,
 *                                          to display in result
 *
 *      Outputs:
 *         Returned Value:               TRUE if the name exists.
 *         Supplied Pointers:
 *             *where_pt1                Result display string, part 1 of 2
 *             *where_pt2                Result display string, part 2 of 2
 *
 *      Process Explanation:
 *          If the calling routine displays the result-display strings,
 *              it should follow-up with a call to  show_node_start()
 *
 *      Extraneous Remarks:
 *          This used to be a much heftier routine; now it's just
 *              a wrapper around  lookup_word() .
 *
 **************************************************************************** */

bool word_exists( char *stat_name, char **where_pt1, char **where_pt2 )
{
    bool retval = FALSE;
    tic_hdr_t *found = lookup_word( stat_name, where_pt1, where_pt2 );

    if ( found != NULL )
    {
	retval = TRUE;
    }

    return( retval);
}

/* **************************************************************************
 *
 *      Function name:  warn_if_duplicate
 *      Synopsis:       Check whether a given word is already defined in
 *                          the current mode and issue a warning if it is.
 *
 *      Inputs:
 *         Parameters:
 *             stat_name                Word to check
 *         Global Variables:
 *             verbose_dup_warning      Whether to run the check at all.
 *         Local Static Variables:
 *             do_not_overload          FALSE if  OVERLOAD  is in effect.
 *
 *      Outputs:
 *         Returned Value:              NONE
 *         Local Static Variables:
 *             do_not_overload          Restored to TRUE
 *         Printout:
 *             Warning message if a duplicate.
 *
 *      Error Detection:
 *             None.  This is merely an informative warning.
 *
 *      Process Explanation:
 *          "Current mode" -- meaning, whether the tokenizer is operating
 *              in "Tokenizer Escape" mode or in normal tokenization mode --
 *              will be recognized by the  word_exists()  function.
 *
 *      Extraneous Remarks:
 *          The  OVERLOAD  directive is our best shot at creating a more
 *              fine-grained way to temporarily bypass this test when
 *              deliberately overloading a name.  It would be nice to have
 *              a mechanism, comparable to the classic
 *                     WARNING @ WARNING OFF  .....  WARNING !
 *              that could be applied to a range of definitions, but:
 *              (1)  That would require more of a true FORTH infrastructure;
 *                       hence, more effort than I am willing to invest, at
 *                       this juncture, for such a small return,
 *              and
 *              (2)  Most intentional-overloading ranges only cover a
 *                       single definition anyway.
 *
 **************************************************************************** */

void warn_if_duplicate( char *stat_name)
{
    if ( verbose_dup_warning && do_not_overload )
    {
	char *where_pt1;
	char *where_pt2; 
	if ( word_exists( stat_name, &where_pt1, &where_pt2) )
	{
	    tokenization_error( WARNING, 
		"Duplicate definition:   %s  already exists %s%s",
		    stat_name, where_pt1, where_pt2 );
	    show_node_start();
	}
    }
    do_not_overload = TRUE;
}


/* **************************************************************************
 *
 *      Function name:  glob_not_allowed
 *      Synopsis:       Print a Message that "XXX is not allowed."
 *                          because Global Scope is in effect.
 *                      Used from several places...
 *      
 *      Inputs:
 *         Parameters:
 *             severity              Severity of the Message
 *             not_ignoring          FALSE = "Ignoring", for the part of the
 *                                       message about "How It's being Handled"
 *         Global Variables:
 *             statbuf               Disallowed word currently being processed
 *
 *      Outputs:
 *         Returned Value:           NONE
 *         Printout:                 Message of given severity.
 *
 **************************************************************************** */

static void glob_not_allowed( int severity, bool not_ignoring)
{
    tokenization_error( severity, "Global Scope is in effect; "
			"%s not allowed.  %s.\n",
			    strupr(statbuf), 
				 not_ignoring ?
				     "Attempting to compensate.." :
					  "Ignoring" );
}


/* **************************************************************************
 *
 *      Function name:  not_in_dict
 *      Synopsis:       Print the message "XXX is not in dictionary."
 *                      Used from several places...
 *      
 *      Inputs:
 *         Parameters:
 *             stat_name                Word that could not be processed
 *
 *      Outputs:
 *         Returned Value:              NONE
 *         Printout:         Error message.
 *
 **************************************************************************** */

static void not_in_dict( char *stat_name)
{
    tokenization_error ( TKERROR,
        "Word  %s  is not in dictionary.\n", stat_name);
}

/* **************************************************************************
 *
 *      Function name:  tokenized_word_error
 *      Synopsis:       Report an error when a word could not be processed
 *                          by the tokenizer.  Messages will vary...
 *      
 *      Inputs:
 *         Parameters:
 *             stat_name                Word that could not be processed
 *         Global Variables:
 *             in_tokz_esc    TRUE if tokenizer is in "Tokenizer Escape" mode.
 *
 *      Outputs:
 *         Returned Value:              NONE
 *         Printout:          Error message.  Possible Advisory about
 *
 *      Error Detection:
 *          Error was detected by the calling routine...
 *
 *      Process Explanation:
 *          If the tokenizer is in "Tokenizer Escape" mode, the word might
 *              be one that can be used in normal tokenization mode;
 *          Conversely, if the tokenizer is in normal-tokenization mode,
 *              the word might be one that can be used in the "Escape" mode.
 *          Or, the word is completely unknown.
 *          Recognizing the current mode is handled by  word_exists()
 *          However, we need to test for the *converse* of the current mode,
 *              so before we call  word_exists()  we are going to save and
 *              invert the setting of  in_tokz_esc  (and afterwards, of
 *              course, restore it...)
 *
 **************************************************************************** */

static void tokenized_word_error( char *stat_name)
{
    char *where_pt1;
    char *where_pt2;
    bool found_somewhere;
    
    bool sav_in_tokz_esc = in_tokz_esc;
    in_tokz_esc = INVERSE(sav_in_tokz_esc);

    found_somewhere = word_exists( stat_name, &where_pt1, &where_pt2);
    if ( found_somewhere )
    {
	tokenization_error ( TKERROR, "The word %s is %s recognized "
	    "in tokenizer-escape mode.\n",
		 stat_name, sav_in_tokz_esc ? "not" :  "only" );
    } else {
	not_in_dict( stat_name);
    }

    if ( INVERSE(exists_in_ancestor( stat_name)) )
    {
        if ( found_somewhere && sav_in_tokz_esc )
	{
	    tokenization_error(INFO,
		"%s is defined %s%s", stat_name, where_pt1, where_pt2 );
	    show_node_start();
	}
    }

    in_tokz_esc = sav_in_tokz_esc;
}


/* **************************************************************************
 *
 *      Function name:  unresolved_instance
 *      Synopsis:       Print the "unresolved instance" message
 *
 *      Inputs:
 *         Parameters:
 *             severity                    Severity of the Message
 *         Local Static Variables:
 *             instance_filename           File where "instance" invoked
 *             instance_lineno             Line number where "instance" invoked
 *
 *      Outputs:
 *         Returned Value:                 NONE
 *         Printout:          Message.
 *
 *      Error Detection:
 *          Error was detected by the calling routine...
 *
 **************************************************************************** */

static void unresolved_instance( int severity)
{
    tokenization_error( severity, "Unresolved \"INSTANCE\"" );
    just_where_started( instance_filename, instance_lineno );
}


/* **************************************************************************
 *
 *      Function name:  modified_by_instance
 *      Synopsis:       Print the "[not] modified by instance" message
 *
 *      Inputs:
 *         Parameters:
 *             definer                     Internal token for the defining-word
 *             was_modded                  FALSE if "not modified..."
 *         Local Static Variables:
 *             instance_filename           File where "instance" invoked
 *             instance_lineno             Line number where "instance" invoked
 *
 *      Outputs:
 *         Returned Value:                 NONE
 *         Printout:          WARNING message.
 *
 *      Error Detection:
 *          Error was detected by the calling routine...
 *
 **************************************************************************** */

static void modified_by_instance( fwtoken definer, bool was_modded)
{
    char *was_not = was_modded ? "was" : "not" ;
    char *defn_type_name;

    /*  No need to check the return value  */
    definer_name(definer, &defn_type_name);

    tokenization_error ( WARNING,
	"%s definition %s modified by \"INSTANCE\"",
	    defn_type_name, was_not );
    just_where_started( instance_filename, instance_lineno );
 }

/* **************************************************************************
 *
 *      Function name:  validate_instance
 *      Synopsis:       If "instance" is in effect, check whether it is
 *                          appropriate to the defining-word being called.
 *
 *      Inputs:
 *         Parameters:
 *             definer                   Internal token for the defining-word
 *         Local Static Variables:
 *             is_instance               TRUE if "instance" is in effect.
 *             instance_definer_gap      TRUE if invalid definer(s) invoked
 *                                           since "instance" went into effect.
 *
 *      Outputs:
 *         Returned Value:               NONE
 *         Local Static Variables:
 *             is_instance               Reset to FALSE if definer was valid.
 *             instance_definer_gap      TRUE if definer was not valid;
 *                                           FALSE if definer was valid.
 *
 *      Error Detection:
 *          If "instance" is in effect, the only defining-words that are
 *              valid are:  value  variable  defer  or  buffer:  Attempts
 *              to use any other defining-word will be reported with a
 *              WARNING, but "instance" will remain in effect.
 *          If an invalid defining-word was invoked since "instance" went
 *              into effect, then, when it is finally applied to a valid
 *              definer, issue a WARNING.
 *
 *      Process Explanation:
 *          Implicit in the Standard is the notion that, once INSTANCE has
 *              been executed, it remains in effect until a valid defining-
 *              word is encountered.  We will do the same.
 *
 **************************************************************************** */

static void validate_instance(fwtoken definer)
{
    if ( is_instance )
    {
	bool is_error = TRUE ;

	switch ( definer)
	{
	    case VALUE:
	    case VARIABLE:
	    case DEFER:
	    case BUFFER:
		is_error = FALSE;
	    /*  No default needed, likewise, no breaks;      */
	    /*  but some compilers get upset without 'em...  */
	    default:
		break;
	}

	if( is_error )
	{
	    modified_by_instance(definer, FALSE );
	    instance_definer_gap = TRUE;
	}else{
	    if ( instance_definer_gap )
	    {
		modified_by_instance(definer, TRUE );
	    }
	    is_instance = FALSE;
	    instance_definer_gap = FALSE;
	}
    }
}
    

/* **************************************************************************
 *
 *      Function name:  trace_creation
 *      Synopsis:       If the word being created is on the Trace List,
 *                          display the appropriate message
 *
 *      Inputs:
 *         Parameters:
 *             definer                 Internal token for the defining-word
 *             nu_name                 The word being created
 *         Global Variables:
 *             verbose                 No point in doing all this if we're
 *                                         not showing the message anyway...
 *             in_tokz_esc             TRUE if we are in Tokenizer-Escape mode
 *             scope_is_global         TRUE if "global" scope is in effect
 *             current_device_node     Current dev-node data-struct
 *
 *      Outputs:
 *         Returned Value:             NONE
 *         Printout:
 *             Advisory Message, if the word is on the Trace List.
 *
 *      Process Explanation:
 *          The order of scope-checking is important:
 *              A Local has no scope beyond the definition in which it occurs.
 *              Tokenizer-Escape mode supercedes "Normal" mode, and renders
 *                  moot the differences between Global and Device scope.
 *              Global scope is mutually exclusive with Device scope.
 *              Device scope needs to identify where the Current device-node
 *                  began.
 *
 **************************************************************************** */

void trace_creation( fwtoken definer, char *nu_name)
{
    if ( verbose )
    {
	if ( is_on_trace_list( nu_name) )
	{
            char  as_what[96] = "";
	    bool show_last_colon = BOOLVAL( definer == LOCAL_VAL);

	    as_a_what( definer, as_what);  /*  No need to check return value. */

	    /*  Scope-checking starts here, unless  show_last_colon  is TRUE.
	     *  Come out of this with  as_what[]  filled up and
	     *      terminated with a new-line, if appropriate,
	     */
	    while ( ! show_last_colon )
	    {
		strcat( as_what, " ");

		if ( in_tokz_esc )
		{
		    strcat( as_what, in_tkz_esc_mode);
		    break;
		}

		if ( scope_is_global )
		{
		    strcat( as_what, "with Global scope.\n");
		}else{
		    /*  In Device scope.  Show the Current node.   */
		    strcat( as_what, in_what_node( current_device_node));
		}
		break;

	    }   /*  Destination of BREAKs ...   */

	    tokenization_error(INFO, "Creating %s %s", nu_name, as_what);

	    if ( show_last_colon )
	    {
		in_last_colon();
	    }else{
		show_node_start();
	    }

	}
    }
}

/* **************************************************************************
 *
 *      Function name:  create_word
 *      Synopsis:       
 *
 *      Inputs:
 *         Parameters:
 *             definer             Internal token for the defining-word
 *         Global Variables:
 *             control_stack_depth Number of "Control Stack" entries in effect
 *             nextfcode           FCode-number to be assigned to the new name
 *             statbuf             Symbol last read from the input stream
 *             pc                  Input-source Scanning pointer
 *             hdr_flag            State of headered-ness for name-creation
 *             force_tokens_case   If TRUE, force token-names' case in FCode
 *             force_lower_case_tokens
 *                                 If  force_tokens_case  is TRUE, this
 *                                     determines which case to force
 *             iname               Input-source file name; for error-reporting
 *             lineno              Input-source Line number; also for err-rep't
 *
 *      Outputs:
 *         Returned Value:         TRUE if successful
 *         Global Variables:  
 *             nextfcode           Incremented  (by bump_fcode() )
 *             statbuf             Advanced to next symbol; must be re-read
 *             pc                  Advanced, then restored to previous value
 *         Memory Allocated
 *             Copy of the name being defined, by support routine.
 *             Copy of input-source file name, for error-reporting
 *         When Freed?
 *             Copy of name being defined is freed when Current Device Vocab
 *                 is "finished", or at end of tokenization.
 *             Copy of input-source file name is freed at end of this routine.
 *
 *      Error Detection:
 *          ERROR if already inside a colon-definition.  Discontinue
 *              processing and return FALSE.
 *          ERROR if inside a control-structure.  Continue processing,
 *              though, to catch other errors, and even return TRUE;
 *              except:  leave the new token undefined. 
 *          Warning on duplicate name (subject to command-line control)
 *          Message if name is excessively long; Warning if headerless.
 *          FATAL if the value of  nextfcode  is larger than the legal
 *              maximum for an FCode, (0x0fff).
 *
 *      Revision History:
 *      Updated Thu, 24 Mar 2005 by David L. Paktor
 *          Optional warning when name about to be created is a
 *              duplicate of an existing name.
 *      Updated Wed, 30 Mar 2005 by David L. Paktor
 *          Warning when name length exceeds ANSI-specified max (31 chars).
 *      Updated Tue, 05 Apr 2005 by David L. Paktor
 *          Add "definer" parameter and call to  add_definer() .  Part
 *              of the mechanism to forbid attempts to use the  TO 
 *              directive to change values of CONSTANTs in particular
 *              and of inappropriate targets in general.
 *      Updated Fri, 06 May 2005 by David L. Paktor
 *          Error-detection of   DO ...  LOOP  and  BEGIN ...  imbalance
 *          Error-detection of  nextfcode  exceeding legal maximum (0x0fff).
 *      Updated Wed, 20 Jul 2005 by David L. Paktor
 *          Put Duplicate-Name-Test under command-line control...
 *      Updated Wed, 24 Aug 2005 by David L. Paktor
 *          Error-detection via  clear_control_structs()  routine.
 *      Updated Tue, 10 Jan 2006 by David L. Paktor
 *          Convert to  tic_hdr_t  type vocabulary.
 *      Updated Thu, 20 Apr 2006 by David L. Paktor
 *          Allow creation of new definition within body of a flow-control
 *              structure.  (Remove error-detection via  clear_control_structs)
 *      Updated Tue, 13 Jun 2006 by David L. Paktor
 *          Move detection of out-of-bounds  nextfcode  to  assigning_fcode()
 *              routine, which also detects Overlapping Ranges error.
 *      Updated Thu, 27 Jun 2006 by David L. Paktor
 *          Report Error for attempt to create def'n inside control structure.
 *
 *      Extraneous Remarks:
 *          We must not set  incolon  to TRUE (if we are creating a colon
 *              definition) until *AFTER* this routine has been called, due
 *              to the initial error-checking.  If we need to detect whether
 *              we are creating a colon definition, we can do so by testing
 *              whether the parameter, DEFINER, equals COLON .
 *
 **************************************************************************** */

static bool create_word(fwtoken definer)
{
    signed long wlen;
    bool retval = FALSE;
    char *defn_type_name;

    /*  If already inside a colon, ERROR and discontinue processing    */
    /*  If an alias to a definer is used, show the name of the alias  */
    if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 
    {
	char defn_type_buffr[32] = "";
	unsigned int old_lineno = lineno;    /*  For error message  */
	bool define_token = TRUE;

	{   /*  Set up definition-type text for error-message */

	    /*  No need to check the return value  */
	    definer_name(definer, &defn_type_name);

	    strcat( defn_type_buffr, defn_type_name);
	    strcat( defn_type_buffr, " definition");
	}
	/*  If in a control-structure, ERROR but continue processing  */
	if ( control_stack_depth != 0 )
	{
	    announce_control_structs( TKERROR, defn_type_buffr, 0);
	    /*  Leave the new token undefined.  */
	    define_token = FALSE;
	}

	/*  Get the name of the new token  */
	wlen = get_word();

#ifdef DEBUG_SCANNER
	printf("%s:%d: debug: defined new word %s, fcode no 0x%x\n",
			iname, lineno, name, nextfcode);
#endif
	if ( wlen <= 0 )
	{
	    warn_unterm( TKERROR, defn_type_buffr, old_lineno);
	}else{
	    bool emit_token_name = TRUE;

	    /*  Handle Tracing of new definitions  */
	    trace_creation( definer, statbuf);

	    /*  Other Error or Warnings as applicable  */
	    validate_instance( definer);
	    warn_if_duplicate( statbuf);
	    check_name_length( wlen);

	    /*  Bump FCode; error-check as applicable  */
	    assigning_fcode();

	    /*  Define the new token, unless disallowed  */
	    add_to_current( statbuf, nextfcode, definer, define_token);

	    /*  Emit appropriate FCodes:  Type of def'n,   */
	    switch ( hdr_flag )
	    {
		case FLAG_HEADERS:
		    emit_token("named-token");
		    break;

		case FLAG_EXTERNAL:
		    emit_token("external-token");
		    break;

		default:  /*   FLAG_HEADERLESS   */
		    emit_token("new-token");
		    emit_token_name = FALSE;
	    }

	    /*  Emit name of token, if applicable  */
	    if ( emit_token_name )
	    {
		if ( force_tokens_case )
		{
		    if ( force_lower_case_tokens )
		    {
			strlwr( statbuf);
		    }else{
			strupr( statbuf);
		    }
		}
		emit_string((u8 *)statbuf, wlen);	
	    }

	    /*  Emit the new token's FCode   */
	    emit_fcode(nextfcode);

	    /*  Prepare FCode Assignment Counter for next definition   */
	    bump_fcode();

	    /*  Declare victory   */
	    retval = TRUE;
	}
    }
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  cannot_apply
 *      Synopsis:       Print error message of the form:
 *                     "Cannot apply <func> to <targ>, which is a <def'n>"
 *
 *      Inputs:
 *         Parameters:
 *             func_nam                    The name of the function
 *             targ_nam                    The name of the target
 *             defr                        The numeric-code of the definer-type
 *
 *      Outputs:
 *         Returned Value:                 NONE
 *         Printout:
 *             The error message is the entire printout of this routine
 *
 *      Error Detection:
 *          Error was detected by calling routine
 *
 *      Process Explanation:
 *          The calling routine already looked up the definer for its
 *              own purposes, so we don't need to do that again here.
 *
 *      Still to be done:
 *          If the definer-name is not found, we might still look up
 *              the target name in the various vocabularies and use
 *              a phrase for those.  E.g., if it is a valid token,
 *              we could say it's defined as a "primitive".  (I'm
 *              not sure what we'd say about an FWord...)
 *
 **************************************************************************** */

static void cannot_apply( char *func_nam, char *targ_nam, fwtoken defr)
{
    char *defr_name = "" ;
    const char *defr_phrase = ", which is defined as a " ;

    if ( ! definer_name(defr, &defr_name) )
    {
	defr_phrase = "";
    }

    tokenization_error ( TKERROR , 
	"Cannot apply  %s  to  %s %s%s.\n",
	     func_nam, targ_nam, defr_phrase, defr_name );

}


/* **************************************************************************
 *
 *      Function name:  lookup_with_definer
 *      Synopsis:       Return pointer to data-structure of named word,
 *                      if it's valid in Current context, and supply its
 *                      definer.  If it's not valid in Current context,
 *                      see if it might be a Local, and supply that definer.
 *
 *      Inputs:
 *         Parameters:
 *             stat_name                  Name to look up
 *             *definr                    Pointer to place to put the definer.
 *
 *      Outputs:
 *         Returned Value:                Pointer to data-structure, or
 *                                            NULL if not in Current context.
 *         Supplied Pointers:
 *             *definr                    Definer; possibly LOCAL_VAL
 *
 *      Process Explanation:
 *          If the name is not found in the Current context, and does not
 *              exist as a Local, *definr will remain unchanged.
 *
 *      Extraneous Remarks:
 *          This is an odd duck^H^H^H^H^H^H^H^H^H^H^H^H a highly-specialized 
 *              routine created to meet some corner-case needs engendered by
 *              the conversion to tic_hdr_t vocabularies all around, combined
 *              with an obsessive urge to preserve a high level of detail in
 *              our error-messages.
 *
 **************************************************************************** */

static tic_hdr_t *lookup_with_definer( char *stat_name, fwtoken *definr )
{
    tic_hdr_t *retval = lookup_current( stat_name);
    if ( retval != NULL )
    {
         *definr = retval->fword_defr;
    }else{
        if ( exists_as_local( stat_name) ) *definr = LOCAL_VAL;
    }
    return ( retval );
}

/* **************************************************************************
 *
 *      Function name:  validate_to_target
 *      Synopsis:       Print a message if the intended target
 *                          of the  TO  directive is not valid
 *      
 *      Inputs:
 *         Parameters:                NONE
 *         Global Variables:
 *             statbuf             Next symbol to be read from the input stream
 *             pc                  Input-source Scanning pointer
 *
 *      Outputs:
 *         Returned Value:         TRUE = Allow  b(to)  token to be output.
 *         Global Variables:
 *             statbuf             Advanced to next symbol; must be re-read
 *             pc                  Advanced, then restored to previous value
 *
 *      Error Detection:
 *          If next symbol is not a valid target of  TO , issue ERROR    
 *              message.  Restored  pc  will cause the next symbol to
 *              be processed by ordinary means.
 *          Allow  b(to)  token to be output in selected cases.  Even if
 *              user has set the "Ignore Errors" flag, certain targets are
 *              still too risky to be allowed to follow a  b(to)  token;
 *              if "Ignore Errors" is not set, output won't get created
 *              anyway.
 *          Issue ERROR in the extremely unlikely case that "to" is the
 *              last word in the Source.
 *
 *      Process Explanation:
 *          Valid targets for a TO directive are words defined by:
 *              DEFER, VALUE and arguably VARIABLE.  We will also allow
 *              CONSTANT, but will still issue an Error message.
 *          After the check, restore  pc ; this was only a look-ahead.
 *              Also restore  lineno  and  abs_token_no 
 *
 *      Extraneous Remarks:
 *          Main part of the mechanism to detect attempts to use the  TO 
 *              directive to change the values of CONSTANTs in particular
 *              and of inappropriate targets in general.
 *
 **************************************************************************** */

static bool validate_to_target( void )
{
    signed long wlen;
    tic_hdr_t *test_entry;
    u8 *saved_pc = pc;
    char *cmd_cpy = strupr( strdup( statbuf));    /*  For error message  */
    unsigned int saved_lineno = lineno;
    unsigned int saved_abs_token_no = abs_token_no;
    fwtoken defr = UNSPECIFIED ;
    bool targ_err = TRUE ;
    bool retval = FALSE ;

    wlen = get_word();
    if ( wlen <= 0 )
    {
	warn_unterm( TKERROR, cmd_cpy, saved_lineno);
    }else{

	test_entry = lookup_with_definer( statbuf, &defr);
	if ( test_entry != NULL )
	{
	    switch (defr)
	    {
		case VARIABLE:
		    tokenization_error( WARNING,
			"Applying %s to a VARIABLE (%s) is "
			"not recommended; use  !  instead.\n",
			cmd_cpy, statbuf);
		case DEFER:
		case VALUE:
		    targ_err = FALSE ;
		case CONST:
		    retval = TRUE ;
		/*  No default needed, likewise, no breaks;      */
		/*  but some compilers get upset without 'em...  */
		default:
		    break;
	    }
	}

	if ( targ_err )
	{
	    cannot_apply(cmd_cpy, strupr(statbuf), defr );
	}

	pc = saved_pc;
	lineno = saved_lineno;
	abs_token_no = saved_abs_token_no;
    }
    free( cmd_cpy);
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  you_are_here
 *      Synopsis:       Display a generic Advisory of the Source command
 *                          or directive encountered and being processed
 *
 *      Inputs:
 *         Parameters:                NONE
 *         Global Variables:
 *             statbuf                The command being processed 
 *
 *      Outputs:
 *         Returned Value:            NONE
 *         Printout:
 *             Advisory message
 *
 **************************************************************************** */

static void you_are_here( void)
{
    tokenization_error( INFO,
	"%s encountered; processing...\n",
	    strupr(statbuf) );
}


/* **************************************************************************
 *
 *      Function name:  fcode_starter
 *      Synopsis:       Respond to one of the "FCode Starter" words
 *      
 *      Inputs:
 *         Parameters:
 *             token_name         The FCode-token for this "Starter" word
 *             spread             The separation between tokens.
 *             is_offs16          Whether we are using a 16-bit number
 *                                    for branch- (and suchlike) -offsets,
 *                                    or the older-style 8-bit offset numbers.
 *         Global Variables:
 *            iname               Input-File name, used to set ifile_name 
 *                                    field of  current_device_node
 *            lineno              Current Input line number, used to set
 *                                    line_no field of  current_device_node
 *         Local Static Variables:
 *            fcode_started       If this is TRUE, we have an Error.
 *            first_fc_starter    Control calling  reset_fcode_ranges() ;
 *                                    only on the first fcode_starter of
 *                                    a tokenization.
 *
 *      Outputs:
 *         Returned Value:        NONE
 *         Global Variables:
 *            offs16              Global "16-bit-offsets" flag
 *            current_device_node   The ifile_name and line_no fields will be
 *                                    loaded with the current input file name
 *                                    and line number.  This node will be the
 *                                    top-level device-node.
 *            FCode Ranges will be reset the first time per tokenization
 *                that this routine is entered.
 *            A new FCode Range will be started every time after that.
 *         Local Static Variables:
 *            fcode_started       Set to TRUE.  We invoke the starter only
 *                                    once per image-block.
 *            first_fc_starter    Reset to FALSE if not already
 *         Memory Allocated
 *             Duplicate of Input-File name
 *         When Freed?
 *             In  fcode_ender()
 *
 *      Error Detection:
 *          Spread of other than 1 -- Warning message.
 *          "FCode Starter" previously encountered -- Warning and ignore.
 *
 *      Question under consideration:
 *          Do we want directives -- such as definitions of constants --
 *              supplied before the "FCode Starter", to be considered as
 *              taking place in "Tokenizer Escape" mode?  That would mean
 *              the "Starter" functions must be recognized in "Tokenizer
 *              Escape" mode.  Many ramifications to be thought through...
 *          I think I'm coming down strongly on the side of "No".  The user
 *              who wants to do that can very well invoke "Tokenizer Escape"
 *              mode explicitly.
 *
 **************************************************************************** */

static void fcode_starter( const char *token_name, int spread, bool is_offs16)
{
    you_are_here();
    if ( spread != 1 )
    {
        tokenization_error( WARNING, "spread of %d not supported.\n", spread);
    }
    if ( fcode_started )
    {
        tokenization_error( WARNING,
	    "Only one \"FCode Starter\" permitted per tokenization.  "
		"Ignoring...\n");
    } else {

	emit_fcodehdr(token_name);
	offs16 = is_offs16;
	fcode_started = TRUE;

	current_device_node->ifile_name = strdup(iname);
	current_device_node->line_no = lineno;

	if ( first_fc_starter )
	{
	    reset_fcode_ranges();
	    first_fc_starter = FALSE;
	}else{
	    set_next_fcode( nextfcode);
	}
    }
}

/* **************************************************************************
 *
 *      Function name:  fcode_end_err_check
 *      Synopsis:       Do error-checking at end of tokenization,
 *                          whether due to FCODE-END or end-of-file,
 *                          and reset the indicators we check.
 *
 *      Inputs:
 *         Parameters:                    NONE
 *         Global Variables:
 *             Data-Stack depth     Is anything left on the stack?
 *
 *      Outputs:
 *         Returned Value:                NONE
 *         Global Variables:
 *             Data-Stack           Reset to empty
 *
 *      Error Detection:
 *          Unresolved control structures detected by clear_control_structs()
 *          If anything is left on the stack, it indicates some incomplete
 *              condition; we will treat it as a Warning.
 *
 **************************************************************************** */

static void fcode_end_err_check( void)
{
    bool stack_imbal = BOOLVAL( stackdepth() != 0 );

	if ( stack_imbal )
	{
	    tokenization_error( WARNING,
		"Stack imbalance before end of tokenization.\n");
	}
    clear_stack();
    clear_control_structs("End of tokenization");
}

/* **************************************************************************
 *
 *      Function name:  fcode_ender
 *      Synopsis:       Respond to one of the "FCode Ender" words:
 *                          The FCode-token for "End0" or "End1"
 *                              has already been written to the
 *                              FCode Output buffer.
 *                          Finish the FCode header:  fill in its
 *                              checksum and length.
 *                          Reset the token names defined in "normal" mode
 *                          (Does not reset the FCode-token number)
 *
 *      Associated FORTH words:                 END0, END1
 *      Associated Tokenizer directive:         FCODE-END
 *
 *      Inputs:
 *         Parameters:            NONE
 *         Global Variables:
 *             incolon            If TRUE, a colon def'n has not been completed
 *             last_colon_filename         For error message.
 *             last_colon_lineno           For error message.
 *             scope_is_global             For error detection
 *             is_instance                 For error detection
 *
 *      Outputs:
 *         Returned Value:        NONE
 *         Global Variables:
 *             haveend            Set to TRUE
 *             fcode_started      Reset to FALSE.  Be ready to start anew.
 *             FCode-defined tokens, aliases and macros -- i.e., those
 *                 *NOT* defined in tokenizer-escape mode -- are reset.
 *                 (Also, command-line-defined symbols are preserved).
 *             Vocabularies will be reset
 *             Device-node data structures will be deleted
 *             Top-level device-node ifile_name and line_no fields
 *                 will be reset.
 *         Memory Freed
 *             Duplicate of Input-File name, in top-level device-node.
 *         Printout:
 *             Advisory message giving current value of nextfcode
 *                 (the "FCode-token Assignment Counter")
 *
 *      Error Detection:
 *          ERROR if a Colon definition has not been completed.
 *          ERROR if "instance" is still in effect
 *          WARNING if Global-Scope has not been terminated; compensate.
 *
 *      Extraneous Remarks:
 *          In order to accommodate odd cases, such as multiple FCode blocks
 *          within a single PCI header, this routine does not automatically
 *          reset nextfcode  to h# 0800
 *
 **************************************************************************** */

void fcode_ender(void)
{
    if ( incolon )
    {
	char *tmp_iname = iname;
	iname = last_colon_filename;
	unterm_is_colon = TRUE;
	warn_unterm( TKERROR, "Colon Definition", last_colon_lineno);
	iname = tmp_iname;    
    }
    
    haveend = TRUE;

    if ( is_instance )
    {
	unresolved_instance( TKERROR);
    }

    if ( scope_is_global )
    {
        tokenization_error( WARNING ,
	    "No DEVICE-DEFINITIONS directive encountered before end.  "
		"Compensating...\n");
	resume_device_scope();
    }
    fcode_end_err_check();
    reset_normal_vocabs();
    finish_fcodehdr();
    fcode_started = FALSE;

    if ( current_device_node->ifile_name != default_top_dev_ifile_name )
    {
	free( current_device_node->ifile_name );
	current_device_node->ifile_name = default_top_dev_ifile_name;
	current_device_node->line_no = 0;
    }
}

/* **************************************************************************
 *
 *      Function name:  get_token
 *      Synopsis:       Read the next word in the input stream and retrieve
 *                          its FCode-token number.  If it's not a symbol to
 *                          which a single token is assigned (e.g., if it's
 *                          a macro), report an error.
 *
 *      Associated FORTH words:                   [']  '
 *      Associated Tokenizer directive:          F[']
 *
 *      Inputs:
 *         Parameters:
 *             *tok_entry             Place to put the pointer to token entry
 *         Global Variables:
 *             statbuf                The command being processed 
 *             pc                     Input stream character pointer
 *
 *      Outputs:
 *         Returned Value:            TRUE if successful (i.e., no error)
 *         Supplied Pointers:
 *             *tok_entry             The token entry, if no error
 *         Global Variables:
 *             statbuf                The next word in the input stream
 *             pc                     Restored to previous value if error
 *
 *      Error Detection:
 *          The next word in the input stream is expected to be on the
 *              same line as the directive.  The  get_word_in_line()
 *              routine will check for that.
 *          If the next word in the input stream is not a symbol
 *              for which a single-token FCode number is assigned,
 *              report an ERROR and restore PC to its previous value.
 *
 **************************************************************************** */

static bool get_token(tic_hdr_t **tok_entry)
{
    bool retval = FALSE;
    u8 *save_pc;

    /*  Copy of command being processed, for error message  */
    char cmnd_cpy[FUNC_CPY_BUF_SIZE+1];
    strncpy( cmnd_cpy, statbuf, FUNC_CPY_BUF_SIZE);
    cmnd_cpy[FUNC_CPY_BUF_SIZE] = 0;   /*  Guarantee null terminator. */

    save_pc = pc;

    if ( get_word_in_line( statbuf) )
    {
	fwtoken defr = UNSPECIFIED;

	/*  We need to scan the newest definitions first; they
	 *      might supercede standard ones.  We need, though,
	 *      to bypass built-in FWords that need to trigger
	 *      some tokenizer internals before emitting their
	 *      synonymous FCode Tokens, (e.g., version1 , end0 ,
	 *      and start{0-4}); if we find one of those, we will
	 *      need to search again, specifically within the list
	 *      of FCode Tokens.
	 */
	*tok_entry = lookup_with_definer( statbuf, &defr);
	if ( *tok_entry != NULL )
	{
	    /*  Built-in FWords can be uniquely identified by their
	     *      definer,  BI_FWRD_DEFN .  The definer for "shared"
	     *      FWords is  COMMON_FWORD  but there are none of
	     *      those that might be synonymous with legitimate
	     *      FCode Tokens, nor are any likely ever to be...
	     */
	    if ( defr == BI_FWRD_DEFN )
	    {
	        *tok_entry = lookup_token( statbuf);
		retval = BOOLVAL( *tok_entry != NULL );
	    }else{
		retval = entry_is_token( *tok_entry);
	    }
	}

	if ( INVERSE( retval) )
	{
	    cannot_apply( cmnd_cpy, strupr(statbuf), defr );
	    pc = save_pc;
	}
    }

    return ( retval );
}


static void base_change ( int new_base )
{
    if ( incolon && ( INVERSE( in_tokz_esc) ) )
    {
        emit_literal(new_base );
	emit_token("base");
	emit_token("!");
    } else {
        base = new_base;
    }
}

static void base_val (int new_base)
{
    u8  *old_pc;

    char base_cmnd[FUNC_CPY_BUF_SIZE+1];
    strncpy( base_cmnd, statbuf, FUNC_CPY_BUF_SIZE);
    base_cmnd[FUNC_CPY_BUF_SIZE] = 0;  /* Guarantee NULL terminator */

    old_pc=pc;
    if ( get_word_in_line( statbuf) )
    {
	u8 basecpy=base;

	base = new_base;
	if ( ! handle_number() )
	{
	    /*  We did get a word on the line, but it's not a valid number */
	    tokenization_error( WARNING ,
		 "Applying %s to non-numeric value.  Ignoring.\n",
		      strupr(base_cmnd) );
	    pc = old_pc;
	}
	base=basecpy;
    }
}


/* **************************************************************************
 *
 *      Function name:  eval_string
 *      Synopsis:       Prepare to tokenize a string, artificially generated
 *                          by this program or created as a user-defined
 *                          Macro.   When done, resume at existing source.
 *                      Keep the file-name and line-number unchanged.
 *      
 *      Inputs:
 *         Parameters:
 *             inp_bufr          String (or buffer) to evaluate
 *
 *      Outputs:
 *         Returned Value:       NONE
 *         Global Variables, changed by call to init_inbuf():
 *             start             Points to given string
 *             pc                         ditto
 *             end               Points to end of given string
 *
 *      Revision History:
 *          Updated Thu, 23 Feb 2006 by David L. Paktor
 *              This routine no longer calls its own instance of  tokenize()
 *              It has become the gateway to the mechanism that makes a
 *                  smooth transition between the body of the Macro, User-
 *                  defined Symbol or internally-generated string and the
 *                  resumption of processing the source file. 
 *              A similar (but more complicated) transition when processing
 *                  an FLOADed file will be handled elsewhere.
 *          Updated Fri, 24 Feb 2006 by David L. Paktor
 *              In order to support Macro-recursion protection, this routine
 *                  is no longer the gateway for Macros; they will have to
 *                  call push_source() directly.
 *
 **************************************************************************** */

void eval_string( char *inp_bufr)
{
    push_source( NULL, NULL, FALSE);
    init_inbuf( inp_bufr, strlen(inp_bufr));
}


/* **************************************************************************
 *
 *      Function name:  finish_or_new_device
 *      Synopsis:       Handle the shared logic for the NEW-DEVICE and
 *                          FINISH-DEVICE commands.
 *
 *      Inputs:
 *         Parameters:
 *             finishing_device		   TRUE for FINISH-DEVICE,
 *					       FALSE for NEW-DEVICE
 *         Global Variables:
 *             incolon			     TRUE if inside a colon definition
 *             noerrors 		     TRUE if ignoring errors
 *             scope_is_global		     TRUE if "global scope" in effect
 *         Local Static Variables:
 *             is_instance		     TRUE if "instance" is in effect
 *             dev_change_instance_warning   TRUE if warning hasn't been issued
 *
 *      Outputs:
 *         Returned Value: 		     NONE
 *         Local Static Variables:
 *             dev_change_instance_warning   FALSE if warning is issued
 *             instance_definer_gap	     TRUE if "instance" is in effect
 *
 *      Error Detection:
 *          NEW-DEVICE and FINISH-DEVICE should not be used outside of
 *              a colon-definition if global-scope is in effect.  Error
 *              message; no further action unless we are ignoring errors.
 *          Issue a WARNING if INSTANCE wasn't resolved before the current
 *              device-node is changed.  Try not to be too repetitive...
 *
 *      Process Explanation:
 *          The words NEW-DEVICE and FINISH-DEVICE may be incorporated into
 *              a colon-definition, whether the word is defined in global-
 *              or device- -scope.  Such an incorporation does not effect
 *              a change in the device-node vocabulary; simply emit the token.
 *          If we are in interpretation mode, though, we need to check for
 *              errors before changing the device-node vocabulary:
 *          If global-scope is in effect, we need to check whether we are
 *              ignoring errors; if so, we will compensate by switching to  
 *              device-scope.
 *          If "instance" is in effect, it's "dangling".  It will remain
 *              in effect through a device-node change, but this is very
 *              bad style and deserves a WARNING, but only one for each
 *              occurrence.  It would be unaesthetic, to say the least,
 *              to have multiple messages for the same dangling "instance"
 *              in a "finish-device   new-device" sequence.
 *           We must be careful about the order we do things, because of
 *              the messages printed as a side-effect of the node change...
 *
 *      Extraneous Remarks:
 *          I will violate strict structure here.
 *
 **************************************************************************** */

static void finish_or_new_device( bool finishing_device )
{
    if ( INVERSE( incolon ) )
    {
	if ( INVERSE( is_instance) )
	{
	    /*  Arm warning for next time:         */
	    dev_change_instance_warning = TRUE;
	}else{
	    /*  Dangling "instance"                */
	    instance_definer_gap = TRUE;
	    /*   Warn only once.                   */
	    if ( dev_change_instance_warning )
	    {
		unresolved_instance( WARNING);
		dev_change_instance_warning = FALSE;
	    }
	}

	/*  Note:  "Instance" cannot be in effect during "global" scope  */ 
	if ( scope_is_global )
	{
	    glob_not_allowed( TKERROR, noerrors );
	    if ( noerrors )
	    {
		 resume_device_scope();
	    }else{
		 return;
	    }
	}

	if ( finishing_device )
	{
	     finish_device_vocab();
	}else{
	     new_device_vocab();
	}
    }
    emit_token( finishing_device ? "finish-device" : "new-device" );
	}
	
	
/* **************************************************************************
 *
 *      Function name:  abort_quote
 *      Synopsis:       Optionally implement the   ABORT"  function as
 *                      though it were a macro.  Control whether to allow
 *                      it, and which style to support, via switches set
 *                      on the command-line at run-time.
 *
 *      Inputs:
 *         Parameters:
 *             tok                       Numeric-code associated with the
 *                                           FORTH word that was just read.
 *         Global Variables:
 *             enable_abort_quote        Whether to allow ABORT"
 *             sun_style_abort_quote     SUN-style versus Apple-style
 *             abort_quote_throw         Whether to use -2 THROW vs ABORT
 *
 *      Outputs:
 *         Returned Value:     TRUE if it was handled
 *         Global Variables:
 *             report_multiline              Reset to FALSE.
 *         Printout:
 *             ADVISORY:   ABORT" in fcode is not defined by IEEE 1275-1994
 *
 *      Error Detection:
 *          Performed by other routines.  If user selected not to
 *              allow  ABORT" , it will simply be treated as an
 *              unknown word.
 *          The string following it, however, will still be consumed.
 *
 *      Process Explanation:
 *          If the supplied  tok  was not  ABORTTXT , then return FALSE.
 *          If the  enable_abort_quote  flag is FALSE, consume the
 *              string following the Abort" token, but be careful to
 *              leave the  Abort" token in statbuf, as it will be used
 *              for the error message.
 *          Otherwise, create and prepare for processing the appropriate Macro:
 *              For Apple Style, we push the specified string onto the stack
 *                  and do -2 THROW (and hope the stack unwinds correctly).
 *              For Sun Style, we test the condition on top of the stack,
 *                  and if it's true, print the specified string before we
 *                  do the -2 THROW.
 *          We perform the underlying operations directly:  placing an "IF"
 *              (if Sun Style), then placing the string.  This bypasses
 *              any issues of double-parsing, as well as of doubly checking
 *              for a multi-line string.
 *          Finally, we perform the operational equivalents of the remainder
 *              of the command sequence.
 *
 *      Extraneous Remarks:
 *          I would have preferred not to have to directly perform the under-
 *              lying operations, and instead simply prepare the entire command
 *              sequence in a buffer, but I needed to handle the case where
 *              quote-escaped quotes are included in the string:  If the string
 *              were simply to be reproduced into the buffer, the quote-escaped
 *              quotes would appear as plain quote-marks and terminate the
 *              string parsing prematurely, leaving the rest of the string
 *              to be treated as code instead of text...
 *          Also, the introduction of the variability of whether to do the
 *               -2 THROW  or to compile-in the token for  ABORT  makes the
 *              buffer-interpretation scheme somewhat too messy for my tastes.
 *
 **************************************************************************** */
	
static bool abort_quote( fwtoken tok)
{
    bool retval = FALSE;
    if ( tok == ABORTTXT )
    {
	if ( ! enable_abort_quote )
	{
	    /* ABORT" is not enabled; we'd better consume the string  */
	    char *save_statbuf;
	    signed long wlen;
	    save_statbuf = strdup( (char *)statbuf);
	    wlen = get_string( FALSE);
	    strcpy( statbuf, save_statbuf);
	    free( save_statbuf);
	}else{
	    /* ABORT" is not to be used in FCODE drivers
	     * but Apple drivers do use it. Therefore we
	     * allow it. We push the specified string to
	     * the stack, do -2 THROW and hope that THROW
	     * will correctly unwind the stack.
	     * Presumably, Apple Source supplies its own
	     *  IF ... THEN
	     */
	    char *abort_string;
	    signed long wlen;

	    retval = TRUE;
	    tokenization_error (INFO, "ABORT\" in fcode not "
			    "defined by IEEE 1275-1994\n");
	    test_in_colon("ABORT\"", TRUE, TKERROR, NULL);
	    wlen=get_string( TRUE);

	    if ( sun_style_abort_quote )  emit_if();

	    emit_token("b(\")");
	    emit_string(statbuf, wlen);
	
	    if ( sun_style_abort_quote )  emit_token("type");

	    if ( abort_quote_throw )
	    {
		emit_literal( -2);
		emit_token("throw");
	    }else{
		emit_token("abort");
	}
		
	    if ( sun_style_abort_quote )  emit_then();
	        /*  Sun Style  */
		abort_string = " type -2 THROW THEN:" ;
}
	}
    return( retval );
}


/* **************************************************************************
 *
 *      Function name:  create_alias
 *      Synopsis:       Create an alias, as specified by the user
 *
 *      Associated FORTH word:                 ALIAS
 *
 *      Inputs:
 *         Parameters:                NONE
 *         Global Variables:
 *             incolon                Colon-def'n-in-progress indicator
 *             in_tokz_esc            "Tokenizer Escape" mode indicator
 *         Input Stream
 *             Two words will be read.
 *
 *      Outputs:
 *         Returned Value:            TRUE if succeeded.
 *         Global Variables:    
 *             statbuf                New name will be copied back into here.
 *         Memory Allocated
 *             The two words will be copied into freshly-allocated memory 
 *                 that will be passed to the create_..._alias()  routine.
 *         When Freed?
 *             When Current Device Vocabulary is "finished", or at end
 *                 of tokenization, or upon termination of program.
 *             If not able to create alias, the copies will be freed here.
 *
 *      Error Detection:
 *          If the ALIAS command was given during colon-definition, that
 *              can be handled by this tokenizer, but it is not supported
 *              by IEEE 1275-1994.  Issue a WARNING.
 *          If the new name is a copy of an existing word-name, issue a warning.
 *          If the word to which an alias is to be created does not exist
 *              in the appropriate mode -- relative to "Tokenizer-Escape" --
 *              that is an ERROR.
 *          If "instance" is in effect, the ALIAS command is an ERROR.
 *
 *      Process Explanation:
 *          Get two words -- the new name and the "old" word -- from the
 *              same line of input as the ALIAS command.
 *          Copy the new name back into statbuf for use in trace_creation.
 *          Determine whether or not we are in "Tokenizer-Escape" mode.
 *              Subsequent searches will take place in that same mode.
 *          If the "new" name already exists, issue a warning.
 *          In each vocabulary applicable to the current mode -- i.e., 
 *                  "Tokenizer-Escape" or "Normal" -- (except:  cannot
 *                  make aliases to "Locals"):
 *              Try using the  create_..._alias()  routine.
 *              If it succeeds, we are done.
 *          IMPORTANT:  The order in which we try the vocabularies MUST
 *              match the order in which  tokenize_one_word()  searches them. 
 *          If all the attempts failed, the "old" word does not exist;
 *              declare an ERROR and free up the memory that was allocated.
 *
 *      Extraneous Remarks:
 *          With the separation of the  tokenizer[  state, this
 *              function has become too complicated to keep as a
 *              simple  CASE  in the big  SWITCH  statement anymore...
 *
 *          I had earlier thought that it was sufficient to create a
 *              macro linking the "new" name to the "old" word.  There
 *              were too many cases, though, where that didn't work.
 *              This is cleaner.
 *
 *          I will not be adhering to the strict rules of structure in
 *              this routine, as it would get me nested too deeply...
 *
 *      Revision History:
 *          Updated Tue, 10 Jan 2006 by David L. Paktor
 *              Convert to  tic_hdr_t  type vocabularies.
 *
 **************************************************************************** */

static bool create_alias( void )
{
    char *new_alias ;

    validate_instance(ALIAS);
    if ( incolon )
    {
	 tokenization_error ( WARNING,
	    "ALIAS during colon-definition "
		"is not supported by IEEE 1275-1994\n");
}
    if ( get_word_in_line( "ALIAS") )
    {

	new_alias = strdup((char *)statbuf);

	if (get_word_in_line( "ALIAS") )
{
	    char *old_name = strdup((char *)statbuf) ;

	    /*  Copy the "new" alias name back into statbuf.
	     *      This is a HACK ^H^H^H^H awkward way to retrofit
	     *      support for the  trace_creation()  function.
	     */
	    strcpy( statbuf, new_alias);

	    /*  We don't call  trace_creation()  here because we don't
	     *      know if the creation succeeded.  However, we want
	     *      to issue a "Duplicate" warning based on the attempt,
	     *      even if it doesn't succeed.
	     *  We would prefer to have the "Trace" message precede the 
	     *      "Duplicate" warning, but we don't think it's worth
	     *      the effort.  When it becomes worthwhile, the way to
	     *      do it would be to factor out the block that handles
	     *      normal-tokenization versus "Tokenizer-Escape" mode;
	     *      condition the "Trace" message on its success-return, 
	     *      show the "Duplicate" warning in any case, then show
	     *      the error-message and do the cleanup conditioned on
	     *      a failure-return.
	     *  That will also obviate the need for a return value from
	     *      this routine and for the copy-back into statbuf.
	     */
	    warn_if_duplicate(new_alias);

	    /*
	     *  Here is where we begin trying the  create_..._alias() 
	     *      routines for the vocabularies.
	     */

	    /*
	     *  Distinguish between "Normal" tokenization mode
	     *  and "Tokenizer Escape" mode
	     */
	    if ( in_tokz_esc )
	    {
		if ( create_tokz_esc_alias( new_alias, old_name) )
		    return(TRUE);
	
		/*
		 *  Handle the classes of operatives that are common between
		 *      "Tokenizer Escape" mode and "Normal" tokenization mode.
		 *  Those classes include selected non-fcode forth constructs
		 *     and Conditional-Compilation Operators.
		 */
		{
		    tic_hdr_t *found = lookup_shared_word( old_name);
		    if ( found != NULL )
		    {
			if ( create_core_alias( new_alias, old_name) )
			    return(TRUE);
		    }
	}
	    }else{
        	/*  "Normal" tokenization mode  */
	
		/*  Can create aliases for "Locals", why not?  */
		if ( create_local_alias( new_alias, old_name) )
		    return(TRUE);

		/*
		 *  All other classes of operatives -- non-fcode forth
		 *      constructs, Standard and user-defined fcode
		 *      tokens, Macros, and Conditional-Compilation
		 *      Operators, -- are included in the "currently
		 *      active" vocabulary.
		 */

		if ( create_current_alias( new_alias, old_name) )
		    return(TRUE);
	
	    }    /*  End of separate handling for normal-tokenization mode
        	  *      versus  "Tokenizer-Escape" mode
		  */

	    /*  It's not a word, a macro or any of that other stuff.  */
	    tokenized_word_error(old_name);
	    free(old_name);
	}
	free (new_alias);
    }
    return(FALSE);
}

	
/* **************************************************************************
 *
 *      Function name:  string_err_check
 *      Synopsis:       Error-check after processing or Ignoring
 *                          simple strings
 *
 *      Inputs:
 *         Parameters:
 *             is_paren           TRUE if string is Dot-Paren  .( 
 *                                    FALSE if Ess-Quote  ( s"  )
 *             sav_lineno         Saved Line Number, for Unterminated Error
 *             strt_lineno        Start Line Number, for Multiline Warning
 *         Global Variables:
 *             noerrors 	  TRUE if ignoring errors
 *         Local Static Variables:
 *             got_until_eof      TRUE if reached end of buffer before delim.
 *
 *      Outputs:
 *         Returned Value:        TRUE if did not reach end of buffer, or,
 *                                    if ignoring errors, TRUE anyway.
 *
 *      Error Detection:
 *          Multi-line warning, "Unterminated" Error messages, as apppropriate
 *
 **************************************************************************** */

static  bool string_err_check( bool is_paren,
                                  unsigned int sav_lineno,
				      unsigned int strt_lineno )
{
    bool retval = noerrors ;
    char *item_typ = is_paren ?
	"Dot-Paren" : "Ess-Quote" ;
    if ( got_until_eof )   /*  Crude retrofit... */
    {
	warn_unterm( TKERROR, item_typ, sav_lineno );
    }else{
	retval = TRUE;
	warn_if_multiline( item_typ, strt_lineno );
	}
    return( retval);
}


/* **************************************************************************
 *
 *      Function name:  handle_internal
 *      Synopsis:       Perform the functions associated with FORTH words
 *                      that do not map directly to a single token.  This
 *                      is the functions that will go into the FUNCT field
 *                      of entries in the "FWords" and "Shared Words" lists.
 *      
 *      Inputs:
 *         Parameters:
 *             pfield               Param-field of the  tic_hdr_t  -type entry
 *                                      associated with the FORTH-Word (FWord)
 *                                      just read that is being "handled".
 *         Global Variables:
 *             statbuf              The word that was just read.
 *
 *      Outputs:
 *         Returned Value:          NONE
 *         Global Variables:
 *             statbuf              More words may be read.
 *
 *      Error Detection:
 *          Too numerous to list here...
 *
 *      Process Explanation:
 *          Recast the type of the param-field of a  tic_hdr_t -type
 *              entry and rename it "tok".
 *          The "tok" will be used as the control-expression for a
 *              SWITCH statement with a large number of CASE labels.
 *              Both "FWords" and "shared_words" list entries will
 *              be processed by this routine.
 *      
 *      Revision History:
 *      Updated Wed, 20 Jul 2005 by David L. Paktor
 *          Put handling of  ABORT"  under control of a run-time
 *              command-line switch.
 *          Put decision to support IBM-style Locals under control
 *              of a run-time command-line switch.
 *      Updated Tue, 17 Jan 2006 by David L. Paktor
 *          Convert to handler for  tic_hdr_t  type vocab entries.
 *
 *      Extraneous Remarks:
 *          We would prefer to keep this function private, so we will
 *              declare its prototype here and in the one other file
 *              where we need it, namely, dictionary.c, rather than
 *              exporting it widely in a  .h  file.
 *
 **************************************************************************** */

void handle_internal( tic_param_t pfield);
void handle_internal( tic_param_t pfield)
{
	fwtoken tok = pfield.fw_token;

	signed long wlen;
	unsigned int sav_lineno = lineno;    /*  For error message  */

	bool handy_toggle = TRUE ;   /*  Various uses...   */
	
#ifdef DEBUG_SCANNER
	printf("%s:%d: debug: tokenizing control word '%s'\n",
						iname, lineno, statbuf);
#endif
	switch (tok) {
	case BEGIN:
		emit_begin();
		break;

	case BUFFER:
		if ( create_word(tok) )
		{
		emit_token("b(buffer:)");
		}
		break;

	case CONST:
		if ( create_word(tok) )
		{
		emit_token("b(constant)");
		}
		break;

	case COLON:
		{
		    /*  Collect error- -detection or -reporting items,
		     *      but don't commit until we're sure the
		     *      creation was a success.
		     */
		    u16 maybe_last_colon_fcode = nextfcode ;
		    unsigned int maybe_last_colon_lineno = lineno;
		    unsigned int maybe_last_colon_abs_token_no = abs_token_no;
		    unsigned int maybe_last_colon_do_depth = do_loop_depth;
		    /*  last_colon_defname
		     *     has to wait until after call to  create_word()
		     */

		    if ( create_word(tok) )
		    {
			last_colon_fcode = maybe_last_colon_fcode;
			last_colon_lineno = maybe_last_colon_lineno;
			last_colon_abs_token_no = maybe_last_colon_abs_token_no;
			last_colon_do_depth = maybe_last_colon_do_depth;
			collect_input_filename( &last_colon_filename);
			/*  Now we can get  last_colon_defname  */
			if ( last_colon_defname != NULL )
			{
			    free( last_colon_defname);
			}
			last_colon_defname = strdup(statbuf);

		emit_token("b(:)");
		incolon=TRUE;
			hide_last_colon();
			lastcolon = opc;
		    }
		}
		break;
	
	case SEMICOLON:
		if ( test_in_colon("SEMICOLON", TRUE, TKERROR, NULL) )
		{
		    ret_stk_balance_rpt( "termination,", TRUE);
		    /*  Clear Control Structures just back to where
		     *      the current Colon-definition began.
		     */
		    clear_control_structs_to_limit(
			"End of colon-definition", last_colon_abs_token_no);

		    if ( ibm_locals )
		    {
			finish_locals();
			forget_locals();
		    }

		emit_token("b(;)");
		incolon=FALSE;
		    reveal_last_colon();
		}
		break;

	case CREATE:
		if ( create_word(tok) )
		{
		emit_token("b(create)");
		}
		break;

	case DEFER:
		if ( create_word(tok) )
		{
		emit_token("b(defer)");
		}
		break;

	case ALLOW_MULTI_LINE:
		report_multiline = FALSE;
		break;

	case OVERLOAD:
		if ( test_in_colon(statbuf, FALSE, WARNING, NULL) )
		{
		    do_not_overload = FALSE;
		}
		break;

	case DEFINED:
		if (get_word_in_line( statbuf) )
		{
		    eval_user_symbol(statbuf);
		}
		break;

	case CL_FLAG:
		if (get_word_in_line( statbuf) )
		{
		     set_cl_flag( statbuf, TRUE);
		}
		break;

	case SHOW_CL_FLAGS:
		show_all_cl_flag_settings( TRUE);
		break;

	case FIELD:
		if ( create_word(tok) )
		{
		emit_token("b(field)");
		}
		break;

	case VALUE:
		if ( create_word(tok) )
		{
		emit_token("b(value)");
		}
		break;
		
	case VARIABLE:
		if ( create_word(tok) )
		{
		emit_token("b(variable)");
		}
		break;

	case AGAIN:
		emit_again();
		break;

	case ALIAS:
		if ( create_alias() )
		{
		    trace_creation( ALIAS, statbuf);
		}
		break;

	case CONTROL:
		if ( get_word_in_line( statbuf) )
		{
		    emit_literal(statbuf[0]&0x1f);
		}
		break;

	case DO:
		emit_token("b(do)");
		mark_do();
		break;

	case CDO:
		emit_token("b(?do)");
		mark_do();
		break;

	case ELSE:
		emit_else();
		break;

	case CASE:
		emit_case();
		break;

	case ENDCASE:
		emit_endcase();
		break;

	case NEW_DEVICE:
		handy_toggle = FALSE;
	case FINISH_DEVICE:
		finish_or_new_device( handy_toggle );
		break;

	case FLITERAL:
		{
		    u32 val;
		    val = dpop();
		    emit_literal(val);
		}
		break;

	case OF:
		emit_of();
		break;

	case ENDOF:
		emit_endof();
		break;
		
	case EXTERNAL:
		set_hdr_flag( FLAG_EXTERNAL );
		break;
		
	case HEADERLESS:
		set_hdr_flag( FLAG_HEADERLESS );
		break;
	
	case HEADERS:
		set_hdr_flag( FLAG_HEADERS );
		break;

	case DECIMAL:
		/* in a definition this is expanded as macro "10 base !" */
		base_change ( 0x0a );
		break;
		
	case HEX:
		base_change ( 0x10 );
		break;

	case OCTAL:
		base_change ( 0x08 );
		break;

	case OFFSET16:
		if (!offs16)
		{
		    tokenization_error(INFO, "Switching to 16-bit offsets.\n");
		}else{
		    tokenization_error(WARNING,
			"Call of OFFSET16 is redundant.\n");
		}
		emit_token("offset16");
		offs16=TRUE;
		break;

	case IF:
		emit_if();
		break;

/* **************************************************************************
 *
 *      Still to be done:
 *          Correct analysis of Return-Stack usage within Do-Loops
 *              or before Loop Elements like I and J or UNLOOP or LEAVE.
 *
 **************************************************************************** */
	case UNLOOP:
		emit_token("unloop");
		must_be_deep_in_do(1);
		break;

	case LEAVE:
		emit_token("b(leave)");
		must_be_deep_in_do(1);
		break;

	case LOOP_I:
		emit_token("i");
		must_be_deep_in_do(1);
		break;

	case LOOP_J:
		emit_token("j");
		must_be_deep_in_do(2);
		break;
		
	case LOOP:
		emit_token("b(loop)");
		resolve_loop();
		break;
		
	case PLUS_LOOP:
		emit_token("b(+loop)");
		resolve_loop();
		break;


	case INSTANCE:
		{
		    bool set_instance_state = FALSE;
		    bool emit_instance = TRUE;
		    /*  We will treat "instance" in a colon-definition as
		     *      an error, but allow it to be emitted if we're
		     *      ignoring errors; if we're not ignoring errors,
		     *      there's no output anyway...
		     */
		    if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
		    {   /*   We are in interpretation (not colon) state.  */
			/*  "Instance" not allowed during "global" scope  */ 
			if ( scope_is_global )
			{
			    glob_not_allowed( WARNING, FALSE );
			    emit_instance = FALSE;
			}else{
			    set_instance_state = TRUE;
			}
		    }
		    if ( emit_instance )
		    {
			if ( set_instance_state )
			{
			    /*  "Instance" isn't cumulative....  */
			    if ( is_instance )
			    {
				unresolved_instance( WARNING);
			    }
			    collect_input_filename( &instance_filename);
			    instance_lineno = lineno;
			    is_instance = TRUE;
			    dev_change_instance_warning = TRUE;
			}
			emit_token("instance");
		    }
		}
		break;
		
	case GLOB_SCOPE:
		if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
		{
		    if ( INVERSE( is_instance) )
		    {
			enter_global_scope();
		    }else{
			tokenization_error( TKERROR,
			    "Global Scope not allowed.  "
			    "\"Instance\" is in effect; issued" );
			just_where_started( instance_filename,
					        instance_lineno );
		    }
		}
		break;

	case DEV_SCOPE:
		if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
		{
		    resume_device_scope();
		}
		break;

	case TICK:             /*    '    */
		test_in_colon(statbuf, FALSE, WARNING, "[']");
	case BRACK_TICK:       /*   [']   */
		{
		    tic_hdr_t *token_entry;
		    if ( get_token( &token_entry) )
		    {
			emit_token("b(')");
			/* Emit the token; warning or whatever comes gratis */
			token_entry->funct( token_entry->pfield);
		    }
		}
		break;

	case F_BRACK_TICK:     /*  F['] <name>
	        		*     emits the token-number for <name>
				*  Mainly useful to compute the argument
				*     to   get-token   or  set-token
				*/
		{
		    tic_hdr_t *token_entry;
		    if ( get_token( &token_entry) )
		    {
			/*  "Obsolete" warning doesn't come gratis here...  */
			token_entry_warning( token_entry);
			/*  In Tokenizer-Escape mode, push the token  */
			if ( in_tokz_esc )
			{
			    dpush( token_entry->pfield.deflt_elem);
			}else{
			    emit_literal( token_entry->pfield.deflt_elem);
			}
		    }
		}
		break;

	case CHAR:
		handy_toggle = FALSE;
	case CCHAR:
		test_in_colon(statbuf, handy_toggle, WARNING,
		    handy_toggle ? "CHAR" : "[CHAR]" );
	case ASCII:
		if ( get_word_in_line( statbuf) )
		{
		    emit_literal(statbuf[0]);
		}
		break;
		
	case UNTIL:
		emit_until();
		break;

	case WHILE:
		emit_while();
		break;
		
	case REPEAT:
		emit_repeat();
		break;
		
	case THEN:
		emit_then();
		break;

	case IS:
		tokenization_error ( INFO,
		     "Substituting  TO  for deprecated  IS\n");
	case TO:
		if ( validate_to_target() )
		{
		emit_token("b(to)");
		}
		break;

	case FLOAD:
		if ( get_word_in_line( statbuf) )
		{
		    bool stream_ok ;
			
		    push_source( close_stream, NULL, TRUE) ;
			
		    tokenization_error( INFO, "FLOADing %s\n", statbuf );
			
		    stream_ok = init_stream( statbuf );
		    if ( INVERSE( stream_ok) )
		    {
			drop_source();
		    }
		}
		break;

	case STRING:         /*  Double-Quote ( " ) string  */
		handy_toggle = FALSE;
	case PSTRING:        /*  Dot-Quote  ( ." ) string   */
		wlen=get_string( TRUE);
		emit_token("b(\")");
		emit_string(statbuf, wlen);
		if ( handy_toggle )
		{
		    emit_token("type");
		}
		break;

	case SSTRING:        /*  Ess-Quote  ( s"  ) string  */
		handy_toggle = FALSE;
	case PBSTRING:       /*  Dot-Paren  .(   string  */
		if (*pc++=='\n') lineno++;
		{
		    unsigned int strt_lineno = lineno;
		    wlen = get_until( handy_toggle ? ')' : '"' );
		    if ( string_err_check( handy_toggle,
		             sav_lineno, strt_lineno) )
		    {
		emit_token("b(\")");
			emit_string(statbuf, wlen);
			if ( handy_toggle )
			{
		emit_token("type");
			}
		    }
		}
		break;

	case FUNC_NAME:
		if ( test_in_colon( statbuf, TRUE, TKERROR, NULL) )
		{
		    if ( in_tokz_esc )
		    {
			tokenization_error( P_MESSAGE, "Currently" );
			in_last_colon();
		    }else{
		emit_token("b(\")");
			emit_string( last_colon_defname,
		            strlen( last_colon_defname) );
			/*  if ( hdr_flag == FLAG_HEADERLESS ) { WARNING } */
		    }
		}
		break;

	case IFILE_NAME:
		emit_token("b(\")");
		emit_string( iname, strlen( iname) );
		break;

	case ILINE_NUM:
		emit_literal( lineno);
		break;
			
	case HEXVAL:
		base_val (0x10);
		break;
		
	case DECVAL:
		base_val (0x0a);
		break;
		
	case OCTVAL:
		base_val (8);
		break;

	case ASC_LEFT_NUM:
		handy_toggle = FALSE;
	case ASC_NUM:
		if (get_word_in_line( statbuf) )
		{
		    if ( handy_toggle )
		    {
			ascii_right_number( statbuf);
			} else {
			ascii_left_number( statbuf);
			}
		}
		break;

	case CONDL_ENDER:   /*  Conditional directives out of context  */
	case CONDL_ELSE:
		tokenization_error ( TKERROR,
		    "No conditional preceding %s directive\n",
			strupr(statbuf) );
		break;

	case PUSH_FCODE:
		tokenization_error( INFO,
		    "FCode-token Assignment Counter of 0x%x "
		    "has been saved on stack.\n", nextfcode );
		dpush( (long)nextfcode );
		break;

	case POP_FCODE:
		pop_next_fcode();
		break;

	case RESET_FCODE:
		tokenization_error( INFO,
		    "Encountered %s.  Resetting FCode-token "
			"Assignment Counter.  ", strupr(statbuf) );
		list_fcode_ranges( FALSE);
		reset_fcode_ranges();
		break;
		
	case EXIT:
		if ( test_in_colon( statbuf, TRUE, TKERROR, NULL)
		     || noerrors )
		{
		    ret_stk_balance_rpt( NULL, FALSE);
		    if ( ibm_locals )
		    {
			finish_locals ();
		    }
		    emit_token("exit");
		}
		break;

	case ESCAPETOK:
		enter_tokz_esc();
		break;
	
	case VERSION1:
	case FCODE_V1:
		fcode_starter( "version1", 1, FALSE) ;
		tokenization_error( INFO, "Using version1 header "
		    "(8-bit offsets).\n");
		break;
	
	case START1:
	case FCODE_V2:
	case FCODE_V3: /* Full IEEE 1275 */
		fcode_starter( "start1", 1, TRUE);
		break;
		
	case START0:
		fcode_starter( "start0", 0, TRUE);
		break;
		
	case START2:
		fcode_starter( "start2", 2, TRUE);
		break;
		
	case START4:
		fcode_starter( "start4", 4, TRUE);
		break;
		
	case END1:
		tokenization_error( WARNING, 
		    "Appearance of END1 in FCode source code "
			"is not intended by IEEE 1275-1994\n");
		handy_toggle = FALSE;
	case END0:
	case FCODE_END:
		if ( handy_toggle )
		{
		    you_are_here();
		}
		emit_token( handy_toggle ? "end0" : "end1" );
		fcode_ender();
		FFLUSH_STDOUT
		break;

	case RECURSE:
		if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) )
		{
		    emit_fcode(last_colon_fcode);
		}
		break;
		

	case RECURSIVE:
		if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) )
		{
		    reveal_last_colon();
		}
		break;

	case RET_STK_FETCH:
		ret_stk_access_rpt();
		emit_token( "r@");
		break;

	case RET_STK_FROM:
		ret_stk_access_rpt();
		bump_ret_stk_depth( -1);
		emit_token( "r>");
		break;

	case RET_STK_TO:
		bump_ret_stk_depth( 1);
		emit_token( ">r");
		break;

	case PCIHDR:
		emit_pcihdr();
		break;
	
	case PCIEND:
		finish_pcihdr();
		reset_fcode_ranges();
		FFLUSH_STDOUT
		break;

	case PCIREV:
		pci_image_rev = dpop();
		tokenization_error( INFO,
		    "PCI header revision=0x%04x%s\n", pci_image_rev,
			big_end_pci_image_rev ?
			    ".  Will be saved in Big-Endian format."
			    : ""  );
		break;

	case NOTLAST:
		handy_toggle = FALSE;
	case ISLAST:
		dpush(handy_toggle);
	case SETLAST:
		{
		    u32 val = dpop();
		    bool new_pili = BOOLVAL( (val != 0) );
		    if ( pci_is_last_image != new_pili )
		    {
			tokenization_error( INFO,
			    new_pili ?
				"Last image for PCI header.\n" :
				"PCI header not last image.\n" );
			pci_is_last_image = new_pili;
		    }
		}
		break;
		
	case SAVEIMG:
		if (get_word_in_line( statbuf) )
		{
		    free(oname);
		    oname = strdup( statbuf );
		    tokenization_error( INFO,
			"Output is redirected to file:  %s\n", oname);
		}
		break;

	case RESETSYMBS:
		tokenization_error( INFO,
		    "Resetting symbols defined in %s mode.\n",
			in_tokz_esc ? "tokenizer-escape" : "\"normal\"");
		if ( in_tokz_esc )
		{
		    reset_tokz_esc();
		}else{
		    reset_normal_vocabs();
		}	
		break;

	case FCODE_DATE:
		handy_toggle = FALSE;
	case FCODE_TIME:
		{
			time_t tt;
		    char temp_buffr[32];
			
			tt=time(NULL);
		    if ( handy_toggle )
		    {
			strftime(temp_buffr, 32, "%T %Z", localtime(&tt));
		    }else{
			strftime(temp_buffr, 32, "%m/%d/%Y", localtime(&tt));
		    }
		    if ( in_tokz_esc )
		    {
			tokenization_error( MESSAGE, temp_buffr);
		    }else{
			emit_token("b(\")");
			emit_string((u8 *)temp_buffr, strlen(temp_buffr) );
		    }
		}
		break;

	case ENCODEFILE:
		if (get_word_in_line( statbuf) )
		{
		encode_file( (char*)statbuf );
		}
		break;

	default:
	    /*  IBM-style Locals, under control of a switch  */
	    if ( ibm_locals )
	    {
		bool found_it = TRUE;
		switch (tok) {
		    case CURLY_BRACE:
			declare_locals( FALSE);
			break;
		    case DASH_ARROW:
			assign_local();
			break;
		    default:
			found_it = FALSE;
	}
		if ( found_it ) break;
}

	    /*  Down here, we have our last chance to recognize a token.
	     *      If  abort_quote  is disallowed, we will still consume
	     *      the string.  In case the string spans more than one
	     *      line, we want to make sure the line number displayed
	     *      in the error-message is the one on which the disallowed
	     *       abort_quote  token appeared, not the one where the
	     *      string ended; therefore, we might need to be able to
	     *      "fake-out" the line number...
	     */
{
		bool fake_out_lineno = FALSE;
		unsigned int save_lineno = lineno;
		unsigned int true_lineno;
		if ( abort_quote( tok) )
		{   break;
		}else{
		    if ( tok == ABORTTXT )  fake_out_lineno = TRUE;
		}
		true_lineno = lineno;

		if ( fake_out_lineno )  lineno = save_lineno;
		tokenization_error ( TKERROR,
		    "Unimplemented control word '%s'\n", strupr(statbuf) );
		if ( fake_out_lineno )  lineno = true_lineno;
	    }
	}
}
	
/* **************************************************************************
 *
 *      Function name:  skip_string
 *      Synopsis:       When Ignoring, skip various kinds of strings.  Maps
 *                          to string-handlers in handle_internal()...
 *
 *      Associated FORTH words:                 Double-Quote ( " ) string
 *                                              Dot-Quote  ( ." ) string
 *                                              Ess-Quote  ( s"  ) string
 *                                              Dot-Paren  .(   string
 *                                              ABORT" (even if not enabled)
 *             { (Local-Values declaration) and -> (Local-Values assignment)
 *                  are also handled if  ibm_locals  is enabled.
 *
 *      Inputs:
 *         Parameters:
 *             pfield               Param-field of the entry associated with
 *                                      the FWord that is being Ignored.
 *         Global Variables:
 *             statbuf              The word that was just read.
 *             pc                   Input-stream pointer
 *             lineno               Line-number, used for errors and warnings
 *             ibm_locals           TRUE if IBM-style Locals are enabled
 *
 *      Outputs:
 *         Returned Value:          NONE
 *
 *      Error Detection:
 *          Multi-line warnings, "Unterminated" Errors
 *              handled by called routines
 *
 *      Extraneous Remarks:
 *          We would prefer to keep this function private, too, so we
 *              will declare its prototype here and in the one other
 *              file where we need it, namely, dictionary.c, rather
 *              than exporting it widely in a  .h  file.
 *
 **************************************************************************** */

void skip_string( tic_param_t pfield);
void skip_string( tic_param_t pfield)
{
    fwtoken tok = pfield.fw_token;
    unsigned int sav_lineno = lineno;
    bool handy_toggle = TRUE ;   /*  Various uses...   */
			
    switch (tok) {
    case STRING:         /*  Double-Quote ( " ) string    */
    case PSTRING:        /*  Dot-Quote  ( ." ) string     */
    case ABORTTXT:       /*  ABORT", even if not enabled  */
	get_string( FALSE);   /*  Don't truncate; ignoring anyway  */
	/*  Will handle multi-line warnings, etc.   */
				break;
			
    case SSTRING:        /*  Ess-Quote  ( s"  ) string  */
	handy_toggle = FALSE;
    case PBSTRING:       /*  Dot-Paren  .(   string  */
			if (*pc++=='\n') lineno++;
	{
	    unsigned int strt_lineno = lineno;
	    get_until( handy_toggle ? ')' : '"' );
	    string_err_check( handy_toggle, sav_lineno, strt_lineno );
	}
	break;

    default:
	/*  IBM-style Locals, under control of a switch  */
	if ( ibm_locals )
	{
	    bool found_it = TRUE;
	    switch (tok) {
		case CURLY_BRACE:
		    declare_locals( TRUE);
		    break;
		case DASH_ARROW:
		    get_word();
		    break;
		default:
		    found_it = FALSE;
	    }
	    if ( found_it ) break;
	}

	tokenization_error ( FATAL,  "Program Error.  "
	    "Unimplemented skip-string word '%s'\n", strupr(statbuf) );
    }
}

/* **************************************************************************
 *
 *      Function name:  process_remark
 *      Synopsis:       The active function for remarks (backslash-space)
 *                          and comments (enclosed within parens)
 *
 *      Associated FORTH word(s):        \   (         
 *
 *      Inputs:
 *         Parameters:
 *             TIC entry "parameter field", init'd to delimiter character.
 *
 *      Outputs:
 *         Returned Value:          NONE
 *
 *      Error Detection:
 *          Warning if end-of-file encountered before delimiter.
 *          Warning if multi-line parentheses-delimited comment.
 *
 *      Process Explanation:
 *          Skip until the delimiter.
 *          If end-of-file was encountered, issue Warning.
 *          Otherwise, and if delimiter was not new-line,
 *              check for multi-line with Warning.
 *
 **************************************************************************** */

void process_remark( tic_param_t pfield )
{
    char until_char = (char)pfield.deflt_elem ;
    unsigned int start_lineno = lineno;

#ifdef DEBUG_SCANNER

    get_until(until_char);
			printf ("%s:%d: debug: stack diagram: %s)\n",
						iname, lineno, statbuf);
#else

    if ( skip_until( until_char) )
    {
	if ( until_char == '\n' )
	{
	    /*  Don't need any saved line number here ...  */
	    tokenization_error ( WARNING,
		"Unterminated remark.\n");
	}else{
	    warn_unterm( WARNING, "comment", start_lineno);
	}
    }else{
	if ( until_char != '\n' )
	{
	    pc++;
	    warn_if_multiline( "comment", start_lineno);
	}
    }
#endif  /*  DEBUG_SCANNER  */
}
		
			
/* **************************************************************************
 *
 *      Function name:  filter_comments
 *      Synopsis:       Process remarks and comments in special conditions
 *      
 *      Inputs:
 *         Parameters:
 *             inword             Current word just parsed
 *
 *      Outputs:
 *         Returned Value:        TRUE if Current word is a Comment-starter.
 *                                    Comment will be processed
 *
 *      Process Explanation:
 *          We want to be able to recognize any alias the user may have
 *              defined to a comment-delimiter, in whatever applicable
 *              vocabulary it might be.
 *          The active-function of any such alias will, of necessity, be
 *              the  process_remark()  routine, defined just above.
 *          We will search for the TIC-entry of the given word; if we don't    
 *              find it, it's not a comment-delimiter.  If we do find it, 
 *              and it is one, we invoke its active-function and return TRUE.
 *          We also want to permit the "allow-multiline-comments" directive   
 *              to be processed in the context that calls this routine, so
 *              we will check for that condition, too.
 *
 **************************************************************************** */

bool filter_comments( u8 *inword)
{
    bool retval = FALSE;
    tic_hdr_t *found = lookup_word( inword, NULL, NULL );
			
    if ( found != NULL )
    {
	if ( found->funct == process_remark )
	{
	    found->funct( found->pfield);
	    retval = TRUE;
	}else{
	    /*  Permit the "allow-multiline-comments" directive  */
	    if ( found->funct == handle_internal )
	    {
	        if ( found->pfield.fw_token == ALLOW_MULTI_LINE )
		{
		    /*   Make sure any intended side-effects occur...  */
		    found->funct( found->pfield);
		    retval = TRUE;
		}
	    }
	}
    }
    return ( retval );
		}

		
/* **************************************************************************
 *
 *      Function name:  tokenize_one_word
 *      Synopsis:       Tokenize the currently-obtained word
 *                          along with whatever it consumes.
 *
 *      Inputs:
 *         Parameters:
 *             wlen       Length of symbol just retrieved from the input stream
 *                              This is not really used here any more; it's
 *                              left over from an earlier implementation.
 *         Global Variables:        
 *             statbuf      The symbol (word) just retrieved from input stream.
 *             in_tokz_esc  TRUE if "Tokenizer-Escape" mode is in effect; a
 *                              different set of vocabularies from "Normal"
 *                              mode will be checked (along with those that
 *                              are common to both modes).  
 *             ibm_locals   Controls whether to check for IBM-style Locals;
 *                              set by means of a command-line switch.
 *
 *      Outputs:
 *         Returned Value:      NONE
 *         Global Variables:         
 *             statbuf          May be incremented    
 *             in_tokz_esc      May be set if the word just retrieved is
 *                                  the  tokenizer[   directive. 
 *             tic_found        
 *
 *      Error Detection:
 *           If the word could neither be identified nor processed as a number,
 *               that is an ERROR; pass it to  tokenized_word_error  for a
 *               message.
 *
 *      Process Explanation:
 *          Look for the word in each of the various lists and vocabularies
 *              in which it might be found, as appropriate to the current
 *              state of activity.
 *          If found, process it accordingly.
 *          If not found, try to process it as a number.
 *          If cannot process it as a number, declare an error.
 *
 *      Revision History:
 *          Updated Tue, 10 Jan 2006 by David L. Paktor
 *              Convert to  tic_hdr_t  type vocabularies.
 *          Updated Mon, 03 Apr 2006 by David L. Paktor
 *             Replaced bulky "Normal"-vs-"Escape" block with a call
 *                 to  lookup_word .  Attend to a small but important
 *                 side-effect of the "handle_<vocab>" routines that
 *                 feeds directly into the protection against self-
 *                 -recursion in a user-defined Macro:  Set the global
 *                 variable  tic_found  to the entry, just before we
 *                 execute it, and we're good to go... 
 *
 *      Extraneous Remarks:
 *          We trade off the strict rules of structure for simplicity
 *              of coding.
 *
 **************************************************************************** */
		
void tokenize_one_word( signed long wlen )
{
		
    /*  The shared lookup routine now handles everything.   */
    tic_hdr_t *found = lookup_word( statbuf, NULL, NULL );
		
    if ( found != NULL )
    {
	tic_found = found;
	found->funct( found->pfield);
	return ;
    }
		
    /*  It's not a word in any of our current contexts.
     *      Is it a number?
     */
    if ( handle_number() )
    {
	return ;
			}

    /*  Could not identify - give a shout. */
    tokenized_word_error( statbuf );
		}

/* **************************************************************************
 *
 *      Function name:  tokenize
 *      Synopsis:       Tokenize the current input stream.
 *                          May be called recursively for macros and such.
 *
 *      Revision History:
 *      Updated Thu, 24 Mar 2005 by David L. Paktor
 *          Factor-out comment-filtration; apply to  gather_locals
 *          Factor-out tokenizing a single word (for conditionals)
 *          Separate actions of "Tokenizer-Escape" mode.
 *
 **************************************************************************** */

void tokenize(void)
{
    signed long wlen = 0;
		
    while ( wlen >= 0 )
    {
	wlen = get_word();
	if ( wlen > 0 )
	{
	    tokenize_one_word( wlen );
	}
	}
}
openbios/fcode-utils/toke/scanner.c

Commit

toke/scanner.c