/* $Id: wp2x.c 1.10 91/08/18 15:05:41 raymond Exp $ */

/* Before compiling, read the section titled `portability concerns'. */

/************************************************************************
 * $Log:	wp2x.c $
 * Revision 1.10  91/08/18  15:05:41  raymond
 * Descriptor file stuff.
 * 
 * Revision 1.9  91/08/06  09:08:09  raymond
 * add missing `break' in check_arity
 * 
 * Revision 1.8  91/08/06  08:31:21  raymond
 * Avoid infinite loop if file is corrupted.
 * Better error-checking on configuration file (new output scheme).
 * 
 * Revision 1.7  91/08/02  13:35:37  raymond
 * Epsilonically better handling of environments that didn't end properly.
 * Change return type of main() to keep gcc quiet.
 * MSC support.
 * 
 * Revision 1.6  91/07/28  21:08:53  raymond
 * BeginTabs et al, FNote#, ENote#, NegateTotal, more unsupported codes
 * Improve character tokens, Header, Footer
 * Take care when people don't end lines with HRt
 * Fix major bugs in endnote processing, footnote numbering (and nobody
 *    noticed!)
 * More worries about signed characters.
 * 
 * Revision 1.5  91/07/23  22:59:43  raymond
 * Add COMMENT token, and some bug fixes.
 * 
 * Revision 1.4  91/07/23  22:09:23  raymond
 * Concessions to slightly non-ANSI compilers. (`const', `unsigned char')
 * More patches for machines with signed characters.
 * Fix blatant bug in hex constants.  (Amazed nobody noticed.)
 * New tags SetFn#, Header, Footer.
 * Warning messages for unsupported tokens.
 * Backslashes processed in character tags.
 * Fixed(?) footnotes, endnotes, page length changes.
 * Inserted missing `break's into the huge switch.
 * 
 * Revision 1.3  91/07/12  15:39:44  raymond
 * Spiffy Turbo C support.
 * Some <stdlib.h>'s don't declare errno et al.
 * Command line switches `-s' and `-n' added.
 * More cute warning messages.
 * Dots periodically emitted.
 * Give the enum of token types a name, to placate QuickC.
 * Fix problems with pitch changes and signed characters.
 * 
 * Revision 1.2  91/06/22  08:18:22  raymond
 * <process.h> and fputchar() aren't sufficiently portable.
 * strerror() fails to exist on some so-called ANSI platforms.
 * Removed assumption that characters are unsigned.
 * Forgot to #include <stdarg.h>
 * 
 */

/************************************************************************
 * PORTABILITY CONCERNS
 ************************************************************************
 *
 * If possible, compile with unsigned characters.  (Though I think
 * I've taken care of all the places where I assumed characters are
 * unsigned.)
 *
 * This program assumes that your compiler is fully ANSI-conformant.
 * Depending on how non-conformant your compiler is, you may need to
 * set the following symbols at compile time:
 *
 * NO_CONST -- set this if your compiler does not know what `const' means.
 * Cdecl    -- how to tag functions that are variadic.
 *
 * Cdecl is used if you need special declarations for variadic functions.
 * This is used by IBM PC compilers so that you can make the default
 * parameter passing Pascal-style or Fastcalls.
 *
 * Some very machine-dependent stuff happens when trying to open the
 * descriptor file.  Please read dopen.c as well.
 */

/* Compilers that do not understand `const' get it defined away, so every
 * const-qualified declaration in this file still compiles.
 */
#ifdef NO_CONST
#define const
#endif

/* Cdecl may be supplied on the compiler command line; when it is not,
 * it expands to nothing.
 */
#ifndef Cdecl                       /* default is nothing */
#define Cdecl
#endif

/************************************************************************
 * This program divides naturally into two parts.
 *
 * The first part reads in the descriptor file and builds the expansions
 * for each of the identifiers listed in the names[] table below.
 * This is the easy part.
 *
 * The second part reads the input file and uses the expansions collected
 * in the first part to transform the file into the output.
 * This is the hard part.
 *
 ************************************************************************/

/* And now, the code.
 * We start off with some obvious header files.
 */

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Some platforms do not define these externals in stdlib.h, so they are
 * declared by hand here.
 * NOTE(review): nothing in the part of the file reviewed here refers to
 * them; presumably they back a strerror() substitute elsewhere -- confirm.
 */
extern int   Cdecl errno;
extern char *Cdecl sys_errlist[];
extern int   Cdecl sys_nerr;

/************************************************************************/
/* Some common idioms                                                   */
/************************************************************************/

/* Expands to nothing.  Written as the body of loops whose controlling
 * expression does all the work, so the empty body is visibly deliberate.
 */
#define do_nothing /* twiddle thumbs */

/************************************************************************/
/* Blowing up                                                           */
/************************************************************************/

/* The function "error" accepts two arguments.  A FILE pointer and
 * a printf-style argument list.  The printf-style arguments are
 * printed to stderr.  If the FILE is non-NULL, then the remaining
 * contents of the file are printed as well (to provide context), up
 * to 80 characters.
 */

/* error -- report a fatal error on stderr and terminate.
 *
 *   fp   if non-NULL, a stream whose next unread characters (80 at most)
 *        are echoed to stderr to show where things went wrong
 *   fmt  printf-style format string, followed by its arguments
 *
 * Does not return: always ends with exit(1).
 */
void Cdecl error(FILE *fp, char *fmt, ...)
{
  int i, c;
  va_list ap;

  fputs("Error: ", stderr);
  va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap);
  fputc('\n', stderr);

  if (fp) {
    fprintf(stderr, "Unread text: ");
    /* feof() only becomes true after a read has already failed, so look
     * at what getc() returned instead.  Testing feof() first would let
     * the EOF marker itself be written out as a stray character, and a
     * read error would print 80 of them.
     */
    for (i = 0; i < 80 && (c = getc(fp)) != EOF; i++) fputc(c, stderr);
    fputc('\n', stderr);
  }
  exit(1);
}

/************************************************************************/
/* Command-line switches                                                */
/************************************************************************/
/* silent: when nonzero, the typeout text, the "No expansion" and
 * "not supported" warnings, and the progress dots are all suppressed.
 * NOTE(review): which command-line switch sets each of these is not
 * visible in this part of the file.
 */
int silent = 0;
/* blipinterval: value blipcount is reloaded with after each progress dot. */
int blipinterval = 1024;                /* display blips every 1K */
/* blipcount: counted down once per token read; a dot is printed when it
 * reaches zero.
 */
int blipcount;

/************************************************************************/
/* Basic file manipulations                                             */
/************************************************************************/

/* We here define a few basic functions.  Let us hope that the first
 * three functions' names are self-descriptive.
 */

/* Read past any whitespace (as judged by isspace) on fp.
 * Returns the first character that is not whitespace, or EOF if the
 * stream runs out before one is found.
 */
int next_non_whitespace(FILE *fp)
{
  int ch;

  for (;;) {
    ch = getc(fp);
    if (ch == EOF || !isspace(ch)) return ch;
  }
}

/* Read past blanks and tabs only (newlines are NOT skipped).
 * Returns the first character that is neither, or EOF at end of stream.
 */
int next_non_space_or_tab(FILE *fp)
{
  int ch;

  do {
    ch = getc(fp);
  } while (ch == ' ' || ch == '\t');

  return ch;
}

/* Discard the rest of the current line of fp, including its newline.
 * Stops quietly if the stream ends first.
 */
void eat_until_newline(FILE *fp)
{
  for (;;) {
    int ch = getc(fp);
    if (ch == '\n' || ch == EOF) break;
  }
}

/* The function parse_hex grabs a (no-more-than-two-character) hex
 * constant.  Similarly, parse_octal does the same for octal constants.
 */

/* Read a one- or two-digit hexadecimal constant from fp and return its
 * value.  The first digit is mandatory (its absence is a fatal error);
 * if the character after it is not a hex digit it is pushed back.
 */
int parse_hex(FILE *fp)
{
  int digit, result;

  /* Mandatory first digit. */
  digit = toupper(getc(fp));
  if (!isxdigit(digit))
    error(fp, "Expecting a hex digit");
  result = (digit <= '9') ? digit - '0' : digit - 'A' + 10;

  /* Optional second digit. */
  digit = getc(fp);
  if (!isxdigit(digit)) {
    ungetc(digit, fp);
    return result;
  }

  digit = toupper(digit);
  result <<= 4;
  result += (digit <= '9') ? digit - '0' : digit - 'A' + 10;
  return result;
}

/* Finish reading an octal constant of up to three digits.
 *
 *   fp  stream positioned just after the first digit
 *   c   that first digit, as a character
 *
 * Up to two further octal digits are consumed; the first character that
 * is not an octal digit is pushed back.  Returns the value of the constant.
 */
int parse_octal(FILE *fp, register int c)
{
  int result = c - '0';
  int more;

  for (more = 2; more > 0; more--) {
    c = getc(fp);
    if (c < '0' || c > '7') {
      ungetc(c, fp);
      break;
    }
    result = result * 8 + (c - '0');
  }
  return result;
}


/************************************************************************/
/* Storing the input strings                                            */
/************************************************************************/

/* The input strings are allocated from a large pool we set up at
 * startup.  This lets us do our thing without having to fight
 * with people like malloc and friends.  This method does limit
 * our configuration file to 32K, however.  We hope that this is
 * not a problem.  (It also means that the program can be translated
 * to almost any other language without too much difficulty.)
 *
 * Here's how it works.
 *
 * "pool" is an array of POOL_SIZE characters.  The value of POOL_SIZE
 * is flexible, but shouldn't exceed 65535, since that's the size of
 * an IBM PC segment.  If your configuration file is more than 64K,
 * then there's probably something wrong.
 *
 * "pool_ptr" points to the next character in "pool" that hasn't been
 * used for anything yet.
 *
 * "top_of_pool" points one character beyond the end of pool, so we can
 * see if we've run out of memory.
 *
 * When we want to put something into the pool, we simply store into "pool"
 * and increment "pool_ptr" appropriately.
 *
 * Access to these variables is done through the following functions,
 * implemented as macros.
 *
 * "anchor_string()" is called before you start throwing things into
 * the pool.  It returns a pointer to the beginning of the string
 * being built up.
 *
 * "add_to_string(c)" adds the character "c" to the string being built up.
 *
 * "finish_string()" gets ready for building a new string.  We check
 * that we did not overflow our pool.  We pull the sneaky trick of
 * a dummy else clause so that [1] "else"s match up properly if this
 * is nested inside an "if" statement, [2] the semicolon gets eaten
 * up correctly.
 *
 * "remove_string(s)" removes all strings from the one called "s" onwards.
 *
 */

#define POOL_SIZE   32768U

char pool[POOL_SIZE];
char *pool_ptr = pool;
#define top_of_pool (pool + POOL_SIZE)

/* anchor_string(): address at which the string about to be built starts. */
#define anchor_string() pool_ptr

/* add_to_string(c): append one character to the string being built.
 * The bound is tested BEFORE the store, so a descriptor file that is too
 * large is reported through error() instead of writing past the end of
 * pool[].  The expression still yields the stored character, as before.
 */
#define add_to_string(c) \
     (pool_ptr < top_of_pool ? (*pool_ptr++ = (c)) \
                             : (error(NULL, "string pool overflow."), 0))

/* finish_string(): close off the string being built.  A completely full
 * pool is still reported as an overflow.  The trailing `else' swallows
 * the caller's semicolon and keeps an enclosing if/else paired up.
 */
#define finish_string() \
     if (pool_ptr >= top_of_pool) error(NULL, "string pool overflow."); \
     else (void)0

/* remove_string(s): release the string s and everything stored after it. */
#define remove_string(s) (pool_ptr = (s))

/************************************************************************/
/* Remembering the expansions                                           */
/************************************************************************/

/* The array "expansion" contains the expansions for everything.
 * Everything is initialized to NULL.
 *
 * We set up things as follows:
 *  expansion[0..255]  contain the expansions for the possible characters.
 *  expansion[256...]  contain the expansions for the special codes.
 *
 * Make sure this table is kept in parallel with the names[] array
 *
 *
 */

/*      name   value          When is it expanded? */
/*      ----    ---           -------------------- */
/* Token codes.  Values 0..255 are reserved for plain character codes, so
 * the named tokens start at 256 (typeout).  A token's offset from typeout
 * is also its index into the names[] table.
 */
enum token_type {          /* Some compilers do not like unnamed enums */
        typeout = 256,     /* Typed out as soon as it is encountered */
        BEGIN        ,     /* Before the first character of the file */
        END          ,     /* After the last character of the file   */
        Comment      ,     /* For wp2x-generated comments            */
        eComment     ,     /* End of a wp2x-generated comment        */

        PageNo       ,     /* Current page number */
        RomanPage    ,     /* Set page number (to roman numerals) */
        ArabicPage   ,     /* Set page number (to arabic) */

        HSpace       ,     /* unbreakable space (`Hard space') */

        Tab          ,     /* Tab character */
        BeginTabs    ,     /* Begin tab settings */

        /* DO NOT CHANGE THE RELATIVE ORDER OF THESE FOUR TOKENS */
        /* (process_tab_attribute() selects among them as SetTab + 0..3) */
        SetTab       ,     /* Set normal tabstop at %d */
        SetTabCenter ,     /* Set center tabstop at %d */
        SetTabRight  ,     /* Set right-justified tab at %d */
        SetTabDecimal,     /* Set decimal tab at %d */

        EndTabs      ,     /* End tab settings */

        HPg          ,     /* Hard page break */
        CondEOP      ,     /* Conditional end-of-page */
        HRt          ,     /* Hard return */
        SRt          ,     /* Soft return */

        NHyph        ,     /* Normal hyphen */
        NHyphE       ,     /* Normal hyphen at the end of a line */
        HHyph        ,     /* Hard (nonbreakable) hyphen */
        DHyph        ,     /* Discretionary hyphen */
        DHyphE       ,     /* Discretionary hyphen at the end of a line */
        NoHyphWord   ,     /* Do not hyphenate this word */

        Marg         ,     /* Margin settings */
        TopMarg      ,     /* Set top margin */
        PageLength   ,     /* Set page length */

        SS           ,     /* Single spacing */
        DS           ,     /* Double spacing */
        OHS          ,     /* 1.5 spacing (One and a Half Spacing) */
        TS           ,     /* Triple spacing */
        LS           ,     /* Generic line spacing */
        LPI          ,     /* set 6 or 8 LPI */

        Bold         ,     /* Begin boldface */
        eBold        ,     /* End boldface */
        Und          ,     /* Begin underline */
        eUnd         ,     /* End underline */
        Red          ,     /* Begin redline */
        eRed         ,     /* End redline */
        Strike       ,     /* Begin strikeout */
        eStrike      ,     /* End strikeout */
        Rev          ,     /* Begin reverse video */
        eRev         ,     /* End reverse video */

        Over         ,     /* Overstrike */
        eOver        ,     /* [mythical "end overstroke" code] */
        Sup          ,     /* Superscript */
        eSup         ,     /* [mythical "end superscript" code] */
        Sub          ,     /* Subscript */
        eSub         ,     /* [mythical "end subscript" code] */

        UpHalfLine   ,     /* Advance printer up 1/2 line */
        DownHalfLine ,     /* Advance printer down 1/2 line */
        AdvanceToHalfLine, /* Advance to absolute vertical position */

        Indent       ,     /* Indented paragraph */
        DIndent      ,     /* Left-and-right-indented paragraph */
        eIndent      ,     /* End indented paragraph */
        MargRel      ,     /* Margin release (unknown argument) */

        Center       ,     /* Center current line */
        eCenter      ,     /* End centering */
        CenterHere   ,     /* Center line around current column */
        eCenterHere  ,     /* End centering */

        Align        ,     /* Begin alignment */
        eAlign       ,     /* End alignment */
        AlignChar    ,     /* Set alignment character */
        FlushRight   ,     /* Begin flush right */
        eFlushRight  ,     /* End flush right */

        Math         ,     /* Begin math mode */
        eMath        ,     /* End math mode */
        MathCalc     ,     /* Begin math calc mode */
        MathCalcColumn,    /* Math calc column */

        SubTtl       ,     /* Do subtotal */
        IsSubTtl     ,     /* Subtotal entry */
        Ttl          ,     /* Do total */
        IsTtl        ,     /* Total entry */
        GrandTtl     ,     /* Do grand total */
        NegateTotal  ,     /* Negate current total */

        Col          ,     /* Begin column mode */
        eCol         ,     /* End column mode */

        Fn           ,     /* Begin footnote */
        eFn          ,     /* End footnote */
        En           ,     /* Begin endnote */
        eEn          ,     /* End endnote */
        SetFnNum     ,     /* Set footnote number */
        FNoteNum     ,     /* Footnote number */
        ENoteNum     ,     /* Endnote number */
        TableMarker  ,     /* Table of contents or whatever marker */

        Hyph         ,     /* Hyphenation on */
        eHyph        ,     /*             off */
        Just         ,     /* Justification on */
        eJust        ,     /*               off */
        Wid          ,     /* Widow/orphan protection on */
        eWid         ,     /*                         off */
        HZone        ,     /* Hyphenation zone */
        DAlign       ,     /* Decimal alignment character */

        Header       ,     /* Begin header text */
        eHeader      ,     /* End header text */
        Footer       ,     /* Begin footer text */
        eFooter      ,     /* End footer text */

        Supp         ,     /* Suppress formatting for one page */
        CtrPg        ,     /* Center page vertically */

        SetFont      ,     /* Change pitch or font */
        SetBin       ,     /* Select paper bin (0, 1, ...) */

        PN           ,     /* Page number position (PN+0 through PN+8) */

/* Internal tokens for unsupported operations */
        /* PN+1 .. PN+8 are used without names of their own; the slot at
         * PN+9 lines up with the NULL entry in names[].
         */
        UnsupportedPlaceHolder = PN + 9,
        SetPageNumberColumn,
        SetTabs,
        SetUnderlineMode,
        DefineColumn,
        SetFootnoteAttributes,
        SetParagraphNumberingStyle,
        NumberedParagraph,
        BeginMarkedText,
        EndMarkedText,
        DefineMarkedText,
        DefineIndexMark,
        DefineMathColumns,
        Obsolete,
        ReservedCode,
        UnknownCode,
        LastToken          /* one past the final token; sizes expansion[] */
};

/* Expansion text for each character code (0..255) and each token
 * (typeout..LastToken-1).  A NULL entry means the descriptor file gave no
 * expansion; process() supplies a default the first time it is needed.
 */
char *expansion[LastToken];


/************************************************************************/
/* Naming the identifiers                                               */
/************************************************************************/
/* Extreme care must be taken to ensure that this list parallels the list
 * of token names above.
 */

/* One entry of the identifier table. */
typedef struct identifier {
    char *name;     /* spelling of the identifier; NULL in the placeholder entry */
    int arity;      /* number of parameters: check_arity() accepts %1 and %c
                     * only if this is at least 1, and %2 only if at least 2 */
} Identifier;

/* names[k] describes the token whose code is typeout + k.
 *
 * match_identifier() searches from the top and stops at the NULL entry,
 * so the entries after it can never be matched from a descriptor file;
 * they only supply the text used in messages and by unsupported().
 */
Identifier names[] = {
    { "typeout", 0 },
    { "BEGIN", 0 },
    { "END", 0 },
    { "Comment", 0 },
    { "comment", 0 },
    { "PageNo", 0 },
    { "RomanPage", 1 },
    { "ArabicPage", 1 },
    { "HSpace", 0 },
    { "Tab", 0 },
    { "BeginTabs", 0 },
    { "SetTab", 1 },
    { "SetTabCenter", 1 },
    { "SetTabRight", 1 },
    { "SetTabDecimal", 1 },
    { "EndTabs", 0 },
    { "HPg", 0 },
    { "CondEOP", 1 },
    { "HRt", 0 },
    { "SRt", 0 },
    { "-", 0 },        /* NHyph */
    { "--", 0 },       /* NHyphE */
    { "=", 0 },        /* HHyph */
    { "\\-", 0 },      /* DHyph */
    { "\\--", 0 },     /* DHyphE */
    { "NoHyphWord", 0 },
    { "Marg", 2 },
    { "TopMarg", 1 },
    { "PageLength", 1 },
    { "SS", 0 },
    { "DS", 0 },
    { "1.5S", 0 },    /* OHS */
    { "TS", 0 },
    { "LS", 1 },
    { "LPI", 1 },
    { "Bold", 0 },
    { "bold", 0 },
    { "Und", 0 },
    { "und", 0 },
    { "Red", 0 },
    { "red", 0 },
    { "Strike", 0 },
    { "strike", 0 },
    { "Rev", 0 },
    { "rev", 0 },
    { "Over", 0 },
    { "over", 0 },
    { "Sup", 0 },
    { "sup", 0 },
    { "Sub", 0 },
    { "sub", 0 },
    { "UpHalfLine", 0 },
    { "DownHalfLine", 0 },
    { "AdvanceToHalfLine", 2 },
    { "Indent", 0 },
    { "DIndent", 0 },
    { "indent", 0 },
    { "MarginRelease", 1 },
    { "Center", 0 },
    { "center", 0 },
    { "CenterHere", 0 },
    { "centerhere", 0 },
    { "Align", 0 },
    { "align", 0 },
    { "AlignChar", 1 },
    { "FlushRight", 0 },
    { "flushright", 0 },
    { "Math", 0 },
    { "math", 0 },
    { "MathCalc", 0 },
    { "MathCalcColumn", 0 },
    { "SubTotal", 0 },
    { "IsSubTotal", 0 },
    { "Total", 0 },
    { "IsTotal", 0 },
    { "GrandTotal", 0 },
    { "NegateTotal", 0 },
    { "Col", 0 },
    { "col", 0 },
    { "Fn", 0 },
    { "fn", 0 },
    { "En", 0 },
    { "en", 0 },
    { "SetFn#", 1 },
    { "FNote#", 0 },
    { "ENote#", 0 },
    { "TableMarker", 0 },
    { "Hyph", 0 },
    { "hyph", 0 },
    { "Just", 0 },
    { "just", 0 },
    { "Wid", 0 },
    { "wid", 0 },
    { "HZone", 2 },
    { "DAlign", 1 },
    { "Header", 0 },
    { "header", 0 },
    { "Footer", 0 },
    { "footer", 0 },
    { "Supp", 1 },
    { "CtrPg", 0 },
    { "SetFont", 2 },
    { "SetBin", 1 },
    { "PN0", 0 },
    { "PN1", 0 },
    { "PN2", 0 },
    { "PN3", 0 },
    { "PN4", 0 },
    { "PN5", 0 },
    { "PN6", 0 },
    { "PN7", 0 },
    { "PN8", 0 },
    { NULL, 0 },  /* UnsupportedPlaceHolder -- keeps match_identifier happy */
    { "set page number column", 0 },
    { "extended tabs", 0 },
    { "underline mode", 0 },
    { "define column", 0 },
    { "footnote attributes", 0 },
    { "paragraph numbering style", 0 },
    { "numbered paragraph", 0 },
    { "begin marked text", 0 },
    { "end marked text", 0 },
    { "define marked text", 0 },
    { "define index mark", 0 },
    { "define math columns", 0 },
    { "WPCorp obsolete", 0 },
    { "WPCorp reserved", 0 },
    { "WPCorp undefined", 0 },
};

/* The file pointer "descriptor" points to our descriptor file
 * and "input" points to our input file.
 *
 * Kinda makes sense that way.
 *
 * Neither stream is opened in this part of the file; the code here only
 * reads from them.
 */

FILE *descriptor, *input;

/* And the function match_identifier(s) takes a string and converts
 * it to its corresponding integer.  Or blows up if it couldn't
 * find one.
 */

/* Translate the identifier text s into its numeric code.
 *
 * A quoted single character ('x') yields that character's code, 0..255.
 * Anything else is looked up in names[], giving typeout + its position.
 * An identifier that matches nothing is a fatal error.
 */
int match_identifier(const char *s)
{
  int idx;

  /* A character token: quote, one character, quote, end of string. */
  if (s[0] == '\'' && s[2] == '\'' && s[3] == '\0')
    return (int) (unsigned char) s[1];

  /* A named token: scan the table up to its NULL entry. */
  for (idx = 0; names[idx].name != NULL; idx++) {
    if (strcmp(names[idx].name, s) == 0)
      return typeout + idx;
  }

  /* Nothing matched. */
  error(descriptor, "Unknown identifier %s", s);
  /*NOTREACHED*/
  return 0;
}

/* check_arity ensures that the expansion string is valid */
void check_arity(int ident, char *t)
{
  char *s;
  int arity = 0;
  if (ident > typeout) arity = names[ident-typeout].arity;
  for (s = t; *s; s++) {
    if (*s != '%') continue;
    switch (*++s) {
    case '\n':
      if (s != t+1)
        error(descriptor, "%s: `%%\\n' not at start of expansion",
              names[ident-typeout].name);
        break;
    case '1':
    case 'c':
      if (arity < 1) goto bad_escape;
      break;
    case '2':
      if (arity < 2) goto bad_escape;
      break;
    case '%':
      break;
    default:
bad_escape:
      error(descriptor, "%s: invalid escape `%%%c'", names[ident-typeout].name, *s);
    }
  }
}

/************************************************************************/
/* Reading input from the descriptor file                               */
/************************************************************************/

/* The macro igetc() gets a character from the input file.
 * The macro dgetc() gets a character from the descriptor file.
 * Both yield whatever getc() returns, so the result may be EOF.
 */

#define igetc() getc(input)
#define dgetc() getc(descriptor)

/* expand_backslash() is called when a backslash is encountered in
 * the descriptor file.  Its job is to parse a backslash-sequence.
 * The usual C-escapes (\a \b \f \n \r \t \v) are understood, as
 * well as the octal escape \000 [up to three octal digits] and
 * the hex escape \xFF [up to two hex digits].
 */

int expand_backslash(void) {
    /* The character right after the backslash decides everything. */
    int c = dgetc();

    /* Numeric escapes: \xHH (or \XHH) and \ooo. */
    if (c == 'x' || c == 'X') return parse_hex(descriptor);
    if (c >= '0' && c <= '7') return parse_octal(descriptor, c);

    /* The usual single-letter C escapes. */
    switch (c) {
    case 'a': return '\a';
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case 'v': return '\v';
    default:  break;
    }

    /* Anything else stands for itself. */
    return c;
}

/* The function read_identifier() attempts to match an identifier
 * in the descriptor file.  It returns EOF if the end of the descriptor
 * file was reached, or the code of the identifier we found.
 * (or blows up if an error was detected.)
 * We build the identifier in "s", with the help of our
 * pool-managing functions above, then discard it, immediately,
 * since we don't use it any more.
 */

/* Read the next identifier from the descriptor file, up to and including
 * the `=' that follows it.  Returns the identifier's code as given by
 * match_identifier(), or EOF if only whitespace and comments remained.
 */
int read_identifier(void)
{
  register int c;      /* A character we have read */
  char *s;    /* The identifier we are building */
  int ident;   /* The identifier we found */

  /* Skip over comments */
  /* (a `#' as the first non-blank character discards the rest of its line) */
  while ((c = next_non_whitespace(descriptor)) == '#')
      eat_until_newline(descriptor);

  if (c == EOF) return EOF;

  /* At this point, "c" contains the first letter of a potential
   * identifier.  Let's see what it could possibly be.
   */
  s = anchor_string();
  if (c == '\'') {                      /* a character token */
    /* Stored as three characters: quote, the character, quote.  The
     * middle one may be written as a backslash escape.
     */
    add_to_string(c);
    if ((c = dgetc()) == '\\') c = expand_backslash();
    add_to_string(c);
    if ((c = dgetc()) != '\'')
      error(descriptor, "Invalid character identifier");
    add_to_string(c);
    c = next_non_space_or_tab(descriptor);
  } else do {                           /* a name token */
    /* The first character is stored exactly as read.  Later characters
     * have blanks and tabs skipped and backslash escapes expanded before
     * being stored.  Because the loop test sees the expanded value, an
     * escape that produces `=' or a newline also ends the name.
     */
    add_to_string(c);
    c = next_non_space_or_tab(descriptor);
    if (c == '\\') c = expand_backslash();
  } while (c != EOF && c != '=' && c != '\n');

  if (c != '=') error(descriptor, "Identifier not followed by = sign");
      /* A boo-boo.  Something bad happened. */

  add_to_string('\0');   /* Make it a standard C string. */
  finish_string();

  ident = match_identifier(s); /* Go find one. */

  remove_string(s); /* And we're done with it now. */

  return ident;
}

/* The function grab_expansion() reads expansion text from the
 * descriptor file and adds it to the pool, returning a pointer
 * to the string it just created.
 *
 * After anchoring a new string, we look for the opening quotation
 * mark, then start gobbling characters.  Everything gets copied
 * straight into the string.
 *
 */

/* Read one double-quoted expansion from the descriptor file into the
 * string pool and return a pointer to it.  Backslash escapes are expanded
 * on the way in.  Reading stops at the closing quotation mark or at the
 * end of the file, whichever comes first.
 */
char *grab_expansion(void)
{
  char *start = anchor_string();  /* where the new string begins */
  int ch;

  /* The text must open with a quotation mark. */
  if (next_non_whitespace(descriptor) != '\"')
    error(descriptor, "Quotation mark expected");

  for (;;) {
    ch = dgetc();
    if (ch == EOF || ch == '\"') break;
    if (ch == '\\') ch = expand_backslash();
    add_to_string(ch);
  }

  add_to_string('\0');
  finish_string();
  return start;
}

/* Ah, now with all of these beautiful functions waiting for us,
 * we can now write our first Useful Function:  do_descriptor_file.
 * It reads the descriptor file and loads up the "expansion" array
 * with the text expansions we are reading from the file.
 *
 * If we grabbed the expansion of a "typeout", we type it out
 * and discard the string.
 *
 * We stop when the descriptor file runs dry.
 *
 */

/* Read the whole descriptor file, storing each identifier's expansion in
 * expansion[].
 *
 * A "typeout" entry is not kept: its text is written to stderr (unless
 * running silently) and is then released from the string pool in either
 * case, so silent runs neither hold on to the text nor validate it
 * differently from normal runs.  Every other expansion is checked by
 * check_arity().
 */
void do_descriptor_file(void)
{
  register int ident;

  while ((ident = read_identifier()) != EOF) {
    expansion[ident] = grab_expansion();
    if (ident == typeout) {
      if (!silent) fputs(expansion[typeout], stderr);
      remove_string(expansion[typeout]);
      expansion[typeout] = NULL;
    } else check_arity(ident, expansion[ident]);
  }
}

/************************************************************************/
/* Reading from the input file                                          */
/************************************************************************/

/* The function verify(c) checks that the next character in the input
 * stream is indeed "c".  It eats the character, if all is well.
 * If something went wrong, we complain to stderr, but keep going.
 */

/* Consume one character of the input file and compare it with c.
 * A mismatch is reported on stderr; processing carries on regardless.
 */
void verify(int c)
{
  int got = igetc();

  if (got == c) return;

  fprintf(stderr, "Warning: Expected %02X but received %02X.\n", c, got);
}

/* The function gobble(n) simply eats "n" characters from the input
 * file.
 */
/* Discard the next n characters of the input file.
 * A count of zero or less discards nothing, and reading stops early once
 * the end of the file has been reached.
 */
void gobble(int n)
{
  while (n-- > 0)
    if (igetc() == EOF) break;
}

/* Cleared by every call to process(); leave_environment() emits an HRt
 * of its own only while this is zero.
 * NOTE(review): the code that sets it is outside the part of the file
 * reviewed here.
 */
int last_HRt = 0;                       /* most recent output was HRt */

/* Processing a special code simply entails dumping its expansion.
 * If the expansion is NULL, then we either
 *   [1] print nothing, if it is a code,
 *   [2] print the character itself, if it is an ASCII character.
 *
 * In dumping its expansion, we expand the following percent-escapes:
 *
 *  The percent-escapes are:
 *      %\n  -- newline if previous character was not a newline
 *              (meaningful only as first character in sequence)
 *      %1   -- first parameter, in decimal form
 *      %2   -- second parameter, in decimal form
 *      %c   -- first parameter, in character form
 *      %%   -- literal percent sign
 *
 *  all other %-escapes are flagged as warnings (but should never occur,
 *  since they are trapped at the time the descriptor file is read.)
 */
/* Write the expansion of code c to stdout.
 *
 *   c    character code (0..255) or token code
 *   d1   first parameter, substituted for %1 (decimal) and %c (character)
 *   d2   second parameter, substituted for %2 (decimal)
 *
 * A code without an expansion gets one invented on first use: printable
 * ASCII is simply echoed, other characters expand to themselves, and
 * tokens expand to nothing; the latter two cases warn unless silent.
 */
void process(int c, int d1, int d2)
{
  static int last_newline = 0;  /* did the previous expansion end in \n? */
  char *p;

  last_HRt = 0;                         /* the killer switch sets this */

  if (expansion[c] == NULL) {           /* nothing defined: make something up */
    if (c >= ' ' && c < 128) {          /* printable ASCII emits itself */
      putchar(c);
      last_newline = 0;
      return;
    }
    if (c < 256) {                      /* other characters: expand to self */
      expansion[c] = anchor_string();
      add_to_string(c); add_to_string('\0');
      finish_string();
      if (!silent) fprintf(stderr, "Warning: No expansion for %02X (%c)\n", c, c);
    } else {                            /* tokens: expand to nothing */
      expansion[c] = "";
      if (!silent)
        fprintf(stderr, "Warning: No expansion for %s\n", names[c-typeout].name);
    }
  }

  p = expansion[c];
  if (*p == '\0') return;       /* empty expansion leaves last_newline alone */

  while (*p != '\0') {
    char ch = *p++;

    if (ch != '%') {            /* ordinary text */
      putchar(ch);
      continue;
    }

    ch = *p++;                  /* the escape letter */
    switch (ch) {
    case '\n':
      if (!last_newline) putchar('\n');
      break;
    case '1':
      printf("%d", d1);
      break;
    case '2':
      printf("%d", d2);
      break;
    case 'c':
      putchar(d1);
      break;
    case '%':
      putchar('%');
      break;
    default:
      fprintf(stderr, "Internal error:  Invalid escape, %%%c\n", ch);
      break;
    }
  }
  last_newline = (p[-1] == '\n');
}

/* Shorthands for process() with zero, one or two parameters; the
 * parameters a code does not take are passed as 0.
 */
#define process0(c)     process(c,0,0)
#define process1(c,a)   process(c,a,0)
#define process2(c,a,b) process(c,a,b)

/* Handle a code wp2x cannot translate.  The first time a given code is
 * met (and unless running silently) a warning goes to stderr; every time,
 * the code's name is written to stdout between the Comment and eComment
 * expansions.
 */
void unsupported(int c)
{
  char *label = names[c-typeout].name;

  if (!silent && expansion[c] == NULL) {
    expansion[c] = "";                  /* marks the code as already reported */
    fprintf(stderr, "Warning: `%s' code not supported\n", label);
  }

  process0(Comment);
  fputs(label, stdout);
  process0(eComment);
}

/* The function gobble_until(c) eats characters from the input file
 * until it reaches a c or reaches EOF.
 */
/* Discard input up to and including the next occurrence of c, or up to
 * the end of the file if c never turns up.
 */
void gobble_until(int c)
{
  for (;;) {
    int got = igetc();

    if (got == EOF) return;
    if ((int) (unsigned char) got == c) return;
  }
}

/* line_spacing(l) is called whenever we hit a line-spacing-change command.
 * The argument is the desired line spacing, multiplied by two.
 * So single spacing gets a 2, 1.5 spacing gets a 3, etc.
 */
/* Emit the code for a change of line spacing.  l is twice the spacing
 * wanted; the four common spacings have tokens of their own and anything
 * else goes out as a generic LS carrying l as its parameter.
 */
void line_spacing(int l)
{
  if      (l == 2) process0(SS);
  else if (l == 3) process0(OHS);
  else if (l == 4) process0(DS);
  else if (l == 6) process0(TS);
  else             process1(LS, l);
}

/* Token to expand when the current environment ends, or 0 if none is open. */
int environment_status = 0;             /* cleanup at HRt */

/* Close any open environment by expanding its pending token.  When
 * force_HRt is nonzero, an HRt is emitted as well unless the most recent
 * output already was one.
 */
void leave_environment(int force_HRt) {
    if (environment_status != 0) {
      process0(environment_status);
      environment_status = 0;
    }

    if (!force_HRt) return;
    if (last_HRt) return;
    process0(HRt);
}

/* The "note_status" flag has one of three values:
 *    0     if we are not inside a note
 *    eFn   if we are inside a footnote
 *    eEn   if we are inside an endnote
 * (a nonzero value is the token that gets expanded when the note ends.)
 *
 * The function handle_note() is called to deal with footnotes and
 * endnotes.  It adjusts the note_status accordingly.
 */

/* 0 outside a note; otherwise the token that will close the open note. */
int note_status = 0;

/* Called at a note code.  If a note is open, it is closed; otherwise a
 * new one is opened, with the next input byte telling endnotes from
 * footnotes, and the rest of the note header is skipped.
 */
void handle_note(void)
{
  if (note_status != 0) {               /* closing the current note */
    leave_environment(1);
    process0(note_status);
    note_status = 0;
    return;
  }

  /* Opening a note: decide whether it is an endnote or a footnote */
  if (igetc() & 2) {
    process0(En);
    note_status = eEn;
    gobble(5);
  } else {
    process0(Fn);
    note_status = eFn;
    gobble(7);
  }
  verify(0xFF);
  gobble(2);                                    /* margins */
}

/* The tab_table is a bit field.  Each set bit represents a tabstop.
 * Note, however, that the bits are counted from MSB to LSB.
 *
 * The tab_attribute_table is a nybble field.  The n'th nybble represents
 * the attributes of the n'th tabstop.
 */
unsigned char tab_table[32];            /* one bit per column, MSB first */
unsigned char tab_attribute_table[20];  /* one nybble per tabstop */
int next_attribute;                     /* index of the next nybble to use */

/* Emit the tab-setting code for a tabstop at column i, taking its kind
 * from the next unused nybble of tab_attribute_table (high nybble of each
 * byte first).
 *
 * tab_table can mark more tabstops than tab_attribute_table has nybbles
 * for; tabstops beyond the end of the attribute table are emitted as
 * plain SetTab rather than reading past the array.
 */
void process_tab_attribute(int i) {
    int b = 0;
    int slot = next_attribute / 2;

    if (slot >= 0 && slot < (int) sizeof(tab_attribute_table)) {
        if (next_attribute & 1) b = tab_attribute_table[slot] & 3;
        else b = (tab_attribute_table[slot] / 16) & 3;
    }
    next_attribute++;

    /* Bottom two bits define what kind of tab.
     * Bit 2 is set if we need dot filling.
     * Bit 3 is unused.
     * We `&3' above because we won't support dot filling.
     */
    process1(SetTab + b, i);
}

/* Emit the complete tab settings: BeginTabs, one tab-setting code for
 * every bit set in tab_table (in increasing column order), then EndTabs.
 */
void process_tab_table(void) {
    int byte, bit;

    next_attribute = 0;

    process0(BeginTabs);
    for (byte = 0; byte < 32; byte++) {
        unsigned int bits = tab_table[byte];

        if (bits == 0) continue;            /* nothing set in this byte */

        /* Bit 0x80 is the lowest column of the eight. */
        for (bit = 0; bit < 8; bit++) {
            if (bits & (0x80u >> bit)) process_tab_attribute(byte * 8 + bit);
        }
    }
    process0(EndTabs);
}

/* Read an old-style tab setting (20 bytes of tabstop bits, no attribute
 * bytes) from the input file and emit the resulting tab tokens.
 */
void handle_tabs(void) {
    /* Start from all-zero tables: the bytes beyond the 20 we read then
     * contribute no tabstops, and every tabstop gets attribute 0
     * (a left tab).
     */
    memset(tab_attribute_table, 0, sizeof tab_attribute_table);
    memset(tab_table, 0, sizeof tab_table);

    /* old-style tabs */
    (void) fread(tab_table, 20, 1, input);

    process_tab_table();
}

/* Read an extended tab setting (32 bytes of tabstop bits followed by
 * 20 bytes of attribute nybbles) from the input file and emit the
 * resulting tab tokens.
 */
void handle_extended_tabs(void) {
    /* Clear both tables first, as handle_tabs does.  The fread results
     * are not checked, so if the file is truncated in the middle of this
     * code whatever could not be read must come out as "no tabstop /
     * left tab" rather than as leftovers from an earlier tab setting.
     */
    memset(tab_table, 0, sizeof(tab_table));
    memset(tab_attribute_table, 0, sizeof(tab_attribute_table));

    fread(tab_table, 32, 1, input);
    fread(tab_attribute_table, 20, 1, input);
    process_tab_table();
}

/* FF_status says how an 0xFF byte in the input is to be interpreted.
 * While a header or footer is open it holds the token code that closes
 * it (eHeader or eFooter, stored by the 0xD1 case of process_token);
 * otherwise it is 0.
 */

int FF_status = 0;

/* Deal with an 0xFF byte.
 *
 * With nothing pending, the byte is passed to process0 like any other
 * character.  With a header/footer pending, the 0xFF ends it: the
 * current environment is left, the stored closing token is emitted, two
 * more bytes are gobbled, the terminating 0xD1 is verified and FF_status
 * is cleared.
 */
void handle_FF(void)
{
    if (FF_status == 0) {
        process0(0xFF);
        return;
    }

    /* finish header/footer */
    leave_environment(1);
    process0(FF_status);
    gobble(2);
    verify(0xD1);
    FF_status = 0;
}

/* The function process_token does all of the real work.
 * Given the first character of a token, we eat up everything
 * that belongs to that token.  This routine might be called
 * recursively, since some tokens are defined in terms of other
 * tokens.  (For example, the subscript code is expanded as
 *   [Sub] <character being subscripted> [sub]
 * and the <character being subscripted> might involve other token
 * expansions; specifically, it might be an IBM Extended character.)
 *
 * Luckily, most of our tokens are not recursive.  The macro
 *     bracket(before, after)
 * does the recursive stuff for us, bracketing the next token
 * between expansions of "before" and "after".
 *
 */

/* Wrapped in do { } while (0) so that the three statements always act
 * as one, even when the macro is used as the body of an unbraced
 * if/else or loop.  Invoke it as a statement: bracket(a, b);
 */
#define bracket(before,after) do { process0(before); process_token(); \
                                   process0(after); } while (0)

int process_token(void)
{
  int c = igetc();

  if (c == EOF) return 0;

  c = (int) (unsigned char) c;

  if (!--blipcount && !silent) {
    blipcount = blipinterval;
    putc('.', stderr);
  }

  switch (c) {   /* Codes listed in numerical rather than logical order */

   case 0x02: process0(PageNo); break;                   /* Page number */

   case 0x09: process0(Tab); break;                    /* Tab character */

   case 0x8C:                            /* Soft page break after a HRt */
   case 0x0A:                                            /* Hard Return */
              last_HRt = 0; leave_environment(1); last_HRt = 1; break;
   case 0x0B:                            /* Soft page break after a SRt */
   case 0x0D: process0(SRt); break;                      /* Soft Return */

   case 0x0C: process0(HPg); break;                        /* Hard Page */

   case '-' : process0(HHyph); break;             /* Nonbreaking hyphen */

   case 0x80: break;                                             /* NOP */
   case 0x81: process0(Just); break;             /* Right justification */
   case 0x82: process0(eJust); break;                   /* Ragged right */
   case 0x83:                                          /* End centering */
   case 0x84: leave_environment(0); break;          /* End aligned text */
   case 0x85: process0(MathCalc); break;             /* Begin math calc */
   case 0x86: process0(CtrPg); break;         /* Center page vertically */
   case 0x87: process0(Col); break;                /* Begin column mode */
   case 0x88: process0(eCol); break;                 /* End column mode */
   case 0x89: process0(Tab); break;           /* Tab after right margin */
   case 0x8A: process0(Wid); break;          /* Widow/orphan protection */
   case 0x8B: process0(eWid); break;            /* Allow widows/orphans */
/* case 0x8C: see 0x0A */
   case 0x8D:                                /* Footnote/Endnote number */
              process0(note_status == eFn ? FNoteNum : ENoteNum); break;
   case 0x8E:
   case 0x8F: unsupported(ReservedCode); break;       /* Reserved codes */
   case 0x90: process0(Red); break;                    /* Begin redline */
   case 0x91: process0(eRed); break;                     /* End redline */
   case 0x92: process0(Strike); break;               /* Begin strikeout */
   case 0x93: process0(eStrike); break;                /* End strikeout */
   case 0x94: process0(Und); break;                /* Begin underlining */
   case 0x95: process0(eUnd); break;                 /* End underlining */
   case 0x96: process0(Rev); break;              /* Begin reverse video */
   case 0x97: process0(eRev); break;               /* End reverse video */
   case 0x98: process0(TableMarker); break;/* Table of something marker */
   case 0x99: bracket(Over, eOver); break;                /* Overstrike */
   case 0x9A: process0(NoHyphWord); break;/* Do not hyphenate this word */
   case 0x9B: break;                           /* End of generated text */
   case 0x9C: process0(eBold); break;                   /* End boldface */
   case 0x9D: process0(Bold); break;                  /* Begin boldface */
   case 0x9E: process0(eHyph); break;             /* Forbid hyphenation */
   case 0x9F: process0(Hyph); break;               /* Allow hyphenation */
   case 0xA0: process0(HSpace); break;                    /* Hard space */
   case 0xA1: process0(SubTtl); break;                   /* Do subtotal */
   case 0xA2: process0(IsSubTtl); break;              /* Subtotal entry */
   case 0xA3: process0(Ttl); break;                         /* Do total */
   case 0xA4: process0(IsTtl); break;                    /* Total entry */
   case 0xA5: process0(GrandTtl); break;              /* Do grand total */
   case 0xA6: process0(MathCalcColumn); break;      /* Math calc column */
   case 0xA7: process0(Math); break;                 /* Begin math mode */
   case 0xA8: process0(eMath); break;                  /* End math mode */
   case 0xA9: process0(NHyph); break;        /* Normal breakable hyphen */
   case 0xAA:                                  /* Hyphen at end of line */
   case 0xAB: process0(NHyphE); break;         /* Hyphen at end of page */
   case 0xAC: process0(DHyph); break;           /* Discretionary hyphen */
   case 0xAD:                           /* Discretionary hyphen at EOLn */
   case 0xAE: process0(DHyphE); break;  /* Discretionary hyphen at EOPg */
   case 0xAF:                                   /* EOT columns and EOLn */
   case 0xB0: break;                            /* EOT columns and EOPg */

   case 0xB1: process0(NegateTotal); break;     /* Negate current total */

   case 0xBC: bracket(Sup, eSup); break;                 /* Superscript */
   case 0xBD: bracket(Sub, eSub); break;                   /* Subscript */
   case 0xBE: process0(UpHalfLine); break;       /* Advance 1/2 line up */
   case 0xBF: process0(DownHalfLine); break;   /* Advance 1/2 line down */

   case 0xC0: gobble(2); c = igetc();                  /* Margin change */
              process2(Marg, c, igetc()); verify(0xC0); break;

   case 0xC1: gobble(1); line_spacing(igetc()); verify(0xC1); break;
                                                 /* Line spacing change */

   case 0xC2: process1(MargRel, igetc());             /* Margin release */
              verify(0xC2); break;


   case 0xC3:                                            /* Center text */
              leave_environment(0);
              switch (igetc()) {
              case 0: process0(Center);       /* Center between margins */
                      environment_status = eCenter; break;
              case 1:                   /* Center around current column */
                      process0(CenterHere);
                      environment_status = eCenterHere; break;
              }
              gobble(2); verify(0xC3); break;

   case 0xC4:                                   /* Align or Flush Right */
              leave_environment(0);
              c = igetc();
              /* if high bit on c is set, then dot fill.  (Ignore)      */
              switch (c & 0x7f) {
              case 0x0C:
              case 0x0A: process1(FlushRight, igetc());/* alignment col */
                         environment_status = eFlushRight;
                         break;
              default:   process2(Align, c, igetc());/* alignment column */
                         environment_status = eAlign;
                         break;
              }
              gobble(1);                                       /* trash */
              verify(0xC4);
              break;

   case 0xC5: gobble(2); c = igetc();               /* Hyphenation zone */
              process2(HZone, c, igetc()); verify(0xC5); break;

   case 0xC6: gobble(1);                        /* Page number position */
              process0(PN + igetc()); verify(0xC6); break;

   case 0xC7: gobble(2); c = igetc();                /* New page number */
              c = (c<<8) + (unsigned char)igetc();
              process1( (c&0x8000) ? RomanPage : ArabicPage, c&0x7fff);
              verify(0xC7); break;

   case 0xC8: gobble(3);                      /* Set Page number column */
              /* next 3 bytes are <left> <center> <right> */
              gobble(3);
              unsupported(SetPageNumberColumn);
              verify(0xC8); break;

   case 0xC9: gobble(20);                                   /* Set tabs */
              handle_tabs();
              verify(0xC9); break;

   case 0xCA: process1(CondEOP, igetc());    /* Conditional end of page */
              verify(0xCA); break;

   case 0xCB:                                      /* Set pitch or font */
              gobble(2);                          /* old pitch and font */
              c = igetc();
              process2(SetFont, c, igetc());   /* pitch and font number */
                              /* negative pitch means proportional font */
              verify(0xCB); break;

   case 0xCC:                                     /* Indented paragraph */
              leave_environment(0);
              gobble(1); process1(Indent, igetc()); verify(0xCC);
              environment_status = eIndent; break;
                                          /* (really: Temporary margin) */

   case 0xCD:                          /* Indented paragraph (obsolete) */
              leave_environment(0);
              process1(Indent, igetc()); verify(0xCD);
              environment_status = eIndent; break;
                                          /* (really: Temporary margin) */

   case 0xCE: gobble(1); process1(TopMarg, igetc());  /* Set top margin */
              verify(0xCE); break;

   case 0xCF:                 /* Suppress headers/footers for this page */
              process1(Supp, (unsigned char)igetc());
              verify(0xCF); break;

   case 0xD0: gobble(2); /* old form length */       /* Set page length */
              process1(PageLength, igetc());          /* lines per page */
              gobble(1);                             /* new page length */
              verify(0xD0); break;

   case 0xD1:                                          /* header/footer */
              c = igetc();                                  /* def byte */
              gobble(1);                              /* old half-lines */
              if (c&2) { process0(Footer); FF_status = eFooter; }
                  else { process0(Header); FF_status = eHeader; }
              verify(0xFF); verify(0xFF);                  /* separator */
              gobble(2);                       /* left and right margin */
              break;                             /* continue processing */

   case 0xD2: gobble(5);                           /* obsolete footnote */
              unsupported(Obsolete);
              gobble_until(0xD2);
              break;

   case 0xD3: gobble(2);              /* obsolete `set footnote number' */
              unsupported(Obsolete);
              verify(0xD3);
              break;

   case 0xD4:                            /* Advance to half line number */
              c = igetc(); /* current line number */
              process2(AdvanceToHalfLine, c, igetc());/* desired line # */
              verify(0xD4); break;

   case 0xD5: gobble(1); process1(LPI, igetc());    /* Set LPI (6 or 8) */
              verify(0xD5); break;

   case 0xD6:                                      /* set extended tabs */
              /* next 4 bytes are <old start><old increment>
                                  <new start><new increment> */
              gobble(4);
              unsupported(SetTabs);
              verify(0xD6); break;

   case 0xD7: gobble(63);                        /* Define math columns */
              unsupported(DefineMathColumns);
              verify(0xD7); break;

   case 0xD8: gobble(1); process1(AlignChar, igetc());
              verify(0xD8); break;           /* Set alignment character */

   case 0xD9: gobble(2);                     /* obsolete margin release */
              unsupported(Obsolete);
              verify(0xD9);
              break;

   case 0xDA: gobble(1+1);                        /* Set underline mode */
              /* second byte is a bit field.
               *       1 = double-underline (default single),
               *       2 = underline spaces (default don't)
               */
              unsupported(SetUnderlineMode);
              verify(0xDA); break;

   case 0xDB:                                   /* Set sheet feeder bin */
              gobble(1); process1(SetBin, igetc());
              verify(0xDB); break;

   /* We ignore these codes, since they are followed by an 0x0C or an 0x8C */
   case 0xDC: gobble(7); verify(0xDC); break;      /* End-of-page codes */

   case 0xDD: gobble(22);                             /* define columns */
              unsupported(DefineColumn);
              verify(0xDD);

   case 0xDE: environment_status = 0;         /* End indented paragraph */
              gobble(2); process0(eIndent); verify(0xDE); break;

   case 0xDF:                                   /* invisible characters */
              gobble_until(0xDF);
              break;

   case 0xE0:                              /* Doubly-indented paragraph */
              leave_environment(0);
              gobble(1); process1(DIndent, igetc()); verify(0xE0);
              environment_status = eIndent; break;

   case 0xE1: process0((unsigned char)igetc()); verify(0xE1); break;
                                                       /* IBM character */

   case 0xE2: handle_note(); break;              /* footnote or endnote */

   case 0xE3: gobble(74+74);                     /* footnote attributes */
              unsupported(SetFootnoteAttributes);
              verify(0xE3);
              break;

   case 0xE4: gobble(2); /* old */               /* set footnote number */
              /* bit 7 of second byte doesn't count, and the value
               * is offset by one.
               */
              c = igetc() & 0x3f;
              c = (c << 7) + (igetc() & 0x7f);
              process1(SetFnNum, 1 + c);
              verify(0xE4);
              break;

   case 0xE5:                              /* paragraph numbering style */
              gobble(7+7+7+7);
              unsupported(SetParagraphNumberingStyle);
              verify(0xE5);
              break;

   case 0xE6:                                       /* paragraph number */
              gobble(2+7);
              unsupported(NumberedParagraph);
              verify(0xE6);
              break;

   case 0xE9:                                      /* begin marked text */
              gobble(6);
              unsupported(BeginMarkedText);
              verify(0xE9);
              break;

   case 0xEA:                                        /* end marked text */
              unsupported(EndMarkedText);
              gobble_until(0xEA);
              break;

   case 0xEB:                                     /* define marked text */
              gobble(30);
              unsupported(DefineMarkedText);
              verify(0xEB);
              break;

   case 0xEC:                                      /* define index mark */
              gobble(2);
              unsupported(DefineIndexMark);
              verify(0xEC);
              break;

   case 0xED:                                   /* Table of authorities */
              unsupported(DefineIndexMark);
              gobble_until(0xED);
              break;
   case 0xEE:                                   /* paragraph number def */
              gobble(42);
              unsupported(SetParagraphNumberingStyle);
              verify(0xEE);
              break;

   case 0xEF:                                       /* paragraph number */
              gobble(16);
              unsupported(NumberedParagraph);
              verify(0xEF);
              break;

   case 0xF1: gobble(32 + 20);                          /* Tab settings */
              handle_extended_tabs();
              verify(0xF1);
              break;

   case 0xF3:                                      /* column definition */
              gobble(98);
              unsupported(DefineColumn);
              verify(0xF3);
              break;


   case 0xB2:
   case 0xB3:
   case 0xB4:
   case 0xB5:
   case 0xB6:
   case 0xB7:
   case 0xB8:
   case 0xB9:
   case 0xBA:

   case 0xF0:

   case 0xF2:
   case 0xF4:
   case 0xF5:
   case 0xF6:
   case 0xF7:
   case 0xF8:
   case 0xF9:
   case 0xFA:
   case 0xFB:
   case 0xFC:
   case 0xFD:
   case 0xFE: unsupported(UnknownCode);  break;      /* undefined codes */

   case 0xFF: handle_FF(); break;

   default: process0(c); break;
  }
  return 1;
}

/* The other Useful Function: convert the whole input file.
 *
 * Emits the BEGIN token, processes tokens one after another until
 * process_token reports end of input by returning 0, then emits the
 * END token.
 */
void process_input(void)
{
  process0(BEGIN);

  for (;;) {
    if (!process_token()) break;
    do_nothing;
  }

  process0(END);
}


/************************************************************************/
/* The main program                                                     */
/************************************************************************/

/* First, a pretty little function which tries to open a file and
 * complains loudly if it cannot.
 *
 *   s   name of the file to open
 *   m   fopen mode string
 *
 * Returns the open stream.  Never returns NULL: on failure it prints
 *     Error: Cannot open <s> (<system error text>)
 * to stderr (the parenthesised part only when errno names a known
 * error) and terminates the program with exit status 1.
 */

FILE *efopen(const char *s, const char *m)
{
  FILE *fp = fopen(s, m);

  if (fp == NULL) {
    int err = errno;       /* grab it before fprintf can overwrite it */

    fprintf(stderr, "Error: Cannot open %s", s);
    if (err > 0 && err < sys_nerr)
        fprintf(stderr, " (%s)", sys_errlist[err]);
    fprintf(stderr, "\n");
    exit(1);
  }

  return fp;
}

#include "dopen.c"			/* ickiness with file opening */

/* The main program is mostly glue.
 *
 * Command line:   wp2x [-s] [-n<number>] descriptor input > output
 *
 *   -s          set the silent flag
 *   -n<number>  set blipinterval to <number>; the rest of the argument
 *               is consumed by the number
 *
 * Option letters may be combined in one argument (e.g. -sn10).  Any
 * other option letter, or anything other than exactly two file names
 * after the options, prints the usage line and exits with status 2.
 *
 * The descriptor file is opened through dopen and the input file in
 * binary mode through efopen.  Each of the two Useful Functions is then
 * called in turn, and each file is closed once it has served its
 * purpose.
 */

int Cdecl main(int argc, char **argv)
{
  int bad_option = 0;

  while (!bad_option && --argc && **++argv == '-') {
    char *opt = *argv + 1;              /* first letter after the dash */
    int more = 1;

    while (more && *opt != '\0') {
      switch (*opt) {
      case 's':
        silent = 1;
        opt++;
        break;
      case 'n':
        blipinterval = atoi(opt + 1);
        more = 0;                       /* number eats the rest */
        break;
      default:
        bad_option = 1;
        more = 0;
        break;
      }
    }
  }
  blipcount = blipinterval;

  if (bad_option || argc != 2) {
    fprintf(stderr, "usage: wp2x descriptor input > output\n");
    exit(2);
  }

  dopen(argv[0]);
  input = efopen(argv[1], "rb");

  do_descriptor_file();
  fclose(descriptor);

  process_input();
  fclose(input);

  return 0;
}
