/* bobscn.c - a lexical scanner */
/*
	Copyright (c) 1991, by David Michael Betz
	All rights reserved
*/

#include <setjmp.h>
#include "bob.h"

/* useful definitions */
/* LSIZE - size in bytes of the scanner's line buffer; parse_error also
   uses it to size its scratch buffer */
#define LSIZE	200

/* keyword table - pairs each reserved word with the token code that
   getid() returns for it; the entry whose keyword is NULL ends the table */
static struct {
    char *kt_keyword;
    int kt_token;
} ktab[] = {
    { "class",		T_CLASS		},
    { "static",		T_STATIC	},
    { "if",		T_IF		},
    { "else",		T_ELSE		},
    { "while",		T_WHILE		},
    { "return",		T_RETURN	},
    { "for",		T_FOR		},
    { "break",		T_BREAK		},
    { "continue",	T_CONTINUE	},
    { "do",		T_DO		},
    { "new",		T_NEW		},
    { "nil",		T_NIL		},
    { NULL,		0		}
};

/* token name table */
/*
	tkn_name() indexes this table directly with a token code, so the
	entries must appear in exactly the order in which the T_xxx codes
	are numbered, starting with T_EOF at index 0.

	NOTE(review): "nil" is placed after "new" to mirror the keyword
	table; this assumes T_NIL == T_NEW+1 in bob.h - confirm.
*/
static char *t_names[] = {
"<eof>",
"<string>",
"<identifier>",
"<number>",
"class",
"static",
"if",
"else",
"while",
"return",
"for",
"break",
"continue",
"do",
"new",
"nil",
"<=",
"==",
"!=",
">=",
"<<",
">>",
"&&",
"||",
"++",
"--",
"+=",
"-=",
"*=",
"/=",
"::",
"->"};

/* global variables */
int t_value;		/* numeric value (set for numbers and character constants) */
char t_token[TKNSIZE+1];/* token string */

/* local variables */
static int (*getcf)();	/* getc function */
static void *getcd;	/* getc data (passed to every getcf call) */
static int savetkn;	/* look ahead token (T_NOTOKEN when empty) */
static int savech;	/* look ahead character ('\0' when empty) */
static int lastch;	/* last input character */
static char line[LSIZE];/* last input line */
static char *lptr;	/* line pointer */
static int lnum;	/* line number */

/* forward declarations */
/* the scanner's private functions are called before they are defined;
   declaring them here keeps each call from implicitly declaring an
   external function that the later static definition would contradict */
static int rtoken();
static int getstring();
static int getcharacter();
static int literalch();
static int getid();
static int getnumber();
static int skipspaces();
static int isidchar();
static int getch();

/* init_scanner - initialize the scanner */
/*
	gf is the character-input function and gd the datum handed to it on
	every call; gf is expected to return EOF at the end of the input.
*/
init_scanner(gf,gd)
  int (*gf)(); void *gd;
{
    /* nothing has been pushed back and nothing has been read yet */
    savech = '\0';
    savetkn = T_NOTOKEN;
    lastch = '\0';

    /* start with an empty line buffer, before the first line */
    lnum = 0;
    line[0] = '\0';
    lptr = line;

    /* remember where the input comes from */
    getcd = gd;
    getcf = gf;
}

/* token - get the next token */
/* returns the token saved by stoken() if there is one, otherwise scans
   a new token from the input */
int token()
{
    int tkn = savetkn;

    /* nothing was pushed back - read a fresh token */
    if (tkn == T_NOTOKEN)
	return (rtoken());

    /* hand back the saved token and empty the lookahead slot */
    savetkn = T_NOTOKEN;
    return (tkn);
}

/* stoken - save a token */
/* pushes tkn back so that the next call to token() returns it */
stoken(tkn)
  int tkn;
{
    /* there is a single lookahead slot: a second save before the next
       token() call replaces the first */
    savetkn = tkn;
}

/* tkn_name - get the name of a token */
/*
	Returns the printable name of token code tkn.  Codes covered by the
	t_names table yield the table entry; every other code is treated as
	a single-character token and is returned as a one-character string
	held in static storage, which the next such call overwrites.
*/
char *tkn_name(tkn)
  int tkn;
{
    static char tname[2];
    int count;

    /* only index the table with codes it really has an entry for: a
       negative code or one beyond the end of the table would otherwise
       read outside the array */
    count = (int)(sizeof(t_names) / sizeof(t_names[0]));
    if (tkn >= 0 && tkn <= _TMAX && tkn < count)
	return (t_names[tkn]);

    /* a single-character token names itself */
    tname[0] = tkn;
    tname[1] = '\0';
    return (tname);
}

/* rtoken - read the next token */
static int rtoken()
{
    int ch,ch2;

    /* check the next character */
    for (;;)
	switch (ch = skipspaces()) {
	case EOF:	return (T_EOF);
	case '"':	return (getstring());
	case '\'':	return (getcharacter());
	case '<':	switch (ch = getch()) {
			case '=':
			    return (T_LE);
			case '<':
			    return (T_SHL);
			default:
			    savech = ch;
			    return ('<');
			}
	case '=':	switch (ch = getch()) {
			case '=':
			    return (T_EQ);
			default:
			    savech = ch;
			    return ('=');
			}
	case '!':	switch (ch = getch()) {
			case '=':
			    return (T_NE);
			default:
			    savech = ch;
			    return ('!');
			}
	case '>':	switch (ch = getch()) {
			case '=':
			    return (T_GE);
			case '>':
			    return (T_SHR);
			default:
			    savech = ch;
			    return ('>');
			}
	case '&':	switch (ch = getch()) {
			case '&':
			    return (T_AND);
			default:
			    savech = ch;
			    return ('&');
			}
	case '|':	switch (ch = getch()) {
			case '|':
			    return (T_AND);
			default:
			    savech = ch;
			    return ('|');
			}
	case '+':	switch (ch = getch()) {
			case '+':
			    return (T_INC);
			case '=':
			    return (T_ADDEQ);
			default:
			    savech = ch;
			    return ('+');
			}
	case '-':	switch (ch = getch()) {
			case '-':
			    return (T_DEC);
			case '=':
			    return (T_SUBEQ);
			case '>':
			    return (T_MEMREF);
			default:
			    savech = ch;
			    return ('-');
			}
	case '*':	switch (ch = getch()) {
			case '=':
			    return (T_MULEQ);
			default:
			    savech = ch;
			    return ('*');
			}
	case '/':	switch (ch = getch()) {
			case '=':
			    return (T_DIVEQ);
			case '/':
			    while ((ch = getch()) != EOF)
				if (ch == '\n')
				    break;
			    break;
			case '*':
			    ch = ch2 = EOF;
			    for (; (ch2 = getch()) != EOF; ch = ch2)
				if (ch == '*' && ch2 == '/')
				    break;
			    break;
			default:
			    savech = ch;
			    return ('/');
			}
			break;
	case ':':	switch (ch = getch()) {
			case ':':
			    return (T_CC);
			default:
			    savech = ch;
			    return (':');
			}
	default:	if (isdigit(ch))
			    return (getnumber(ch));
			else if (isidchar(ch))
			    return (getid(ch));
			else {
			    t_token[0] = ch;
			    t_token[1] = '\0';
			    return (ch);
			}
	}
}

/* getstring - get a string */
static int getstring()
{
    char *p;
    int ch;

    /* get the string */
    p = t_token;
    while ((ch = literalch()) != EOF && ch != '"')
	*p++ = ch;
    if (ch == EOF)
	savech = EOF;
    *p = '\0';
    return (T_STRING);
}

/* getcharacter - get a character constant */
/*
	Called after the opening single quote has been read.  The constant
	is returned as a T_NUMBER whose value, in t_value, is the character
	code; t_token receives the character as a one-character string.
*/
static int getcharacter()
{
    int ch;

    /* the value is the next (possibly escaped) character */
    ch = literalch();
    t_value = ch;
    t_token[0] = ch;
    t_token[1] = '\0';

    /* exactly one character may appear between the quotes */
    if (getch() == '\'')
	return (T_NUMBER);
    parse_error("Expecting a closing single quote");
    return (T_NUMBER);
}

/* literalch - get a character from a literal string */
/*
	Reads one character and translates backslash escapes: \n becomes a
	newline, \t a tab, and a backslash before any other character
	yields that character.  A backslash that is the last thing in the
	input is returned as a backslash, with EOF pushed back for the next
	read.
*/
static int literalch()
{
    int ch = getch();

    /* ordinary characters (and EOF) pass straight through */
    if (ch != '\\')
	return (ch);

    /* translate the character following the backslash */
    ch = getch();
    if (ch == 'n')
	return ('\n');
    if (ch == 't')
	return ('\t');
    if (ch == EOF) {
	savech = EOF;
	return ('\\');
    }
    return (ch);
}

/* getid - get an identifier */
static int getid(ch)
  int ch;
{
    char *p;
    int i;

    /* get the identifier */
    p = t_token; *p++ = ch;
    while ((ch = getch()) != EOF && isidchar(ch))
	*p++ = ch;
    savech = ch;
    *p = '\0';

    /* check to see if it is a keyword */
    for (i = 0; ktab[i].kt_keyword != NULL; ++i)
	if (strcmp(ktab[i].kt_keyword,t_token) == 0)
	    return (ktab[i].kt_token);
    return (T_IDENTIFIER);
}

/* getnumber - get a number */
static int getnumber(ch)
  int ch;
{
    char *p;

    /* get the number */
    p = t_token; *p++ = ch; t_value = ch - '0';
    while ((ch = getch()) != EOF && isdigit(ch)) {
	t_value = t_value * 10 + ch - '0';
	*p++ = ch;
    }
    savech = ch;
    *p = '\0';
    return (T_NUMBER);
}

/* skipspaces - skip leading spaces */
/* returns the first character that is not white space, or EOF */
static int skipspaces()
{
    int ch;

    /* read until something other than white space turns up; EOF ends
       the scan as well because isspace(EOF) is false */
    for (;;) {
	ch = getch();
	if (ch == '\0' || !isspace(ch))
	    return (ch);
    }
}

/* isidchar - is this an identifier character */
/* returns 1 for a letter, a digit or an underscore and 0 otherwise */
static int isidchar(ch)
  int ch;
{
    if (ch == '_')
	return (1);
    if (isdigit(ch))
	return (1);
    if (islower(ch))
	return (1);
    if (isupper(ch))
	return (1);
    return (0);
}

/* getch - get the next character */
static int getch()
{
    int ch;
    
    /* check for a lookahead character */
    if ((ch = savech) != '\0')
	savech = '\0';

    /* check for a buffered character */
    else {
	while ((ch = *lptr++) == '\0') {

	    /* check for being at the end of file */
	    if (lastch == EOF)
		return (EOF);

	    /* read the next line */
	    lptr = line;
	    while ((lastch = (*getcf)(getcd)) != EOF && lastch != '\n')
		*lptr++ = lastch;
	    *lptr++ = '\n'; *lptr = '\0';
	    lptr = line;
	    ++lnum;
	}
    }

    /* return the current character */
    return (ch);
}

/* parse_error - report an error in the current line */
/*
	Writes msg, the current line number and the text of the current
	line through osputs(), followed by a line holding a caret under the
	position just past the last character taken from the buffer, and
	then jumps to error_trap.  It does not return to its caller.
*/
parse_error(msg)
  char *msg;
{
    extern jmp_buf error_trap;
    char buf[LSIZE+3],*src,*dst;

    /* redisplay the line with the error; the pieces are written one at
       a time so that neither a long message nor a long line has to fit
       in buf */
    osputs(">>> ");
    osputs(msg);
    sprintf(buf," <<<\n>>> in line %d <<<\n",lnum);
    osputs(buf);
    osputs(line);

    /* point to the position immediately following the error; tabs are
       copied so that the caret lines up under the text, and the scan is
       confined to the line buffer so that buf (at most LSIZE blanks
       plus the caret, the newline and the terminator) cannot overflow */
    for (src = line, dst = buf; src < lptr-1 && src < &line[LSIZE]; ++src)
	*dst++ = (*src == '\t' ? '\t' : ' ');
    *dst++ = '^'; *dst++ = '\n'; *dst = '\0';
    osputs(buf);

    /* invoke the error trap */
    longjmp(error_trap,1);
}
