UNIT Tokenizer; { Turbo Pascal 4.0-up; QuickPascal 1.0 }

INTERFACE
  USES   Dos, FNameHax, Back1Hax;

TYPE   Tkind =
         (newline,whitespace,symbol,number,
          qstring,bracom,eolcom,special);

CONST  KindName:
           ARRAY[Tkind] OF string[10] =
             ('newline','whitespace','symbol','number',
              'qstring','bracom','eolcom','special');

VAR    KindOfToken: Tkind;

PROCEDURE ReadToken(VAR f:text; VAR t:string);
{ - Reads a token from f into t, consuming
      exactly the characters that make up the token.
  - Sets global variable KindOfToken to the kind of token read.
  - KindName[KindOfToken] is then a printable description
      of the kind of token. }

IMPLEMENTATION

CONST  WhiteSpaces:   SET OF char = [#0..#9,#11..#12,#14..' '];
       Alphabetics:   SET OF char = ['a'..'z','A'..'Z','_'];
       Digits:        SET OF char = ['0'..'9'];

PROCEDURE ReadToken(VAR f:text; VAR t:string);

LABEL  OneTooMany, AllDone;

VAR    c, quote, prev: Char;

BEGIN
  read(f,c); t:=c;                   { Get first char of token }

  IF (c=^M) THEN                            { NEWLINE - Type 1 }
    BEGIN                                    { ^M^J or just ^M }
      KindOfToken := newline;
      read(f,c); t:=t+c;          { Look at the next character }
      IF c=^J THEN      { If it's ^J, it belongs to this token }
         GOTO AllDone                             { so keep it }
      ELSE
         GOTO OneTooMany   { Otherwise we've read one too many }
    END

  ELSE IF (c=^J) THEN                       { NEWLINE - Type 2 }
    BEGIN                                       { ^J by itself }
      KindOfToken := newline;
      GOTO AllDone
    END

  ELSE IF c IN WhiteSpaces THEN                   { WHITESPACE }
    BEGIN
      KindOfToken := whitespace;
      REPEAT
        read(f,c); t:=t+c
      UNTIL NOT (c IN WhiteSpaces);
      GOTO OneTooMany
    END

  ELSE IF c IN Alphabetics THEN                       { SYMBOL }
    BEGIN
      KindOfToken := symbol;
      REPEAT
        read(f,c); t:=t+c
      UNTIL (NOT (c IN Alphabetics)) AND (NOT (c in Digits));
      GOTO OneTooMany
    END

  ELSE IF (c='''') OR (c='"') THEN             { QUOTED STRING }
    BEGIN
      KindOfToken := qstring;
      quote := c;
      REPEAT
        read(f,c); t:=t+c;
        IF (c=quote) THEN
          IF NextChar(f) <> quote THEN   { found end of string }
            GOTO AllDone
          ELSE          { it's a quote written double; grab it }
            BEGIN
              read(f,c); t:=t+c
            END
      UNTIL eoln(f);
      FileError(f,'Expecting '+quote+', found end of line')
    END

  ELSE IF (c IN Digits) OR
    ((c='.') AND (NextChar(f) IN Digits)) THEN        { NUMBER }
    BEGIN
      KindOfToken := number;
      IF c<>'.' THEN
        REPEAT
          read(f,c); t:=t+c                   { Some digits... }
        UNTIL NOT (c IN Digits);
      IF c='.' THEN                         { Decimal point... }
        REPEAT
          read(f,c); t:=t+c                   { More digits... }
        UNTIL NOT (c IN Digits);
      IF (c='E') or (c='e') THEN           { E for exponent... }
        BEGIN
          read(f,c); t:=t+c;
          IF (c IN Digits) OR
               (c='+') OR (c='-') THEN        { +, -, or digit }
            REPEAT
              read(f,c); t:=t+c           { Digits of exponent }
            UNTIL NOT (c IN Digits)
        END;
      GOTO OneTooMany
    END

  ELSE IF c='{' THEN              { BRACKETED COMMENT - Type 1 }
    BEGIN
      KindOfToken := bracom;
      t:=' ';          { Return a blank to the calling program }
      REPEAT
        read(f,c);
        IF c='}' THEN GOTO AllDone
      UNTIL eof(f);
      FileError(f,'Expecting ''}'', found end of file')
    END

  ELSE IF (c='(') AND
         (NextChar(f)='*') THEN   { BRACKETED COMMENT - Type 2 }
    BEGIN
      KindOfToken := bracom;
      t:=' ';          { Return a blank to the calling program }
      REPEAT
        prev:=c; read(f,c);
        IF (c=')') AND (prev='*') THEN GOTO AllDone
      UNTIL eof(f);
      FileError(f,'Expecting ''*)'', found end of file')
    END

  ELSE IF c='%' THEN                  { COMMENT TO END OF LINE }
    BEGIN
      KindOfToken := eolcom;
      REPEAT
        read(f,c); t:=t+c
      UNTIL (c=^M) OR (c=^J);
      GOTO OneTooMany
    END

  ELSE                    { THE CHARACTER BY ITSELF IS A TOKEN }
    BEGIN
      KindOfToken := special;
      GOTO AllDone
    END;

OneTooMany:    { Jump here if an extra character has been read }
  back1char(f);
  delete(t,length(t),1);

AllDone:          { Jump here if nothing else needs to be done }
END;

END. { Tokenizer }