Extracting Tokens

Parser operates only with tokens - basic elements of the language. Tokens like identifiers and constants have also additional data structures where actual data is stored.
    TToken = (
      Token_EndOfLine,
      Token_Identifier,
      Token_Constant,
      Token_Plus,
      Token_Minus,
      Token_Asterisk,
      Token_Slash,
      Token_Equal,
      Token_NotEqual,
      Token_Greater,
      Token_Lower,
      Token_GreaterOrEqual,
      Token_LowerOrEqual,
      Token_LeftParenthesis,
      Token_RightParenthesis,
      Token_LeftBracket,
      Token_RightBracket,
      Token_Period,
      Token_Comma,
      Token_Colon,
      Token_Semicolon,
      Token_Caret,
      Token_Assignment,
      Token_PeriodPeriod,
      Token_At,
      Token_INLINE_Separator,
      Token_EqualAsSeparator,
      Token_1B,
      Token_AND,
      Token_ARRAY,
      Token_BEGIN,
      Token_CASE,
      Token_CONST,
      Token_CONSTRUCTOR,
      Token_DESTRUCTOR,
      Token_DIV,
      Token_DO,
      Token_DOWNTO,
      Token_ELSE,
      Token_END,
      Token_ASM,
      Token_FILE,
      Token_FOR,
      Token_2B,
      Token_FUNCTION,
      Token_GOTO,
      Token_IF,
      Token_IMPLEMENTATION,
      Token_IN,
      Token_INLINE,
      Token_INTERFACE,
      Token_33,
      Token_LABEL,
      Token_MOD,
      Token_NIL,
      Token_NOT,
      Token_OBJECT,
      Token_OF,
      Token_OR,
      Token_PACKED,
      Token_PROCEDURE,
      Token_PROGRAM,
      Token_RECORD,
      Token_REPEAT,
      Token_SET,
      Token_SHL,
      Token_SHR,
      Token_STRING,
      Token_THEN,
      Token_TYPE,
      Token_TO,
      Token_UNIT,
      Token_UNTIL,
      Token_USES,
      Token_VAR,
      Token_INHERITED,
      Token_WHILE,
      Token_WITH,
      Token_XOR,
      Token_ConstantIdentifier,
      Token_TypeIdentifier,
      Token_VariableIdentifier,
      Token_ProcedureIdentifier,
      Token_UnitIdentifier,
      Token_OBJECT_PROCEDURE_FUNCTION_TypeIdentifier,
      Token_LabelIdentifier,
      Token_SystemProcedure,
      Token_SystemFunction,
      Token_SystemFunctionOrProcedure,
      Token_Port,
      Token_Mem,
      Token_OpenString,
      Token_AsmSegmentReference,
      Token_AsmOffsetReference,
      Token_5E,
      Token_5F,
      Token_60,
      Token_61,
      Token_62,
      Token_63,
      Token_ABSOLUTE,
      Token_PUBLIC,
      Token_PRIVATE);
This is one of the most important procedures in Turbo Pascal. It reads source file by calling NextNonCommentCharacter function and extracts tokens with associated data. This procedure identifies reserved words, string constants, integer constants, real constants, symbols, identifiers and other Pascal language elements.
Procedure GetNextToken;
Var NextChar, TempPtr: PChar;

  Procedure CheckIfIdentifierOrReservedWord;
  Var IdLen: Byte;
      DummyPtr: Pointer;
      DummyWord: Word;
  begin
    IdLen := 0;
    CurrentIdentifierHash := 0;
    While NextChar^ in ['0'..'9', '_', 'A'..'Z', 'a'..'z'] do
      begin
        If IdLen < High (CurrentIdentifier) then
          begin
            Inc (IdLen);
            CurrentIdentifier [IdLen] := NextChar^;
            Inc (CurrentIdentifierHash, Byte (NextChar^) and $DF);
          end;
        Inc (NextChar);
      end;
    CurrentIdentifier [0] := Char (IdLen);
    CurrentIdentifierHash := (CurrentIdentifierHash - IdLen) shl 1;
    If not IsIdentifierInSymbolTable (@ReservedWords, Token, DummyPtr, DummyWord) then Token := Token_Identifier;
  end;

  Function IsRealConstant: Boolean;
  Var Word64Lo, Word64Hi, TempWord64Lo, TempWord64Hi: LongRec;
      MantissaExponent, MantissaExponentIncrement, Exponent: Word;
      ExponentSign: Char;
      SavedFirstChar: PChar;
      Err: Integer;
      RealStr: String [128];
  begin
    IsRealConstant := False;
    SavedFirstChar := NextChar;
    Word64Lo.Long := 0;
    Word64Hi.Long := 0;
    MantissaExponent := 0;
    MantissaExponentIncrement := 0;
    Repeat
      Case NextChar^ of
        '0'..'9': begin
                    If Word64Hi.WordH < $0CCC then
                      begin
                        { Word64 := Word64 * 10 + Digit }
                        Inc (MantissaExponent, MantissaExponentIncrement);
                      end else Inc (MantissaExponent, MantissaExponentIncrement + 1);
                  end;
        '.': If MantissaExponentIncrement = 0 then Dec (MantissaExponentIncrement)
               else Break;
      end;
      Inc (NextChar);
    until not (NextChar^ in ['0'..'9', '.']);
    TempWord64Lo := Word64Lo;
    TempWord64Hi := Word64Hi;
    If UpCase (NextChar^) = 'E' then
      begin
        Inc (NextChar);
        ExponentSign := NextChar^;
        If NextChar^ in ['+', '-'] then Inc (NextChar);
        Exponent := 0;
        While NextChar^ in ['0'..'9'] do
          If Exponent < 1000 then                                               { If exponent is greater than 9999      }
            begin                                                               { TPC evaluates up to 4 digits of       }
              Exponent := 10 * Exponent + Byte (NextChar [0]) - Ord ('0');      { exponent and then takes next digit    }
              Inc (NextChar);                                                   { for next token.                       }
            end else Exit;                                                      { BUG in TPC:                           }
        If ExponentSign = '-' then Exponent := MantissaExponent - Exponent      { 1e-4932 is not calculated correctly   }
          else Exponent := MantissaExponent + Exponent;

      end;
    RealStr [0] := Char (NextChar - SavedFirstChar);
    Move (SavedFirstChar^, RealStr [1], Byte (RealStr [0]));
    Val (RealStr, NumericConstant.Extended, Err);
    IsRealConstant := Err = 0;
  end;


  Procedure CheckRealConstant;
  begin
    If not IsRealConstant then
      begin
        ErrorSourcePosition := NextChar;
        Error (ErrorInRealConstant);
      end;
    ConstantTypeDef := Ptr (SystemUnitSegment, Extended_TypeOffset);
    Token := Token_Constant;
  end;

  Function CheckIntegerNumericConstant (Var NextChar: PChar): TToken;
  begin
    If not IsIntegerNumericConstant (NextChar) then
      begin
        ErrorSourcePosition := NextChar;
        Error (ErrorInIntegerConstant);
      end;
    NumericConstant.LongInt := IntegerNumericConstant;
    ConstantTypeDef := Ptr (SystemUnitSegment, LongInt_TypeOffset);
    CheckIntegerNumericConstant := Token_Constant;
  end;

begin
  CheckStack;
  NextChar := NextNonCommentCharacter;
  ErrorSourcePosition := NextChar;
  Case NextChar [0] of
    #0: begin
          Token := Token_EndOfLine;
          Inc (NextChar);
        end;
    '!', '"',
    '%', '&',
    '('..'/',
    ':'..'@',
    '['..'^',
    '`':      begin
                Case NextChar [0] of
                  '!',
                  '"',
                  '%',
                  '&',
                  '?',
                  '\',
                  '`': Error (SyntaxError);
                  '(': If NextChar [1] = '.' then
                        begin
                          Token := Token_LeftBracket;
                          Inc (NextChar);
                        end else Token := Token_LeftParenthesis;
                  ')': Token := Token_RightParenthesis;
                  '*': Token := Token_Asterisk;
                  '+': Token := Token_Plus;
                  ',': Token := Token_Comma;
                  '-': Token := Token_Minus;
                  '.': Case NextChar [1] of
                        '.': begin
                                Token := Token_PeriodPeriod;
                                Inc (NextChar);
                              end;
                        ')': begin
                                Token := Token_RightBracket;
                                Inc (NextChar);
                              end;
                        else Token := Token_Period;
                      end;
                  '/': Token := TokenForSlash;
                  ':': If NextChar [1] = '=' then
                        begin
                          Token := Token_Assignment;
                          Inc (NextChar);
                        end else Token := Token_Colon;
                  ';': Token := Token_Semicolon;
                  '<': Case NextChar [1] of
                        '>': begin
                                Token := Token_NotEqual;
                                Inc (NextChar);
                              end;
                        '=': begin
                                Token := Token_LowerOrEqual;
                                Inc (NextChar);
                              end;
                        else Token := Token_Lower;
                      end;
                  '=': Token := TokenForEqual;
                  '>': If NextChar [1] = '=' then
                        begin
                          Token := Token_GreaterOrEqual;
                          Inc (NextChar);
                        end else Token := Token_Greater;
                  '@': Token := Token_At;
                  '[': Token := Token_LeftBracket;
                  ']': Token := Token_RightBracket;
                  '^': Token := Token_Caret;
                end;
                Inc (NextChar);
              end;
    '#',
    '''': Token := ProcessStringConstant (NextChar);
    '$':  Token := CheckIntegerNumericConstant (NextChar);
    '0'..'9': begin
                TempPtr := NextChar;
                Repeat
                  Inc (TempPtr);
                until not (TempPtr [0] in ['0'..'9']);
                Case UpCase (TempPtr [0]) of
                  'E': CheckRealConstant;
                  '.': Case TempPtr [1] of
                         '.',
                         ')': Token := CheckIntegerNumericConstant (NextChar);
                         else CheckRealConstant;
                       end;
                  else Token := CheckIntegerNumericConstant (NextChar);
                end;
              end;
    'A'..'Z',
    'a'..'z',
    '_':      CheckIfIdentifierOrReservedWord;
    else Error (SyntaxError);
  end;
  CurrentSourceFile^.CurrentPosition := Ofs (NextChar^);
end;
This function expects specified token and gets the next one or reports error if the expected token is not found.
Procedure ExpectTokenAndGetNext (ExpectedToken: TToken);
Const SpecialErrors = 22;
      ExpectedTokenErrors: Array [1..SpecialErrors] of TExpectedTokenError = (
        (Token: Token_Identifier;       Error: IdentifierExpected),
        (Token: Token_LabelIdentifier;  Error: LabelIdentifierExpected),
        (Token: Token_BEGIN;            Error: BEGIN_expected),
        (Token: Token_END;              Error: END_expected),
        (Token: Token_DO;               Error: DO_expected),
        (Token: Token_OF;               Error: OF_expected),
        (Token: Token_INTERFACE;        Error: INTERFACE_expected),
        (Token: Token_THEN;             Error: THEN_expected),
        (Token: Token_IMPLEMENTATION;   Error: IMPLEMENTATION_expected),
        (Token: Token_UNIT;             Error: UNIT_expected),
        (Token: Token_Semicolon;        Error: SemicolonExpected),
        (Token: Token_Colon;            Error: ColonExpected),
        (Token: Token_Comma;            Error: CommaExpected),
        (Token: Token_LeftParenthesis;  Error: LeftParenthesisExpected),
        (Token: Token_RightParenthesis; Error: RightParenthesisExpected),
        (Token: Token_Equal;            Error: EqualExpected),
        (Token: Token_EqualAsSeparator; Error: EqualExpected),
        (Token: Token_Assignment;       Error: AssignmentExpected),
        (Token: Token_LeftBracket;      Error: LeftBracketExpected),
        (Token: Token_RightBracket;     Error: RightBracketExpected),
        (Token: Token_Period;           Error: PeriodExpected),
        (Token: Token_PeriodPeriod;     Error: PeriodPeriodExpected));
Var N: Byte;
begin
  If ExpectedToken <> Token then
    begin
      For N := 1 to SpecialErrors do
        If ExpectedTokenErrors [N].Token = ExpectedToken then Error (ExpectedTokenErrors [N].Error);
      Error (SyntaxError);
    end;
  GetNextToken;
end;
Another function that checks for specified token but doesn't report error if it is not found.
Function CheckAndGetNextToken (TokenToCheck: TToken): Boolean;
begin
  CheckAndGetNextToken := False;
  If Token <> TokenToCheck then Exit;
  CheckAndGetNextToken := True;
  GetNextToken;
end;
 
 
 
© 2017 Turbo Pascal | Privacy Policy