namespace STCompiler.Compiler;

using System;
using System.Text;
using System.Collections.Generic;
using STCompiler.Common;

/// <summary>Kinds of tokens produced by <see cref="StLexer"/>.</summary>
/// <remarks>Member order is kept stable because it determines the underlying numeric values.</remarks>
public enum TokType
{
    IDENT, INT, REAL, ASSIGN, SEMI, LPAREN, RPAREN, PLUS, MINUS, MUL, DIV,
    LT, GT, LE, GE, EQ, NEQ,
    IF, THEN, ELSE, END_IF,
    WHILE, DO, END_WHILE,
    FOR, TO, BY, END_FOR,
    PROGRAM, VAR, END_VAR, END_PROGRAM,
    ARRAY, OF, LBRACKET, RBRACKET, DOTS,
    EOF
}

/// <summary>A single lexical token: its kind, its text and the 1-based source line it starts on.</summary>
public class Token
{
    public TokType Type;
    public string Text;
    public int Line;

    public Token(TokType t, string s, int line)
    {
        Type = t;
        Text = s;
        Line = line;
    }
}

/// <summary>A diagnostic message together with the source line it was recorded for.</summary>
public class CompileError
{
    public int Line;
    public string Message;

    public CompileError(int line, string msg)
    {
        Line = line;
        Message = msg;
    }
}

/// <summary>
/// Hand-written lexer that turns source text into <see cref="Token"/>s, one per
/// <see cref="NextToken"/> call. Lexical problems are appended to <see cref="Errors"/>
/// and signalled to the caller by returning an <see cref="TokType.EOF"/> token.
/// </summary>
public class StLexer
{
    // Keyword spellings (already upper-cased) mapped to their token kinds.
    private static readonly Dictionary<string, TokType> Keywords = new()
    {
        ["PROGRAM"] = TokType.PROGRAM,
        ["VAR"] = TokType.VAR,
        ["END_VAR"] = TokType.END_VAR,
        ["END_PROGRAM"] = TokType.END_PROGRAM,
        ["IF"] = TokType.IF,
        ["THEN"] = TokType.THEN,
        ["ELSE"] = TokType.ELSE,
        ["END_IF"] = TokType.END_IF,
        ["WHILE"] = TokType.WHILE,
        ["DO"] = TokType.DO,
        ["END_WHILE"] = TokType.END_WHILE,
        ["FOR"] = TokType.FOR,
        ["TO"] = TokType.TO,
        ["BY"] = TokType.BY,
        ["END_FOR"] = TokType.END_FOR,
        ["ARRAY"] = TokType.ARRAY,
        ["OF"] = TokType.OF,
    };

    private readonly string src;
    private int i;
    private int currentLine = 1;

    /// <summary>Lexical errors collected so far, in the order they were encountered.</summary>
    public List<CompileError> Errors = new();

    /// <summary>Creates a lexer over the source text <paramref name="s"/>.</summary>
    public StLexer(string s)
    {
        src = s;
    }

    // Current character, or '\0' once the end of the input has been reached.
    char Peek() => i < src.Length ? src[i] : '\0';

    // Character after the current one, or '\0' if there is none.
    char Peek2() => i + 1 < src.Length ? src[i + 1] : '\0';

    // Consumes and returns the current character ('\0' at end of input), counting newlines.
    char Next()
    {
        if (i >= src.Length) return '\0';
        char c = src[i++];
        if (c == '\n') currentLine++;
        return c;
    }

    void AddError(string msg) => Errors.Add(new CompileError(currentLine, msg));

    /// <summary>
    /// Scans and returns the next token. Returns an <see cref="TokType.EOF"/> token at the end
    /// of the input and also after a lexical error (which is recorded in <see cref="Errors"/>).
    /// </summary>
    public Token NextToken()
    {
        SkipWhitespaceAndComments();

        char first = Peek();
        if (first == '\0') return new Token(TokType.EOF, "", currentLine);
        if (char.IsLetter(first) || first == '_') return ReadWord();
        if (char.IsDigit(first)) return ReadNumber();
        return ReadOperator();
    }

    // Skips whitespace and '//' line comments. Implemented as a loop so that a long run of
    // comment lines cannot exhaust the call stack.
    void SkipWhitespaceAndComments()
    {
        while (true)
        {
            while (char.IsWhiteSpace(Peek())) Next();
            if (Peek() != '/' || Peek2() != '/') return;

            // Drop everything up to (not including) the newline; the whitespace loop eats it.
            while (Peek() != '\0' && Peek() != '\n') Next();
        }
    }

    // Reads an identifier or keyword. The text is upper-cased, so keywords are matched
    // case-insensitively and identifier tokens carry their upper-cased spelling.
    Token ReadWord()
    {
        int startLine = currentLine;
        var sb = new StringBuilder();
        while (char.IsLetterOrDigit(Peek()) || Peek() == '_') sb.Append(Next());

        string word = sb.ToString().ToUpperInvariant();
        TokType type = Keywords.TryGetValue(word, out TokType keyword) ? keyword : TokType.IDENT;
        return new Token(type, word, startLine);
    }

    // Reads an INT or REAL literal (optional fraction, optional exponent).
    Token ReadNumber()
    {
        int startLine = currentLine;
        var sb = new StringBuilder();
        bool isFloat = false;

        // Integer part, in front of a decimal point or a range operator.
        while (char.IsDigit(Peek())) sb.Append(Next());

        if (Peek() == '.')
        {
            if (Peek2() == '.')
            {
                // Range operator ("1..10"): leave both dots for the next call.
                return new Token(TokType.INT, sb.ToString(), startLine);
            }

            // Decimal point followed by the fractional digits.
            isFloat = true;
            sb.Append(Next());
            while (char.IsDigit(Peek())) sb.Append(Next());
        }

        // Optional exponent part.
        if (Peek() == 'E' || Peek() == 'e')
        {
            isFloat = true;
            sb.Append(Next());
            if (Peek() == '+' || Peek() == '-') sb.Append(Next());
            if (!char.IsDigit(Peek()))
            {
                AddError("Expected digits after exponent");
                return new Token(TokType.EOF, "", startLine);
            }
            while (char.IsDigit(Peek())) sb.Append(Next());
        }

        return new Token(isFloat ? TokType.REAL : TokType.INT, sb.ToString(), startLine);
    }

    // Reads an operator or punctuation token; reports anything else as an error.
    Token ReadOperator()
    {
        int tokenLine = currentLine;

        if (Peek() == ':')
        {
            if (Peek2() == '=')
            {
                Next();
                Next();
                return new Token(TokType.ASSIGN, ":=", tokenLine);
            }

            // A lone ':' stops lexing: it is deliberately left unconsumed, so every later
            // call reports the same error again and keeps returning EOF.
            AddError("Expected '=' after ':' for assignment");
            return new Token(TokType.EOF, "", tokenLine);
        }

        if (Peek() == '<')
        {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.LE, "<=", tokenLine); }
            if (Peek() == '>') { Next(); return new Token(TokType.NEQ, "<>", tokenLine); }
            return new Token(TokType.LT, "<", tokenLine);
        }

        if (Peek() == '>')
        {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.GE, ">=", tokenLine); }
            return new Token(TokType.GT, ">", tokenLine);
        }

        char c = Next();
        switch (c)
        {
            case '=': return new Token(TokType.EQ, "=", tokenLine);
            case ';': return new Token(TokType.SEMI, ";", tokenLine);
            case '(': return new Token(TokType.LPAREN, "(", tokenLine);
            case ')': return new Token(TokType.RPAREN, ")", tokenLine);
            case '[': return new Token(TokType.LBRACKET, "[", tokenLine);
            case ']': return new Token(TokType.RBRACKET, "]", tokenLine);
            case '+': return new Token(TokType.PLUS, "+", tokenLine);
            case '-': return new Token(TokType.MINUS, "-", tokenLine);
            case '*': return new Token(TokType.MUL, "*", tokenLine);
            case '/': return new Token(TokType.DIV, "/", tokenLine);
            case '.' when Peek() == '.':
                Next();
                return new Token(TokType.DOTS, "..", tokenLine);
        }

        // Anything else, including a lone '.', is invalid. The character has already been
        // consumed; pushing a '.' back and rescanning would re-read it without end.
        AddError($"Unexpected character '{c}'");
        return new Token(TokType.EOF, "", tokenLine);
    }
}