Compiler aufgeräumt
This commit is contained in:
161
STCompiler.Compiler/Lexer.cs
Normal file
161
STCompiler.Compiler/Lexer.cs
Normal file
@ -0,0 +1,161 @@
|
||||
namespace STCompiler.Compiler;
|
||||
|
||||
using System;
|
||||
using System.Text;
|
||||
using System.Collections.Generic;
|
||||
using STCompiler.Common;
|
||||
|
||||
/// <summary>
/// Kinds of tokens produced by the lexer. Members keep their declaration
/// order, so the implicit numeric values start at 0 with <c>IDENT</c>.
/// </summary>
public enum TokType {
    // Identifiers and numeric literals
    IDENT,
    INT,
    REAL,

    // Assignment and punctuation
    ASSIGN,
    SEMI,
    LPAREN,
    RPAREN,

    // Arithmetic operators
    PLUS,
    MINUS,
    MUL,
    DIV,

    // Comparison operators
    LT,
    GT,
    LE,
    GE,
    EQ,
    NEQ,

    // Conditional statement keywords
    IF,
    THEN,
    ELSE,
    END_IF,

    // WHILE loop keywords
    WHILE,
    DO,
    END_WHILE,

    // FOR loop keywords
    FOR,
    TO,
    BY,
    END_FOR,

    // Program structure keywords
    PROGRAM,
    VAR,
    END_VAR,
    END_PROGRAM,

    // End-of-input marker
    EOF
}
|
||||
|
||||
/// <summary>
/// A single lexical token: its kind, its text, and the source line it was
/// found on.
/// </summary>
public class Token {
    /// <summary>Kind of the token.</summary>
    public TokType Type;

    /// <summary>Text carried by the token, exactly as passed to the constructor.</summary>
    public string Text;

    /// <summary>Source line number associated with the token.</summary>
    public int Line;

    /// <summary>Creates a token from its kind, text and line number.</summary>
    /// <param name="t">Kind of the token.</param>
    /// <param name="s">Text of the token.</param>
    /// <param name="line">Source line number of the token.</param>
    public Token(TokType t, string s, int line) {
        (Type, Text, Line) = (t, s, line);
    }
}
|
||||
|
||||
/// <summary>
/// A diagnostic message paired with the source line it refers to.
/// </summary>
public class CompileError {
    /// <summary>Source line number the error refers to.</summary>
    public int Line;

    /// <summary>Human-readable description of the error.</summary>
    public string Message;

    /// <summary>Creates an error for the given line with the given message.</summary>
    /// <param name="line">Source line number the error refers to.</param>
    /// <param name="msg">Description of the error.</param>
    public CompileError(int line, string msg) => (Line, Message) = (line, msg);
}
|
||||
|
||||
/// <summary>
/// Hand-written scanner that splits source text into <see cref="Token"/>s, one
/// per <see cref="NextToken"/> call (the keyword set suggests IEC 61131-3
/// Structured Text). Problems are recorded in <see cref="Errors"/> instead of
/// being thrown; after recording an error the scanner returns an
/// <see cref="TokType.EOF"/> token.
/// </summary>
public class StLexer {
    // Upper-cased keyword spellings and the token kind each one maps to.
    private static readonly Dictionary<string, TokType> Keywords = new() {
        ["PROGRAM"] = TokType.PROGRAM,
        ["VAR"] = TokType.VAR,
        ["END_VAR"] = TokType.END_VAR,
        ["END_PROGRAM"] = TokType.END_PROGRAM,
        ["IF"] = TokType.IF,
        ["THEN"] = TokType.THEN,
        ["ELSE"] = TokType.ELSE,
        ["END_IF"] = TokType.END_IF,
        ["WHILE"] = TokType.WHILE,
        ["DO"] = TokType.DO,
        ["END_WHILE"] = TokType.END_WHILE,
        ["FOR"] = TokType.FOR,
        ["TO"] = TokType.TO,
        ["BY"] = TokType.BY,
        ["END_FOR"] = TokType.END_FOR,
    };

    private readonly string src;      // full source text being scanned
    private int i;                    // index of the next unread character in src
    private int currentLine = 1;      // 1-based line of the next unread character

    /// <summary>Errors recorded so far, in the order they were encountered.</summary>
    public List<CompileError> Errors = new();

    /// <summary>Creates a lexer over <paramref name="s"/>.</summary>
    /// <param name="s">Source text; <c>null</c> is treated as empty input.</param>
    public StLexer(string s) {
        // Guard against null so the first Peek() does not fail with a NullReferenceException.
        src = s ?? string.Empty;
    }

    // Character at the read position, or '\0' past the end of the input.
    char Peek() => i < src.Length ? src[i] : '\0';

    // Character one past the read position, or '\0' past the end of the input.
    char Peek2() => i + 1 < src.Length ? src[i + 1] : '\0';

    // Consumes and returns one character ('\0' at end of input), counting newlines.
    char Next() {
        if (i >= src.Length) {
            return '\0';
        }
        char c = src[i++];
        if (c == '\n') {
            currentLine++;
        }
        return c;
    }

    // Only ASCII 0-9 may form numeric literals; char.IsDigit would also accept
    // other Unicode decimal digits, which number parsing downstream cannot read.
    static bool IsAsciiDigit(char c) => c >= '0' && c <= '9';

    // Records an error at the line of the current read position.
    void AddError(string msg) => Errors.Add(new CompileError(currentLine, msg));

    // Skips whitespace and '//' line comments. Implemented as a loop (not by
    // re-entering NextToken) so long runs of comment lines cannot exhaust the stack.
    void SkipTrivia() {
        while (true) {
            while (char.IsWhiteSpace(Peek())) {
                Next();
            }
            if (Peek() != '/' || Peek2() != '/') {
                return;
            }
            // Drop the rest of the comment line; the terminating newline (if any)
            // is consumed by the whitespace loop on the next pass.
            while (Peek() != '\0' && Peek() != '\n') {
                Next();
            }
        }
    }

    /// <summary>
    /// Scans and returns the next token, skipping whitespace and <c>//</c> comments.
    /// </summary>
    /// <returns>
    /// The next token; an <see cref="TokType.EOF"/> token at end of input (a literal
    /// NUL character in the source is indistinguishable from end of input) and also
    /// after any error added to <see cref="Errors"/>.
    /// </returns>
    public Token NextToken() {
        SkipTrivia();

        char first = Peek();
        if (first == '\0') {
            return new Token(TokType.EOF, "", currentLine);
        }
        if (char.IsLetter(first) || first == '_') {
            return ReadWord();
        }
        if (IsAsciiDigit(first)) {
            return ReadNumber();
        }
        return ReadOperator();
    }

    // Reads an identifier or keyword. The text is upper-cased, so keywords match
    // case-insensitively and IDENT tokens carry the upper-cased spelling.
    Token ReadWord() {
        int startLine = currentLine;
        var sb = new StringBuilder();
        while (char.IsLetterOrDigit(Peek()) || Peek() == '_') {
            sb.Append(Next());
        }
        string word = sb.ToString().ToUpperInvariant();
        TokType type = Keywords.TryGetValue(word, out TokType keyword) ? keyword : TokType.IDENT;
        return new Token(type, word, startLine);
    }

    // Reads an INT or REAL literal: digits, optional '.' + digits, optional exponent.
    Token ReadNumber() {
        int startLine = currentLine;
        var sb = new StringBuilder();
        bool isFloat = false;

        // Integer part before the decimal point.
        while (IsAsciiDigit(Peek())) {
            sb.Append(Next());
        }

        // Optional decimal point and fractional digits (the digits may be absent).
        if (Peek() == '.') {
            isFloat = true;
            sb.Append(Next());
            while (IsAsciiDigit(Peek())) {
                sb.Append(Next());
            }
        }

        // Optional exponent: 'E' or 'e', optional sign, at least one digit.
        if (Peek() == 'E' || Peek() == 'e') {
            isFloat = true;
            sb.Append(Next());
            if (Peek() == '+' || Peek() == '-') {
                sb.Append(Next());
            }
            if (!IsAsciiDigit(Peek())) {
                AddError("Expected digits after exponent");
                return new Token(TokType.EOF, "", startLine);
            }
            while (IsAsciiDigit(Peek())) {
                sb.Append(Next());
            }
        }

        return new Token(isFloat ? TokType.REAL : TokType.INT, sb.ToString(), startLine);
    }

    // Reads an operator or punctuation token, or records an error for anything else.
    Token ReadOperator() {
        int tokenLine = currentLine;

        if (Peek() == ':') {
            if (Peek2() == '=') {
                Next();
                Next();
                return new Token(TokType.ASSIGN, ":=", tokenLine);
            }
            // A lone ':' is reported and deliberately left unconsumed: scanning stops
            // here with EOF, and a further call would see the same ':' again.
            AddError("Expected '=' after ':' for assignment");
            return new Token(TokType.EOF, "", tokenLine);
        }

        if (Peek() == '<') {
            Next();
            if (Peek() == '=') {
                Next();
                return new Token(TokType.LE, "<=", tokenLine);
            }
            if (Peek() == '>') {
                Next();
                return new Token(TokType.NEQ, "<>", tokenLine);
            }
            return new Token(TokType.LT, "<", tokenLine);
        }

        if (Peek() == '>') {
            Next();
            if (Peek() == '=') {
                Next();
                return new Token(TokType.GE, ">=", tokenLine);
            }
            return new Token(TokType.GT, ">", tokenLine);
        }

        if (Peek() == '=') {
            Next();
            return new Token(TokType.EQ, "=", tokenLine);
        }

        char c = Next();
        switch (c) {
            case ';': return new Token(TokType.SEMI, ";", tokenLine);
            case '(': return new Token(TokType.LPAREN, "(", tokenLine);
            case ')': return new Token(TokType.RPAREN, ")", tokenLine);
            case '+': return new Token(TokType.PLUS, "+", tokenLine);
            case '-': return new Token(TokType.MINUS, "-", tokenLine);
            case '*': return new Token(TokType.MUL, "*", tokenLine);
            case '/': return new Token(TokType.DIV, "/", tokenLine);
        }

        // The offending character has been consumed; report it and stop with EOF.
        AddError($"Unexpected character '{c}'");
        return new Token(TokType.EOF, "", tokenLine);
    }
}
|
||||
Reference in New Issue
Block a user