Compiler aufgeräumt
This commit is contained in:
@ -44,590 +44,8 @@ class Program {
|
||||
}
|
||||
}
|
||||
|
||||
// === AST ===
|
||||
// Data types that a declared variable or an expression can carry.
// Every member has an explicit numeric value starting at 1, so the value 0
// (default(VarType)) does not correspond to any member.
public enum VarType
{
    // Boolean
    BOOL = 1,

    // Unsigned integers
    BYTE = 2,
    WORD = 3,
    DWORD = 4,
    LWORD = 5,

    // Signed integers
    SINT = 6,
    INT = 7,
    DINT = 8,
    LINT = 9,

    // Unsigned integers (alternative names)
    USINT = 10,
    UINT = 11,
    UDINT = 12,
    ULINT = 13,

    // Floating point
    REAL = 14,
    LREAL = 15,
}
|
||||
// Common base class of all syntax-tree nodes (declarations, statements, expressions).
// It carries no members of its own.
public abstract class StNode
{
}
|
||||
// Root of the syntax tree: the variable declarations of a program followed by
// its statements. Both lists start out empty and are filled by the parser.
public class ProgramNode : StNode
{
    public List<VarDecl> Vars = new List<VarDecl>();
    public List<Stmt> Stmts = new List<Stmt>();
}
|
||||
// One variable declaration: name, declared type and an optional initializer.
public class VarDecl : StNode
{
    // Variable name; must be supplied in the object initializer.
    public required string Name;

    // Declared data type.
    public VarType Type;

    // Initial-value expression, or null when the declaration has none.
    public Expr? Init;
}
|
||||
|
||||
// Base class of all statement nodes (assignment, IF, WHILE, FOR).
public abstract class Stmt : StNode
{
}
|
||||
// Assignment statement: stores the value of Expr in the variable named Target.
public class AssignStmt : Stmt
{
    // Name of the variable being assigned.
    public required string Target;

    // Right-hand side of the assignment.
    public required Expr Expr;
}
|
||||
// IF statement: a condition plus the statements of the THEN and ELSE branches.
public class IfStmt : Stmt
{
    // Condition that selects the branch.
    public required Expr Cond;

    // Statements of the THEN branch.
    public List<Stmt> ThenStmts = new List<Stmt>();

    // Statements of the ELSE branch; stays empty when there is no ELSE.
    public List<Stmt> ElseStmts = new List<Stmt>();
}
|
||||
// WHILE loop: a condition and the statements of the loop body.
public class WhileStmt : Stmt
{
    // Loop condition.
    public required Expr Cond;

    // Statements of the loop body.
    public List<Stmt> Body = new List<Stmt>();
}
|
||||
// FOR loop: loop variable, start and end expressions, step and body.
public class ForStmt : Stmt
{
    // Name of the loop variable.
    public required string Var;

    // Expression giving the initial value of the loop variable.
    public required Expr Start;

    // Expression giving the end value of the loop.
    public required Expr End;

    // Step expression; defaults to the integer literal 1.
    public Expr Step = new IntExpr(1);

    // Statements of the loop body.
    public List<Stmt> Body = new List<Stmt>();
}
|
||||
|
||||
// Base class of all expression nodes.
public abstract class Expr : StNode
{
    // Data type of the expression; set by the constructors of the derived classes.
    public VarType Type;
}
|
||||
// Integer literal.
public class IntExpr : Expr
{
    // Literal value.
    public long Value;

    // Creates an integer literal with value v; its type is DINT unless another type is given.
    public IntExpr(long v, VarType type = VarType.DINT)
    {
        this.Value = v;
        this.Type = type;
    }
}
|
||||
// Floating-point literal.
public class RealExpr : Expr
{
    // Literal value.
    public double Value;

    // Creates a floating-point literal with value v; its type is REAL unless another type is given.
    public RealExpr(double v, VarType type = VarType.REAL)
    {
        this.Value = v;
        this.Type = type;
    }
}
|
||||
// Reference to a variable by name.
public class VarExpr : Expr
{
    // Name of the referenced variable.
    public string Name;

    // Creates a reference to variable n whose data type is type.
    public VarExpr(string n, VarType type)
    {
        this.Name = n;
        this.Type = type;
    }
}
|
||||
// Binary operation "L Op R". The result type is derived from the operand types
// when the node is constructed.
public class BinaryExpr : Expr
{
    // Left operand.
    public Expr L;

    // Right operand.
    public Expr R;

    // Operator, expressed as the token type of the operator symbol.
    public TokType Op;

    // Builds the node and computes its result type from the types of l and r.
    // NOTE(review): the operator is not taken into account, so a comparison such
    // as "a < b" gets the operand type rather than BOOL - confirm that the code
    // consuming this tree expects that.
    public BinaryExpr(Expr l, TokType op, Expr r)
    {
        L = l;
        Op = op;
        R = r;
        Type = DetermineResultType(l.Type, r.Type);
    }

    // Picks the type of a binary operation:
    //  - LREAL if either operand is LREAL,
    //  - otherwise REAL if either operand is REAL,
    //  - otherwise the wider of the two types.
    // Types of equal width are ordered by their enum value, as before.
    private static VarType DetermineResultType(VarType left, VarType right)
    {
        if (left == VarType.LREAL || right == VarType.LREAL)
        {
            return VarType.LREAL;
        }

        if (left == VarType.REAL || right == VarType.REAL)
        {
            return VarType.REAL;
        }

        // The enum values are not ordered by size (USINT = 10 is greater than
        // LINT = 9), so comparing them directly could select an 8-bit type over
        // a 64-bit one. Compare the bit widths first.
        int leftBits = BitWidth(left);
        int rightBits = BitWidth(right);
        if (leftBits != rightBits)
        {
            return leftBits > rightBits ? left : right;
        }

        return (int)left > (int)right ? left : right;
    }

    // Width in bits of a boolean or integer type; 0 for any other value.
    private static int BitWidth(VarType type)
    {
        switch (type)
        {
            case VarType.BOOL:
                return 1;
            case VarType.BYTE:
            case VarType.SINT:
            case VarType.USINT:
                return 8;
            case VarType.WORD:
            case VarType.INT:
            case VarType.UINT:
                return 16;
            case VarType.DWORD:
            case VarType.DINT:
            case VarType.UDINT:
                return 32;
            case VarType.LWORD:
            case VarType.LINT:
            case VarType.ULINT:
                return 64;
            default:
                return 0;
        }
    }
}
|
||||
|
||||
// === TOKENIZER ===
|
||||
// Kinds of tokens produced by the lexer. The members rely on implicit
// numbering, so their order determines their numeric values.
public enum TokType
{
    // Identifiers, literals and punctuation
    IDENT,
    INT,
    REAL,
    ASSIGN,
    SEMI,
    LPAREN,
    RPAREN,

    // Arithmetic operators
    PLUS,
    MINUS,
    MUL,
    DIV,

    // Comparison operators
    LT,
    GT,
    LE,
    GE,
    EQ,
    NEQ,

    // IF statement keywords
    IF,
    THEN,
    ELSE,
    END_IF,

    // WHILE statement keywords
    WHILE,
    DO,
    END_WHILE,

    // FOR statement keywords
    FOR,
    TO,
    BY,
    END_FOR,

    // Program structure keywords
    PROGRAM,
    VAR,
    END_VAR,
    END_PROGRAM,

    // End of input
    EOF,
}
|
||||
// A single token: its kind, its text and the source line it was found on.
public class Token
{
    // Kind of token.
    public TokType Type;

    // Token text as delivered by the lexer.
    public string Text;

    // Source line of the token.
    public int Line;

    // Creates a token of kind t with text s located on the given line.
    public Token(TokType t, string s, int line)
    {
        this.Type = t;
        this.Text = s;
        this.Line = line;
    }
}
|
||||
|
||||
// A diagnostic message together with the source line it refers to.
public class CompileError
{
    // Source line the message refers to.
    public int Line;

    // Human-readable description of the problem.
    public string Message;

    // Creates a diagnostic for the given line with text msg.
    public CompileError(int line, string msg)
    {
        this.Line = line;
        this.Message = msg;
    }
}
|
||||
|
||||
// Hand-written tokenizer. Each call to NextToken() returns the next token of the
// source text. Lexical problems are appended to Errors together with the line on
// which they were detected; in that case the token returned is an EOF token.
public class StLexer
{
    // Reserved words, keyed by their upper-case spelling.
    private static readonly Dictionary<string, TokType> Keywords = new Dictionary<string, TokType>
    {
        ["PROGRAM"] = TokType.PROGRAM,
        ["VAR"] = TokType.VAR,
        ["END_VAR"] = TokType.END_VAR,
        ["END_PROGRAM"] = TokType.END_PROGRAM,
        ["IF"] = TokType.IF,
        ["THEN"] = TokType.THEN,
        ["ELSE"] = TokType.ELSE,
        ["END_IF"] = TokType.END_IF,
        ["WHILE"] = TokType.WHILE,
        ["DO"] = TokType.DO,
        ["END_WHILE"] = TokType.END_WHILE,
        ["FOR"] = TokType.FOR,
        ["TO"] = TokType.TO,
        ["BY"] = TokType.BY,
        ["END_FOR"] = TokType.END_FOR,
    };

    private readonly string _source;

    // Index of the next unread character in _source.
    private int _pos;

    // 1-based line number of the next unread character.
    private int _line = 1;

    // Lexical errors collected so far.
    public List<CompileError> Errors = new List<CompileError>();

    // Creates a lexer over the source text s.
    public StLexer(string s)
    {
        _source = s;
    }

    // Next unread character, or '\0' at the end of the input.
    char Peek() => _pos < _source.Length ? _source[_pos] : '\0';

    // Character after the next one, or '\0' if there is none.
    char Peek2() => _pos + 1 < _source.Length ? _source[_pos + 1] : '\0';

    // Consumes and returns the next character ('\0' at the end of the input)
    // and advances the line counter on '\n'.
    char Next()
    {
        if (_pos >= _source.Length)
        {
            return '\0';
        }

        char c = _source[_pos++];
        if (c == '\n')
        {
            _line++;
        }

        return c;
    }

    // Records an error for the current line.
    void AddError(string msg) => Errors.Add(new CompileError(_line, msg));

    // Returns the next token. At the end of the input, and after any lexical
    // error, the returned token has type EOF.
    public Token NextToken()
    {
        SkipWhitespaceAndComments();

        if (Peek() == '\0')
        {
            return new Token(TokType.EOF, "", _line);
        }

        if (char.IsLetter(Peek()) || Peek() == '_')
        {
            return ReadWord();
        }

        if (char.IsDigit(Peek()))
        {
            return ReadNumber();
        }

        return ReadOperator();
    }

    // Skips whitespace and '//' line comments. Implemented as a loop so that the
    // stack depth does not grow with the number of consecutive comment lines.
    void SkipWhitespaceAndComments()
    {
        while (true)
        {
            while (char.IsWhiteSpace(Peek()))
            {
                Next();
            }

            if (Peek() != '/' || Peek2() != '/')
            {
                return;
            }

            // Consume '//' and everything up to the end of the line or input.
            Next();
            Next();
            while (Peek() != '\0' && Peek() != '\n')
            {
                Next();
            }

            // Consume the newline that ends the comment, if present.
            if (Peek() == '\n')
            {
                Next();
            }
        }
    }

    // Reads an identifier or keyword. The token text is the upper-cased spelling.
    Token ReadWord()
    {
        int startLine = _line;
        int start = _pos;
        while (char.IsLetterOrDigit(Peek()) || Peek() == '_')
        {
            Next();
        }

        string word = _source.Substring(start, _pos - start).ToUpperInvariant();
        TokType type = Keywords.TryGetValue(word, out TokType keyword) ? keyword : TokType.IDENT;
        return new Token(type, word, startLine);
    }

    // Reads a number: digits, an optional '.' with further digits and an optional
    // exponent. A '.' or an exponent turns the token into REAL, otherwise it is INT.
    Token ReadNumber()
    {
        int startLine = _line;
        int start = _pos;
        bool isFloat = false;

        // Integer part in front of the decimal point.
        while (char.IsDigit(Peek()))
        {
            Next();
        }

        // Optional decimal point and fraction digits.
        if (Peek() == '.')
        {
            isFloat = true;
            Next();
            while (char.IsDigit(Peek()))
            {
                Next();
            }
        }

        // Optional exponent with an optional sign.
        if (Peek() == 'E' || Peek() == 'e')
        {
            isFloat = true;
            Next();
            if (Peek() == '+' || Peek() == '-')
            {
                Next();
            }

            if (!char.IsDigit(Peek()))
            {
                AddError("Expected digits after exponent");
                return new Token(TokType.EOF, "", startLine);
            }

            while (char.IsDigit(Peek()))
            {
                Next();
            }
        }

        string text = _source.Substring(start, _pos - start);
        return new Token(isFloat ? TokType.REAL : TokType.INT, text, startLine);
    }

    // Reads an operator or punctuation token.
    Token ReadOperator()
    {
        int tokenLine = _line;

        switch (Peek())
        {
            case ':':
                Next();
                if (Peek() == '=')
                {
                    Next();
                    return new Token(TokType.ASSIGN, ":=", tokenLine);
                }

                AddError("Expected '=' after ':' for assignment");
                // A lone ':' yields EOF so that parsing stops. The position is
                // moved back onto the ':' first, so another call to NextToken()
                // reads the same character and reports the same error again.
                _pos--;
                return new Token(TokType.EOF, "", tokenLine);

            case '<':
                Next();
                if (Peek() == '=')
                {
                    Next();
                    return new Token(TokType.LE, "<=", tokenLine);
                }

                if (Peek() == '>')
                {
                    Next();
                    return new Token(TokType.NEQ, "<>", tokenLine);
                }

                return new Token(TokType.LT, "<", tokenLine);

            case '>':
                Next();
                if (Peek() == '=')
                {
                    Next();
                    return new Token(TokType.GE, ">=", tokenLine);
                }

                return new Token(TokType.GT, ">", tokenLine);

            case '=':
                Next();
                return new Token(TokType.EQ, "=", tokenLine);
        }

        char c = Next();
        switch (c)
        {
            case ';': return new Token(TokType.SEMI, ";", tokenLine);
            case '(': return new Token(TokType.LPAREN, "(", tokenLine);
            case ')': return new Token(TokType.RPAREN, ")", tokenLine);
            case '+': return new Token(TokType.PLUS, "+", tokenLine);
            case '-': return new Token(TokType.MINUS, "-", tokenLine);
            case '*': return new Token(TokType.MUL, "*", tokenLine);
            case '/': return new Token(TokType.DIV, "/", tokenLine);
        }

        // The offending character has been consumed; an EOF token is returned
        // rather than skipping ahead to the next valid token.
        AddError($"Unexpected character '{c}'");
        return new Token(TokType.EOF, "", tokenLine);
    }
}
|
||||
|
||||
// === PARSER ===
|
||||
// Symbol-table entry for a declared variable.
public record Symbol
{
    // Variable name; must be supplied in the object initializer.
    public required string Name;

    // Declared data type of the variable.
    public VarType Type;

    // NOTE(review): the parser in this file never assigns Index; presumably it is
    // filled in by a later stage - confirm.
    public int Index;
}
|
||||
|
||||
// Recursive-descent parser. It pulls tokens from an StLexer and builds the syntax
// tree of one program. Every parse method returns null after recording an error,
// and parsing stops at the first error; lexer and parser errors share one list.
public class StParser
{
    private readonly StLexer lex;

    // Current (not yet consumed) token.
    private Token cur;

    // Declared variables, keyed by upper-case name.
    private readonly Dictionary<string, Symbol> syms = new Dictionary<string, Symbol>();

    // All errors reported so far by the lexer and the parser.
    public List<CompileError> Errors => lex.Errors;

    // True once at least one error has been reported.
    public bool HasErrors => Errors.Count > 0;

    // Creates a parser for the source text s and reads the first token.
    public StParser(string s)
    {
        lex = new StLexer(s);
        cur = lex.NextToken();
    }

    // Advances to the next token.
    void Next() => cur = lex.NextToken();

    // Records an error against the line of the current token.
    void AddError(string msg) => lex.Errors.Add(new CompileError(cur.Line, msg));

    // Consumes the current token if it has type t; otherwise records an error,
    // leaves the token in place and returns false.
    bool Expect(TokType t)
    {
        if (cur.Type != t)
        {
            AddError($"Expected {t}, got {cur.Type}");
            return false;
        }

        Next();
        return true;
    }

    // Parses statements into target until the current token is terminator,
    // altTerminator (when given) or EOF. The terminator itself is not consumed.
    // Returns false as soon as a statement fails to parse.
    bool ParseStatementsUntil(List<Stmt> target, TokType terminator, TokType? altTerminator = null)
    {
        while (cur.Type != terminator && cur.Type != altTerminator && cur.Type != TokType.EOF)
        {
            var stmt = ParseStmt();
            if (stmt == null)
            {
                return false;
            }

            target.Add(stmt);
        }

        return true;
    }

    // Parses: PROGRAM [name] [VAR declarations END_VAR] statements END_PROGRAM.
    // Returns null if any part fails to parse.
    public ProgramNode? ParseProgram()
    {
        var program = new ProgramNode();
        if (!Expect(TokType.PROGRAM))
        {
            return null;
        }

        // The program name is optional and is not stored.
        if (cur.Type == TokType.IDENT)
        {
            Next();
        }

        if (cur.Type == TokType.VAR)
        {
            Next();
            while (cur.Type == TokType.IDENT)
            {
                var varDecl = ParseVarDecl();
                if (varDecl == null)
                {
                    return null;
                }

                program.Vars.Add(varDecl);
            }

            if (!Expect(TokType.END_VAR))
            {
                return null;
            }
        }

        if (!ParseStatementsUntil(program.Stmts, TokType.END_PROGRAM))
        {
            return null;
        }

        if (!Expect(TokType.END_PROGRAM))
        {
            return null;
        }

        return program;
    }

    // Parses one declaration "name type [:= expr] ;" and registers the variable
    // in the symbol table.
    VarDecl? ParseVarDecl()
    {
        if (cur.Type != TokType.IDENT)
        {
            AddError("Expected identifier for variable declaration");
            return null;
        }

        string name = cur.Text.ToUpperInvariant(); // variable names are stored upper-case
        Next();

        if (cur.Type != TokType.IDENT)
        {
            AddError("Expected type name");
            return null;
        }

        string tname = cur.Text.ToLowerInvariant();
        VarType? vt = tname switch
        {
            // Boolean
            "bool" => VarType.BOOL,
            // Unsigned integers
            "byte" => VarType.BYTE,
            "word" => VarType.WORD,
            "dword" => VarType.DWORD,
            "lword" => VarType.LWORD,
            // Signed integers
            "sint" => VarType.SINT,
            "int" => VarType.INT,
            "dint" => VarType.DINT,
            "lint" => VarType.LINT,
            // Unsigned integers (alternative names)
            "usint" => VarType.USINT,
            "uint" => VarType.UINT,
            "udint" => VarType.UDINT,
            "ulint" => VarType.ULINT,
            // Floating point
            "real" => VarType.REAL,
            "lreal" => VarType.LREAL,
            _ => null
        };

        // Report an unknown type while the type name is still the current token,
        // so the error carries the line of the type name itself.
        if (vt == null)
        {
            AddError($"Unknown type '{tname}'");
            return null;
        }

        Next(); // consume type name

        Expr? init = null;
        if (cur.Type == TokType.ASSIGN)
        {
            Next(); // consume :=
            init = ParseExpr();
            if (init == null)
            {
                AddError($"Expected expression after ':=' in variable declaration");
                return null;
            }
        }

        if (!Expect(TokType.SEMI))
        {
            return null;
        }

        // Add the variable to the symbol table.
        // NOTE(review): a second declaration with the same name silently replaces
        // the first entry here while both VarDecl nodes end up in the program -
        // consider reporting a duplicate declaration.
        syms[name] = new Symbol { Name = name, Type = vt.Value };

        return new VarDecl { Name = name, Type = vt.Value, Init = init };
    }

    // Parses "target := expr ;". The caller has already checked that the current
    // token is an identifier.
    // NOTE(review): unlike variables used inside expressions, the assignment
    // target is not looked up in the symbol table - confirm a later stage checks it.
    Stmt? ParseAssign()
    {
        string target = cur.Text.ToUpperInvariant(); // variable names are stored upper-case
        Next(); // consume identifier

        if (cur.Type != TokType.ASSIGN)
        {
            AddError($"Expected ':=' after identifier '{target}'");
            return null;
        }

        Next(); // consume :=

        var value = ParseExpr();
        if (value == null)
        {
            return null;
        }

        if (!Expect(TokType.SEMI))
        {
            return null;
        }

        return new AssignStmt { Target = target, Expr = value };
    }

    // Parses one statement, selected by the type of the current token.
    Stmt? ParseStmt()
    {
        switch (cur.Type)
        {
            case TokType.IF:
                return ParseIf();
            case TokType.WHILE:
                return ParseWhile();
            case TokType.FOR:
                return ParseFor();
            case TokType.IDENT:
                return ParseAssign();
            default:
                AddError($"Unexpected token {cur.Type} in statement");
                return null;
        }
    }

    // Parses: IF cond THEN statements [ELSE statements] END_IF ;
    IfStmt? ParseIf()
    {
        Next(); // IF

        var cond = ParseExpr();
        if (cond == null)
        {
            return null;
        }

        if (!Expect(TokType.THEN))
        {
            return null;
        }

        var node = new IfStmt { Cond = cond };
        if (!ParseStatementsUntil(node.ThenStmts, TokType.ELSE, TokType.END_IF))
        {
            return null;
        }

        if (cur.Type == TokType.ELSE)
        {
            Next();
            if (!ParseStatementsUntil(node.ElseStmts, TokType.END_IF))
            {
                return null;
            }
        }

        if (!Expect(TokType.END_IF))
        {
            return null;
        }

        if (!Expect(TokType.SEMI))
        {
            return null;
        }

        return node;
    }

    // Parses: WHILE cond DO statements END_WHILE ;
    WhileStmt? ParseWhile()
    {
        Next(); // WHILE

        var cond = ParseExpr();
        if (cond == null)
        {
            return null;
        }

        if (!Expect(TokType.DO))
        {
            return null;
        }

        var loop = new WhileStmt { Cond = cond };
        if (!ParseStatementsUntil(loop.Body, TokType.END_WHILE))
        {
            return null;
        }

        if (!Expect(TokType.END_WHILE))
        {
            return null;
        }

        if (!Expect(TokType.SEMI))
        {
            return null;
        }

        return loop;
    }

    // Parses: FOR var := start TO end [BY step] DO statements END_FOR ;
    // Without a BY clause the step is the integer literal 1.
    ForStmt? ParseFor()
    {
        Next(); // FOR

        if (cur.Type != TokType.IDENT)
        {
            AddError("Expected identifier for FOR loop variable");
            return null;
        }

        string varName = cur.Text.ToUpperInvariant(); // variable names are stored upper-case
        Next(); // consume identifier

        if (cur.Type != TokType.ASSIGN)
        {
            AddError($"Expected ':=' after identifier '{varName}'");
            return null;
        }

        Next(); // consume :=

        var start = ParseExpr();
        if (start == null)
        {
            return null;
        }

        if (!Expect(TokType.TO))
        {
            return null;
        }

        var end = ParseExpr();
        if (end == null)
        {
            return null;
        }

        Expr step = new IntExpr(1);
        if (cur.Type == TokType.BY)
        {
            Next();

            // A BY clause whose expression fails to parse aborts the statement
            // like every other parse failure, instead of silently using step 1.
            var explicitStep = ParseExpr();
            if (explicitStep == null)
            {
                return null;
            }

            step = explicitStep;
        }

        if (!Expect(TokType.DO))
        {
            return null;
        }

        var loop = new ForStmt { Var = varName, Start = start, End = end, Step = step };
        if (!ParseStatementsUntil(loop.Body, TokType.END_FOR))
        {
            return null;
        }

        if (!Expect(TokType.END_FOR))
        {
            return null;
        }

        if (!Expect(TokType.SEMI))
        {
            return null;
        }

        return loop;
    }

    // Parses an expression. Precedence from lowest to highest: comparison,
    // addition/subtraction, multiplication/division, primary.
    Expr? ParseExpr() => ParseCompare();

    // Parses a left-associative chain of comparison operators.
    Expr? ParseCompare()
    {
        var left = ParseAddSub();
        if (left == null)
        {
            return null;
        }

        while (cur.Type is TokType.LT or TokType.GT or TokType.LE or TokType.GE or TokType.EQ or TokType.NEQ)
        {
            var op = cur.Type;
            Next();

            var right = ParseAddSub();
            if (right == null)
            {
                return null;
            }

            left = new BinaryExpr(left, op, right);
        }

        return left;
    }

    // Parses a left-associative chain of '+' and '-'.
    Expr? ParseAddSub()
    {
        var left = ParseMulDiv();
        if (left == null)
        {
            return null;
        }

        while (cur.Type == TokType.PLUS || cur.Type == TokType.MINUS)
        {
            var op = cur.Type;
            Next();

            var right = ParseMulDiv();
            if (right == null)
            {
                return null;
            }

            left = new BinaryExpr(left, op, right);
        }

        return left;
    }

    // Parses a left-associative chain of '*' and '/'.
    Expr? ParseMulDiv()
    {
        var left = ParsePrimary();
        if (left == null)
        {
            return null;
        }

        while (cur.Type == TokType.MUL || cur.Type == TokType.DIV)
        {
            var op = cur.Type;
            Next();

            var right = ParsePrimary();
            if (right == null)
            {
                return null;
            }

            left = new BinaryExpr(left, op, right);
        }

        return left;
    }

    // Parses a primary expression: a literal, a declared variable, a parenthesized
    // expression, or any of these preceded by unary '+' or '-'.
    Expr? ParsePrimary()
    {
        // Unary + and - are represented as "0 + operand" and "0 - operand",
        // with a zero literal of the operand's type.
        if (cur.Type == TokType.PLUS || cur.Type == TokType.MINUS)
        {
            var sign = cur.Type;
            Next();

            var operand = ParsePrimary();
            if (operand == null)
            {
                return null;
            }

            Expr zero;
            if (operand.Type == VarType.REAL || operand.Type == VarType.LREAL)
            {
                zero = new RealExpr(0.0, operand.Type);
            }
            else
            {
                zero = new IntExpr(0, operand.Type);
            }

            return new BinaryExpr(zero, sign, operand);
        }

        switch (cur.Type)
        {
            case TokType.INT:
                // Literals are parsed with the invariant culture so the result
                // does not depend on the regional settings of the machine.
                if (!long.TryParse(
                        cur.Text,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var v))
                {
                    AddError($"Invalid integer literal '{cur.Text}'");
                    return null;
                }

                Next();
                return new IntExpr(v, VarType.DINT);

            case TokType.REAL:
                // With the current culture, a locale that uses '.' as group
                // separator (e.g. de-DE) would read "1.5" as 15.
                if (!double.TryParse(
                        cur.Text,
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var d))
                {
                    AddError($"Invalid floating point literal '{cur.Text}'");
                    return null;
                }

                Next();
                return new RealExpr(d);

            case TokType.IDENT:
                string n = cur.Text.ToUpperInvariant(); // variable names are stored upper-case

                // Look the name up before advancing so that the error carries
                // the line of the identifier itself.
                if (!syms.TryGetValue(n, out var sym))
                {
                    AddError($"Undeclared variable '{n}'");
                    return null;
                }

                Next();
                return new VarExpr(n, sym.Type);

            case TokType.LPAREN:
                Next();

                var inner = ParseExpr();
                if (inner == null)
                {
                    return null;
                }

                if (!Expect(TokType.RPAREN))
                {
                    return null;
                }

                return inner;

            default:
                AddError($"Unexpected token {cur.Type} in expression");
                return null;
        }
    }
}
|
||||
|
||||
// The parser, lexer and AST were moved to Parser.cs, Lexer.cs and Ast.cs.
// The entry point uses the implementations from those files.
|
||||
|
||||
// === BYTECODE ===
|
||||
public class BytecodeEmitter {
|
||||
|
||||
Reference in New Issue
Block a user