Verhalten wie ein Compiler: Fehler werden in der Konsole ausgegeben, statt Exceptions zu werfen.

This commit is contained in:
2025-10-12 17:15:10 +02:00
parent dbd7715193
commit d5c6653e8a
10 changed files with 385 additions and 141 deletions

View File

@ -14,11 +14,30 @@ class Program {
var input = File.ReadAllText(args[0]);
var parser = new StParser(input);
var prog = parser.ParseProgram();
var emitter = new BytecodeEmitter();
emitter.Compile(prog);
File.WriteAllBytes(args[1], emitter.BuildBinary());
Console.WriteLine($"Wrote {args[1]}: consts={emitter.ConstantsCount}, vars={emitter.VarCount}, code={emitter.CodeLength}");
return 0;
if (parser.HasErrors) {
Console.WriteLine($"Compilation failed with {parser.Errors.Count} errors:");
foreach (var error in parser.Errors) {
Console.WriteLine($"Error at line {error.Line}: {error.Message}");
}
return 1;
}
if (prog == null) {
Console.WriteLine("Compilation failed: invalid program structure");
return 1;
}
try {
var emitter = new BytecodeEmitter();
emitter.Compile(prog);
File.WriteAllBytes(args[1], emitter.BuildBinary());
Console.WriteLine($"Wrote {args[1]}: consts={emitter.ConstantsCount}, vars={emitter.VarCount}, code={emitter.CodeLength}");
return 0;
} catch (Exception ex) {
Console.WriteLine($"Internal compiler error: {ex.Message}");
return 1;
}
}
}
@ -26,13 +45,33 @@ class Program {
/// <summary>
/// Variable types that can be named in a declaration.
/// The explicit numeric values are part of the enum's contract and must not change.
/// </summary>
public enum VarType
{
    Int8 = 1,
    Int16 = 2,
    Int32 = 3,
    Byte = 4,
    Bool = 5,
}
/// <summary>Common base class of every syntax-tree node.</summary>
public abstract class StNode
{
}
/// <summary>
/// Root node of a parsed program: its variable declarations followed by
/// its top-level statements. Both lists start out empty.
/// </summary>
public class ProgramNode : StNode
{
    /// <summary>Declared variables, in declaration order.</summary>
    public List<VarDecl> Vars = new List<VarDecl>();

    /// <summary>Top-level statements, in source order.</summary>
    public List<Stmt> Stmts = new List<Stmt>();
}
public class VarDecl:StNode{ public string Name; public VarType Type; public Expr? Init; }
/// <summary>A single variable declaration: name, type and optional initializer.</summary>
public class VarDecl : StNode
{
    /// <summary>Variable name; must be supplied in the object initializer.</summary>
    public required string Name;

    /// <summary>Declared type of the variable.</summary>
    public VarType Type;

    /// <summary>Initializer expression, or null when the declaration has none.</summary>
    public Expr? Init;
}
/// <summary>Base class of all statement nodes.</summary>
public abstract class Stmt : StNode
{
}
public class AssignStmt:Stmt{ public string Target; public Expr Expr; }
public class IfStmt:Stmt{ public Expr Cond; public List<Stmt> ThenStmts=new(); public List<Stmt> ElseStmts=new(); }
public class WhileStmt:Stmt{ public Expr Cond; public List<Stmt> Body=new(); }
public class ForStmt:Stmt{ public string Var; public Expr Start, End, Step = new IntExpr(1); public List<Stmt> Body=new(); }
/// <summary>Assignment statement: stores the value of <see cref="Expr"/> in <see cref="Target"/>.</summary>
public class AssignStmt : Stmt
{
    /// <summary>Name of the variable being assigned.</summary>
    public required string Target;

    /// <summary>Expression whose value is assigned.</summary>
    public required Expr Expr;
}
/// <summary>Conditional statement with a THEN branch and an optional ELSE branch.</summary>
public class IfStmt : Stmt
{
    /// <summary>Condition expression.</summary>
    public required Expr Cond;

    /// <summary>Statements of the THEN branch.</summary>
    public List<Stmt> ThenStmts = new List<Stmt>();

    /// <summary>Statements of the ELSE branch; stays empty when there is no ELSE.</summary>
    public List<Stmt> ElseStmts = new List<Stmt>();
}
/// <summary>WHILE loop: a condition plus the statements of the loop body.</summary>
public class WhileStmt : Stmt
{
    /// <summary>Loop condition expression.</summary>
    public required Expr Cond;

    /// <summary>Statements of the loop body.</summary>
    public List<Stmt> Body = new List<Stmt>();
}
/// <summary>FOR loop over a named variable from a start to an end expression.</summary>
public class ForStmt : Stmt
{
    /// <summary>Name of the loop variable.</summary>
    public required string Var;

    /// <summary>Expression giving the initial value of the loop variable.</summary>
    public required Expr Start;

    /// <summary>Expression giving the end bound of the loop.</summary>
    public required Expr End;

    /// <summary>Step expression; defaults to the integer literal 1.</summary>
    public Expr Step = new IntExpr(1);

    /// <summary>Statements of the loop body.</summary>
    public List<Stmt> Body = new List<Stmt>();
}
/// <summary>Base class of all expression nodes.</summary>
public abstract class Expr : StNode
{
}
/// <summary>Integer literal expression.</summary>
public class IntExpr : Expr
{
    /// <summary>The literal's value.</summary>
    public int Value;

    /// <summary>Creates a literal node holding <paramref name="v"/>.</summary>
    public IntExpr(int v) => Value = v;
}
@ -50,204 +89,402 @@ public enum TokType {
PROGRAM, VAR, END_VAR, END_PROGRAM,
EOF
}
public class Token{ public TokType Type; public string Text; public Token(TokType t,string s){Type=t;Text=s;} }
/// <summary>A lexical token: its kind, its text and the source line it was read from.</summary>
public class Token
{
    /// <summary>Kind of the token.</summary>
    public TokType Type;

    /// <summary>Text of the token as produced by the lexer.</summary>
    public string Text;

    /// <summary>Source line number associated with the token.</summary>
    public int Line;

    /// <summary>Creates a token of kind <paramref name="t"/> with text <paramref name="s"/> on line <paramref name="line"/>.</summary>
    public Token(TokType t, string s, int line) => (Type, Text, Line) = (t, s, line);
}
/// <summary>A diagnostic produced during compilation: a source line and a message.</summary>
public class CompileError
{
    /// <summary>Source line the error refers to.</summary>
    public int Line;

    /// <summary>Human-readable description of the error.</summary>
    public string Message;

    /// <summary>Creates an error for line <paramref name="line"/> with text <paramref name="msg"/>.</summary>
    public CompileError(int line, string msg) => (Line, Message) = (line, msg);
}
/// <summary>
/// Hand-written lexer. Identifiers and keywords are upper-cased, so matching is
/// case-insensitive. Lexical problems are recorded in <see cref="Errors"/>
/// instead of being thrown.
/// </summary>
public class StLexer {
    private readonly string src;
    private int i;
    private int currentLine = 1;

    /// <summary>Errors collected so far, each tagged with the line it occurred on.</summary>
    public List<CompileError> Errors = new();

    /// <summary>Creates a lexer over the source text <paramref name="s"/>.</summary>
    public StLexer(string s){src=s;}

    // Returns the current character without consuming it, or '\0' at end of input.
    char Peek() => i < src.Length ? src[i] : '\0';

    // Consumes and returns the current character ('\0' at end of input),
    // keeping the line counter in sync.
    char Next() {
        if (i >= src.Length) return '\0';
        char c = src[i++];
        if (c == '\n') currentLine++;
        return c;
    }

    void AddError(string msg) => Errors.Add(new CompileError(currentLine, msg));

    /// <summary>
    /// Reads the next token. Returns an EOF token at end of input and also after
    /// a lexical error (the error itself is added to <see cref="Errors"/>), so
    /// callers should check <see cref="Errors"/> to tell the two apart.
    /// </summary>
    public Token NextToken() {
        while (char.IsWhiteSpace(Peek())) Next();
        if (Peek() == '\0') return new Token(TokType.EOF, "", currentLine);

        // No token spans a line break, so one line number serves every branch.
        int tokenLine = currentLine;

        if (char.IsLetter(Peek()) || Peek() == '_') {
            var sb = new StringBuilder();
            while (char.IsLetterOrDigit(Peek()) || Peek() == '_') sb.Append(Next());
            var s = sb.ToString().ToUpperInvariant();
            TokType type = s switch {
                "PROGRAM" => TokType.PROGRAM,
                "VAR" => TokType.VAR,
                "END_VAR" => TokType.END_VAR,
                "END_PROGRAM" => TokType.END_PROGRAM,
                "IF" => TokType.IF,
                "THEN" => TokType.THEN,
                "ELSE" => TokType.ELSE,
                "END_IF" => TokType.END_IF,
                "WHILE" => TokType.WHILE,
                "DO" => TokType.DO,
                "END_WHILE" => TokType.END_WHILE,
                "FOR" => TokType.FOR,
                "TO" => TokType.TO,
                "BY" => TokType.BY,
                "END_FOR" => TokType.END_FOR,
                _ => TokType.IDENT
            };
            return new Token(type, s, tokenLine);
        }

        if (char.IsDigit(Peek())) {
            var sb = new StringBuilder();
            while (char.IsDigit(Peek())) sb.Append(Next());
            return new Token(TokType.INT, sb.ToString(), tokenLine);
        }

        if (Peek() == ':') {
            Next();
            if (Peek() == '=') {
                Next();
                return new Token(TokType.ASSIGN, ":=", tokenLine);
            }
            AddError("Expected '=' after ':' for assignment");
            // The stray ':' stays consumed (same as any other unexpected character).
            // Rewinding here would make every later call re-read it and report the
            // same error again. EOF is returned so the parser stops.
            return new Token(TokType.EOF, "", tokenLine);
        }

        if (Peek() == '<') {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.LE, "<=", tokenLine); }
            if (Peek() == '>') { Next(); return new Token(TokType.NEQ, "<>", tokenLine); }
            return new Token(TokType.LT, "<", tokenLine);
        }

        if (Peek() == '>') {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.GE, ">=", tokenLine); }
            return new Token(TokType.GT, ">", tokenLine);
        }

        if (Peek() == '=') { Next(); return new Token(TokType.EQ, "=", tokenLine); }

        char c = Next();
        switch (c) {
            case ';': return new Token(TokType.SEMI, ";", tokenLine);
            case '(': return new Token(TokType.LPAREN, "(", tokenLine);
            case ')': return new Token(TokType.RPAREN, ")", tokenLine);
            case '+': return new Token(TokType.PLUS, "+", tokenLine);
            case '-': return new Token(TokType.MINUS, "-", tokenLine);
            case '*': return new Token(TokType.MUL, "*", tokenLine);
            case '/': return new Token(TokType.DIV, "/", tokenLine);
        }

        AddError($"Unexpected character '{c}'");
        // The invalid character has been consumed; EOF makes the parser stop.
        return new Token(TokType.EOF, "", tokenLine);
    }
}
// === PARSER ===
/// <summary>
/// Recursive-descent parser. Instead of throwing, every parse method records a
/// <see cref="CompileError"/> and returns null (or false) on failure; callers
/// propagate the failure upwards. Check <see cref="HasErrors"/> after parsing.
/// </summary>
public class StParser {
    StLexer lex;
    Token cur;

    /// <summary>All errors recorded so far; this is the lexer's list, which the parser also appends to.</summary>
    public List<CompileError> Errors => lex.Errors;

    /// <summary>True when at least one error has been recorded.</summary>
    public bool HasErrors => Errors.Count > 0;

    /// <summary>Creates a parser over source text <paramref name="s"/> and reads the first token.</summary>
    public StParser(string s){
        lex = new StLexer(s);
        cur = lex.NextToken();
    }

    void Next() => cur = lex.NextToken();

    // Records an error at the line of the current token.
    void AddError(string msg) => lex.Errors.Add(new CompileError(cur.Line, msg));

    // Consumes the current token if it has type t; otherwise records an error,
    // leaves the token in place and returns false.
    bool Expect(TokType t){
        if (cur.Type != t) {
            AddError($"Expected {t}, got {cur.Type}");
            return false;
        }
        Next();
        return true;
    }

    // Parses statements into body until the current token is `end`, `alt`
    // (if given) or EOF. Returns false as soon as one statement fails to parse.
    bool ParseBlock(List<Stmt> body, TokType end, TokType? alt = null) {
        while (cur.Type != end && cur.Type != TokType.EOF && (alt == null || cur.Type != alt.Value)) {
            var stmt = ParseStmt();
            if (stmt == null) return false;
            body.Add(stmt);
        }
        return true;
    }

    /// <summary>
    /// Parses a whole program: PROGRAM, optional name, optional VAR ... END_VAR
    /// section, statements, END_PROGRAM.
    /// </summary>
    /// <returns>The program node, or null if a syntax error was recorded.</returns>
    public ProgramNode? ParseProgram(){
        var p = new ProgramNode();
        if (!Expect(TokType.PROGRAM)) return null;
        if (cur.Type == TokType.IDENT) Next(); // optional program name
        if (cur.Type == TokType.VAR) {
            Next();
            while (cur.Type == TokType.IDENT) {
                var varDecl = ParseVarDecl();
                if (varDecl == null) return null;
                p.Vars.Add(varDecl);
            }
            if (!Expect(TokType.END_VAR)) return null;
        }
        if (!ParseBlock(p.Stmts, TokType.END_PROGRAM)) return null;
        if (!Expect(TokType.END_PROGRAM)) return null;
        return p;
    }

    // Parses `name type [:= expr] ;`.
    VarDecl? ParseVarDecl(){
        if (cur.Type != TokType.IDENT) {
            AddError("Expected identifier for variable declaration");
            return null;
        }
        string name = cur.Text;
        Next();
        if (cur.Type != TokType.IDENT) {
            AddError("Expected type name");
            return null;
        }
        string tname = cur.Text.ToLowerInvariant();
        Next();
        VarType? vt = tname switch {
            "int8" => VarType.Int8,
            "int16" => VarType.Int16,
            "int32" => VarType.Int32,
            "byte" => VarType.Byte,
            "bool" => VarType.Bool,
            _ => null
        };
        if (vt == null) {
            AddError($"Unknown type '{tname}'");
            return null;
        }
        Expr? init = null;
        if (cur.Type == TokType.ASSIGN) {
            Next(); // consume :=
            init = ParseExpr();
            if (init == null) {
                AddError($"Expected expression after ':=' in variable declaration");
                return null;
            }
        }
        if (!Expect(TokType.SEMI)) return null;
        return new VarDecl{Name=name, Type=vt.Value, Init=init};
    }

    // Parses `ident := expr ;`. The caller has already verified that the
    // current token is an IDENT.
    Stmt? ParseAssign() {
        string target = cur.Text;
        Next(); // consume identifier
        if (cur.Type != TokType.ASSIGN) {
            AddError($"Expected ':=' after identifier '{target}'");
            return null;
        }
        Next(); // consume :=
        var e = ParseExpr();
        if (e == null) return null;
        if (!Expect(TokType.SEMI)) return null;
        return new AssignStmt{Target=target, Expr=e};
    }

    // Dispatches on the current token to the matching statement parser.
    Stmt? ParseStmt(){
        switch (cur.Type) {
            case TokType.IF: return ParseIf();
            case TokType.WHILE: return ParseWhile();
            case TokType.FOR: return ParseFor();
            case TokType.IDENT: return ParseAssign();
            default:
                AddError($"Unexpected token {cur.Type} in statement");
                return null;
        }
    }

    // Parses `IF cond THEN stmts [ELSE stmts] END_IF ;`.
    IfStmt? ParseIf(){
        Next(); // consume IF
        var cond = ParseExpr();
        if (cond == null) return null;
        if (!Expect(TokType.THEN)) return null;
        var node = new IfStmt{Cond=cond};
        if (!ParseBlock(node.ThenStmts, TokType.END_IF, TokType.ELSE)) return null;
        if (cur.Type == TokType.ELSE) {
            Next();
            if (!ParseBlock(node.ElseStmts, TokType.END_IF)) return null;
        }
        if (!Expect(TokType.END_IF)) return null;
        if (!Expect(TokType.SEMI)) return null;
        return node;
    }

    // Parses `WHILE cond DO stmts END_WHILE ;`.
    WhileStmt? ParseWhile(){
        Next(); // consume WHILE
        var cond = ParseExpr();
        if (cond == null) return null;
        if (!Expect(TokType.DO)) return null;
        var ws = new WhileStmt{Cond=cond};
        if (!ParseBlock(ws.Body, TokType.END_WHILE)) return null;
        if (!Expect(TokType.END_WHILE)) return null;
        if (!Expect(TokType.SEMI)) return null;
        return ws;
    }

    // Parses `FOR ident := start TO end [BY step] DO stmts END_FOR ;`.
    ForStmt? ParseFor(){
        Next(); // consume FOR
        if (cur.Type != TokType.IDENT) {
            AddError("Expected identifier for FOR loop variable");
            return null;
        }
        string varName = cur.Text;
        Next(); // consume identifier
        if (cur.Type != TokType.ASSIGN) {
            AddError($"Expected ':=' after identifier '{varName}'");
            return null;
        }
        Next(); // consume :=
        var start = ParseExpr();
        if (start == null) return null;
        if (!Expect(TokType.TO)) return null;
        var end = ParseExpr();
        if (end == null) return null;
        Expr step = new IntExpr(1);
        if (cur.Type == TokType.BY) {
            Next();
            // A BY clause whose expression fails to parse is an error like any
            // other; do not silently fall back to the default step.
            var parsedStep = ParseExpr();
            if (parsedStep == null) return null;
            step = parsedStep;
        }
        if (!Expect(TokType.DO)) return null;
        var fs = new ForStmt{Var=varName, Start=start, End=end, Step=step};
        if (!ParseBlock(fs.Body, TokType.END_FOR)) return null;
        if (!Expect(TokType.END_FOR)) return null;
        if (!Expect(TokType.SEMI)) return null;
        return fs;
    }

    // Expression entry point; comparison has the lowest precedence.
    Expr? ParseExpr() => ParseCompare();

    // Left-associative chain of <, >, <=, >=, =, <> over additive expressions.
    Expr? ParseCompare(){
        var l = ParseAddSub();
        if (l == null) return null;
        while (cur.Type is TokType.LT or TokType.GT or TokType.LE or TokType.GE or TokType.EQ or TokType.NEQ) {
            var op = cur.Type;
            Next();
            var r = ParseAddSub();
            if (r == null) return null;
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // Left-associative chain of + and - over multiplicative expressions.
    Expr? ParseAddSub(){
        var l = ParseMulDiv();
        if (l == null) return null;
        while (cur.Type == TokType.PLUS || cur.Type == TokType.MINUS) {
            var op = cur.Type;
            Next();
            var r = ParseMulDiv();
            if (r == null) return null;
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // Left-associative chain of * and / over primary expressions.
    Expr? ParseMulDiv(){
        var l = ParsePrimary();
        if (l == null) return null;
        while (cur.Type == TokType.MUL || cur.Type == TokType.DIV) {
            var op = cur.Type;
            Next();
            var r = ParsePrimary();
            if (r == null) return null;
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // Parses an integer literal, a variable reference or a parenthesised expression.
    Expr? ParsePrimary(){
        switch (cur.Type) {
            case TokType.INT:
                // The lexer only guarantees a run of digits; the value may still
                // be unparseable as an int (for example, too large).
                if (!int.TryParse(cur.Text, out var v)) {
                    AddError($"Invalid integer literal '{cur.Text}'");
                    return null;
                }
                Next();
                return new IntExpr(v);
            case TokType.IDENT:
                string n = cur.Text;
                Next();
                return new VarExpr(n);
            case TokType.LPAREN:
                Next();
                var e = ParseExpr();
                if (e == null) return null;
                if (!Expect(TokType.RPAREN)) return null;
                return e;
            default:
                AddError($"Unexpected token {cur.Type} in expression");
                return null;
        }
    }
}
}
// === BYTECODE ===
public class BytecodeEmitter {
List<int> consts=new(); Dictionary<string,Symbol> syms=new(); List<byte> code=new();
class Symbol{public string Name;public VarType Type;public int Index;}
// Symbol-table entry: a variable's name, its declared type and its index.
class Symbol
{
    public required string Name;
    public VarType Type;
    public int Index;
}
public int ConstantsCount=>consts.Count; public int VarCount=>syms.Count; public int CodeLength=>code.Count;
public void Compile(ProgramNode p){
@ -264,11 +501,21 @@ public class BytecodeEmitter {
EmitByte(0xF0); // Program End
}
// Opcode byte for each binary operator token. The table is constant, so it is
// shared by all emitter instances and cannot be reassigned.
static readonly Dictionary<TokType, byte> opCodes = new() {
    {TokType.PLUS, 0x10}, {TokType.MINUS, 0x11}, {TokType.MUL, 0x12}, {TokType.DIV, 0x13},
    {TokType.LT, 0x14}, {TokType.GT, 0x15}, {TokType.LE, 0x16}, {TokType.GE, 0x17},
    {TokType.EQ, 0x18}, {TokType.NEQ, 0x19}
};
void EmitStmt(Stmt s){
switch(s){
case AssignStmt a:
Symbol? symbol = null;
if (!syms.TryGetValue(a.Target, out symbol)) {
throw new Exception($"Undeclared variable '{a.Target}'");
}
EmitExpr(a.Expr);
EmitByte(0x03); EmitU16((ushort)syms[a.Target].Index);
EmitByte(0x03); EmitU16((ushort)symbol.Index);
break;
case IfStmt iff:
@ -293,9 +540,13 @@ public class BytecodeEmitter {
break;
case ForStmt f:
Symbol? forSymbol = null;
if (!syms.TryGetValue(f.Var, out forSymbol)) {
throw new Exception($"Undeclared variable '{f.Var}'");
}
// Initialisierung: var := start
EmitExpr(f.Start);
EmitByte(0x03); EmitU16((ushort)syms[f.Var].Index);
EmitByte(0x03); EmitU16((ushort)forSymbol.Index);
int cmpPos = code.Count; // Position des Vergleichs
EmitExpr(new VarExpr(f.Var));
@ -322,8 +573,6 @@ public class BytecodeEmitter {
EmitExpr(f.End);
EmitByte(0x03); EmitU16((ushort)syms[f.Var].Index);
break;
default: throw new Exception($"Unknown stmt {s.GetType().Name}");
}
}
@ -333,16 +582,18 @@ public class BytecodeEmitter {
case IntExpr ie:
int ci=AddConst(ie.Value);EmitByte(0x01);EmitU16((ushort)ci);break;
case VarExpr ve:
int vi=syms[ve.Name].Index;EmitByte(0x02);EmitU16((ushort)vi);break;
Symbol? symbol = null;
if (!syms.TryGetValue(ve.Name, out symbol)) {
throw new Exception($"Undeclared variable '{ve.Name}'");
}
EmitByte(0x02);EmitU16((ushort)symbol.Index);break;
case BinaryExpr be:
if (!opCodes.ContainsKey(be.Op)) {
throw new Exception($"Unknown operator '{be.Op}'");
}
EmitExpr(be.L);EmitExpr(be.R);
EmitByte(be.Op switch{
TokType.PLUS=>0x10,TokType.MINUS=>0x11,TokType.MUL=>0x12,TokType.DIV=>0x13,
TokType.LT=>0x14,TokType.GT=>0x15,TokType.LE=>0x16,TokType.GE=>0x17,
TokType.EQ=>0x18,TokType.NEQ=>0x19,
_=>throw new Exception("bad op")
}); break;
default: throw new Exception("bad expr");
EmitByte(opCodes[be.Op]);
break;
}
}