This commit is contained in:
2025-10-11 23:55:58 +02:00
parent 384f9c7197
commit f6aef1f0d1
11 changed files with 124 additions and 121 deletions

View File

@ -23,19 +23,19 @@ class Program {
// === AST ===
/// <summary>
/// Scalar types a variable can be declared with. The numeric value of each member is
/// what <c>BuildBinary</c> writes, cast to a byte, as the type tag of a symbol.
/// </summary>
public enum VarType { Int8 = 1, Int16 = 2, Int32 = 3, Byte = 4, Bool = 5 }
/// <summary>Common base type of every syntax-tree node.</summary>
public abstract class StNode {}

/// <summary>A parsed program: its variable declarations and its top-level statements.</summary>
public class ProgramNode : StNode
{
    public List<VarDecl> Vars = new();
    public List<Stmt> Stmts = new();
}

/// <summary>
/// One variable declaration. <see cref="Init"/> is null when the declaration
/// carries no <c>:=</c> initializer.
/// </summary>
public class VarDecl : StNode
{
    public string Name;
    public VarType Type;
    public Expr? Init;
}

/// <summary>Base type of all statements.</summary>
public abstract class Stmt : StNode {}

/// <summary>Assignment of the value of <see cref="Expr"/> to the variable named <see cref="Target"/>.</summary>
public class AssignStmt : Stmt
{
    public string Target;
    public Expr Expr;
}

/// <summary>Conditional statement; <see cref="ElseStmts"/> stays empty when there is no ELSE branch.</summary>
public class IfStmt : Stmt
{
    public Expr Cond;
    public List<Stmt> ThenStmts = new();
    public List<Stmt> ElseStmts = new();
}

/// <summary>Loop statement: <see cref="Body"/> guarded by <see cref="Cond"/>.</summary>
public class WhileStmt : Stmt
{
    public Expr Cond;
    public List<Stmt> Body = new();
}

/// <summary>Base type of all expressions.</summary>
public abstract class Expr : StNode {}

/// <summary>Integer literal.</summary>
public class IntExpr : Expr
{
    public int Value;

    public IntExpr(int v)
    {
        Value = v;
    }
}

/// <summary>Reference to a variable by name.</summary>
public class VarExpr : Expr
{
    public string Name;

    public VarExpr(string n)
    {
        Name = n;
    }
}

/// <summary>Binary operation; <see cref="Op"/> holds the operator's token type.</summary>
public class BinaryExpr : Expr
{
    public Expr L;
    public Expr R;
    public TokType Op;

    public BinaryExpr(Expr l, TokType op, Expr r)
    {
        L = l;
        Op = op;
        R = r;
    }
}
// === TOKENIZER ===
public enum TokType { public enum TokType {
@ -43,10 +43,11 @@ public enum TokType {
PLUS, MINUS, MUL, DIV, PLUS, MINUS, MUL, DIV,
LT, GT, LE, GE, EQ, NEQ, LT, GT, LE, GE, EQ, NEQ,
IF, THEN, ELSE, END_IF, IF, THEN, ELSE, END_IF,
WHILE, DO, END_WHILE,
PROGRAM, VAR, END_VAR, END_PROGRAM, PROGRAM, VAR, END_VAR, END_PROGRAM,
EOF EOF
} }
/// <summary>A lexical token: its kind and the text the lexer recorded for it.</summary>
public class Token
{
    public TokType Type;
    public string Text;

    public Token(TokType t, string s)
    {
        Type = t;
        Text = s;
    }
}
public class StLexer { public class StLexer {
private readonly string src; private int i; private readonly string src; private int i;
@ -58,39 +59,45 @@ public class StLexer {
while (char.IsWhiteSpace(Peek())) Next(); while (char.IsWhiteSpace(Peek())) Next();
if (Peek()=='\0') return new Token(TokType.EOF,""); if (Peek()=='\0') return new Token(TokType.EOF,"");
if (char.IsLetter(Peek())||Peek()=='_') { if (char.IsLetter(Peek())||Peek()=='_'){
var sb=new StringBuilder(); var sb=new StringBuilder();
while (char.IsLetterOrDigit(Peek())||Peek()=='_') sb.Append(Next()); while (char.IsLetterOrDigit(Peek())||Peek()=='_') sb.Append(Next());
var s=sb.ToString(); var s=sb.ToString().ToUpperInvariant();
switch(s.ToUpperInvariant()) { return s switch {
case "PROGRAM": return new Token(TokType.PROGRAM,s); "PROGRAM"=>new Token(TokType.PROGRAM,s),
case "VAR": return new Token(TokType.VAR,s); "VAR"=>new Token(TokType.VAR,s),
case "END_VAR": return new Token(TokType.END_VAR,s); "END_VAR"=>new Token(TokType.END_VAR,s),
case "END_PROGRAM": return new Token(TokType.END_PROGRAM,s); "END_PROGRAM"=>new Token(TokType.END_PROGRAM,s),
case "IF": return new Token(TokType.IF,s); "IF"=>new Token(TokType.IF,s),
case "THEN": return new Token(TokType.THEN,s); "THEN"=>new Token(TokType.THEN,s),
case "ELSE": return new Token(TokType.ELSE,s); "ELSE"=>new Token(TokType.ELSE,s),
case "END_IF": return new Token(TokType.END_IF,s); "END_IF"=>new Token(TokType.END_IF,s),
default: return new Token(TokType.IDENT,s); "WHILE"=>new Token(TokType.WHILE,s),
} "DO"=>new Token(TokType.DO,s),
"END_WHILE"=>new Token(TokType.END_WHILE,s),
_=>new Token(TokType.IDENT,s)
};
} }
if (char.IsDigit(Peek())) { if (char.IsDigit(Peek())) {
var sb=new StringBuilder(); var sb=new StringBuilder();
while (char.IsDigit(Peek())) sb.Append(Next()); while(char.IsDigit(Peek())) sb.Append(Next());
return new Token(TokType.INT,sb.ToString()); return new Token(TokType.INT,sb.ToString());
} }
// Operators and symbols if (Peek()==':'){ Next(); if(Peek()=='='){Next(); return new Token(TokType.ASSIGN,":=");} }
if (Peek()==':') { Next(); if(Peek()=='='){Next(); return new Token(TokType.ASSIGN,":=");} } if (Peek()=='<'){
if (Peek()=='<') {
Next(); Next();
if (Peek()=='='){Next(); return new Token(TokType.LE,"<=");} if (Peek()=='='){Next(); return new Token(TokType.LE,"<=");}
if (Peek()=='>'){Next(); return new Token(TokType.NEQ,"<>");} if (Peek()=='>'){Next(); return new Token(TokType.NEQ,"<>");}
return new Token(TokType.LT,"<"); return new Token(TokType.LT,"<");
} }
if (Peek()=='>') { Next(); if (Peek()=='='){Next(); return new Token(TokType.GE,">=");} return new Token(TokType.GT,">"); } if (Peek()=='>'){
if (Peek()=='=') { Next(); return new Token(TokType.EQ,"="); } Next();
if (Peek()=='='){Next(); return new Token(TokType.GE,">=");}
return new Token(TokType.GT,">");
}
if (Peek()=='='){Next();return new Token(TokType.EQ,"=");}
char c=Next(); char c=Next();
return c switch { return c switch {
@ -101,7 +108,7 @@ public class StLexer {
'-'=>new Token(TokType.MINUS,"-"), '-'=>new Token(TokType.MINUS,"-"),
'*'=>new Token(TokType.MUL,"*"), '*'=>new Token(TokType.MUL,"*"),
'/'=>new Token(TokType.DIV,"/"), '/'=>new Token(TokType.DIV,"/"),
_=>throw new Exception($"Unexpected char '{c}'") _=>throw new Exception($"Unexpected '{c}'")
}; };
} }
} }
@ -111,100 +118,100 @@ public class StParser {
// Token source, and the single token of lookahead every Parse* method inspects.
StLexer lex;
Token cur;

/// <summary>
/// Creates a parser over the source text <paramref name="s"/> and reads the first
/// token, so <c>cur</c> is valid as soon as construction finishes.
/// </summary>
public StParser(string s)
{
    lex = new StLexer(s);
    cur = lex.NextToken();
}
/// <summary>Advances the lookahead: replaces <c>cur</c> with the lexer's next token.</summary>
void Next()
{
    cur = lex.NextToken();
}
/// <summary>
/// Consumes the current token when it has type <paramref name="t"/>; otherwise fails.
/// </summary>
/// <param name="t">The token type required at this point of the grammar.</param>
/// <exception cref="Exception">The current token has a different type.</exception>
void Expect(TokType t)
{
    if (cur.Type != t)
    {
        throw new Exception($"Expected {t}, got {cur.Type}");
    }

    Next();
}
/// <summary>
/// Parses a whole program: <c>PROGRAM [name] [VAR decl* END_VAR] stmt* END_PROGRAM</c>.
/// </summary>
/// <returns>The program node holding the declarations and statements that were read.</returns>
/// <exception cref="Exception">A required token is missing or a statement is malformed.</exception>
public ProgramNode ParseProgram()
{
    var program = new ProgramNode();
    Expect(TokType.PROGRAM);

    // The program name is optional and is not stored in the tree.
    if (cur.Type == TokType.IDENT)
    {
        Next();
    }

    if (cur.Type == TokType.VAR)
    {
        Next();
        while (cur.Type == TokType.IDENT)
        {
            program.Vars.Add(ParseVarDecl());
        }
        Expect(TokType.END_VAR);
    }

    // Stopping at EOF as well lets the Expect below report a missing END_PROGRAM.
    while (cur.Type != TokType.END_PROGRAM && cur.Type != TokType.EOF)
    {
        program.Stmts.Add(ParseStmt());
    }

    Expect(TokType.END_PROGRAM);
    return program;
}
/// <summary>
/// Parses one declaration of the form <c>name type [:= expr] ;</c>.
/// The type name is matched case-insensitively against int8, int16, int32, byte and bool.
/// </summary>
/// <returns>The declaration; its <c>Init</c> is null when no initializer is present.</returns>
/// <exception cref="Exception">The type name is unknown or a required token is missing.</exception>
VarDecl ParseVarDecl()
{
    string name = cur.Text;
    Expect(TokType.IDENT);

    // The type is lexed as a plain identifier; normalise case before matching it.
    string tname = cur.Text.ToLowerInvariant();
    Expect(TokType.IDENT);

    VarType vt = tname switch
    {
        "int8" => VarType.Int8,
        "int16" => VarType.Int16,
        "int32" => VarType.Int32,
        "byte" => VarType.Byte,
        "bool" => VarType.Bool,
        _ => throw new Exception($"Unknown type {tname}")
    };

    Expr? init = null;
    if (cur.Type == TokType.ASSIGN)
    {
        Next();
        init = ParseExpr();
    }

    Expect(TokType.SEMI);
    return new VarDecl { Name = name, Type = vt, Init = init };
}
/// <summary>
/// Parses one statement, chosen by the current token: IF, WHILE, or an
/// assignment <c>name := expr ;</c> when it is an identifier.
/// </summary>
/// <exception cref="Exception">The current token cannot start a statement.</exception>
Stmt ParseStmt()
{
    if (cur.Type == TokType.IF)
    {
        return ParseIf();
    }

    if (cur.Type == TokType.WHILE)
    {
        return ParseWhile();
    }

    if (cur.Type == TokType.IDENT)
    {
        var target = cur.Text;
        Next();
        Expect(TokType.ASSIGN);
        var value = ParseExpr();
        Expect(TokType.SEMI);
        return new AssignStmt { Target = target, Expr = value };
    }

    throw new Exception($"Unexpected token {cur.Type} in stmt");
}
/// <summary>
/// Parses <c>IF expr THEN stmt* [ELSE stmt*] END_IF ;</c>. The caller has already
/// checked that the current token is IF.
/// </summary>
/// <exception cref="Exception">A required token is missing or a nested statement is malformed.</exception>
IfStmt ParseIf()
{
    Next(); // consume IF
    var cond = ParseExpr();
    Expect(TokType.THEN);

    var node = new IfStmt { Cond = cond };
    while (cur.Type != TokType.ELSE && cur.Type != TokType.END_IF)
    {
        node.ThenStmts.Add(ParseStmt());
    }

    if (cur.Type == TokType.ELSE)
    {
        Next();
        while (cur.Type != TokType.END_IF)
        {
            node.ElseStmts.Add(ParseStmt());
        }
    }

    Expect(TokType.END_IF);
    Expect(TokType.SEMI);
    return node;
}
/// <summary>
/// Parses <c>WHILE expr DO stmt* END_WHILE ;</c>. The caller has already checked
/// that the current token is WHILE.
/// </summary>
/// <exception cref="Exception">A required token is missing or a nested statement is malformed.</exception>
WhileStmt ParseWhile()
{
    Next(); // consume WHILE
    var cond = ParseExpr();
    Expect(TokType.DO);

    var loop = new WhileStmt { Cond = cond };
    while (cur.Type != TokType.END_WHILE)
    {
        loop.Body.Add(ParseStmt());
    }

    Expect(TokType.END_WHILE);
    Expect(TokType.SEMI);
    return loop;
}
/// <summary>Parses a full expression; comparison is the lowest-precedence level.</summary>
Expr ParseExpr() => ParseCompare();

/// <summary>
/// Parses a chain of additive operands joined by <c>&lt; &gt; &lt;= &gt;= = &lt;&gt;</c>,
/// folding them left to right into nested <c>BinaryExpr</c> nodes.
/// </summary>
Expr ParseCompare()
{
    var left = ParseAddSub();
    while (cur.Type is TokType.LT or TokType.GT or TokType.LE or TokType.GE or TokType.EQ or TokType.NEQ)
    {
        var op = cur.Type;
        Next();
        var right = ParseAddSub();
        left = new BinaryExpr(left, op, right);
    }

    return left;
}
/// <summary>
/// Parses a chain of multiplicative operands joined by <c>+</c> or <c>-</c>,
/// folding them left to right into nested <c>BinaryExpr</c> nodes.
/// </summary>
Expr ParseAddSub()
{
    var left = ParseMulDiv();
    while (cur.Type == TokType.PLUS || cur.Type == TokType.MINUS)
    {
        var op = cur.Type;
        Next();
        var right = ParseMulDiv();
        left = new BinaryExpr(left, op, right);
    }

    return left;
}
/// <summary>
/// Parses a chain of primary expressions joined by <c>*</c> or <c>/</c>,
/// folding them left to right into nested <c>BinaryExpr</c> nodes.
/// </summary>
Expr ParseMulDiv()
{
    var left = ParsePrimary();
    while (cur.Type == TokType.MUL || cur.Type == TokType.DIV)
    {
        var op = cur.Type;
        Next();
        var right = ParsePrimary();
        left = new BinaryExpr(left, op, right);
    }

    return left;
}
/// <summary>
/// Parses an integer literal, a variable reference, or a parenthesised expression.
/// </summary>
/// <exception cref="Exception">The current token cannot start a primary expression.</exception>
Expr ParsePrimary()
{
    if (cur.Type == TokType.INT)
    {
        // Source literals are machine-readable text, so parse them culture-independently.
        int value = int.Parse(cur.Text, System.Globalization.CultureInfo.InvariantCulture);
        Next();
        return new IntExpr(value);
    }

    if (cur.Type == TokType.IDENT)
    {
        string name = cur.Text;
        Next();
        return new VarExpr(name);
    }

    if (cur.Type == TokType.LPAREN)
    {
        Next();
        var inner = ParseExpr();
        Expect(TokType.RPAREN);
        return inner;
    }

    throw new Exception($"Unexpected token {cur.Type}");
}
} }
@ -221,7 +228,7 @@ public class BytecodeEmitter {
syms[v.Name]=new Symbol{Name=v.Name,Type=v.Type,Index=idx++}; syms[v.Name]=new Symbol{Name=v.Name,Type=v.Type,Index=idx++};
foreach(var v in p.Vars) foreach(var v in p.Vars)
if(v.Init!=null){ EmitExpr(v.Init); EmitByte(0x03); EmitU16((ushort)syms[v.Name].Index); } if(v.Init!=null){EmitExpr(v.Init);EmitByte(0x03);EmitU16((ushort)syms[v.Name].Index);}
foreach(var s in p.Stmts) foreach(var s in p.Stmts)
EmitStmt(s); EmitStmt(s);
@ -235,43 +242,47 @@ public class BytecodeEmitter {
EmitExpr(a.Expr); EmitExpr(a.Expr);
EmitByte(0x03); EmitU16((ushort)syms[a.Target].Index); EmitByte(0x03); EmitU16((ushort)syms[a.Target].Index);
break; break;
case IfStmt iff: case IfStmt iff:
EmitExpr(iff.Cond); EmitExpr(iff.Cond);
EmitByte(0x20); // JZ (jump if zero) EmitByte(0x20); int jz=code.Count; EmitU16(0);
int jzPos = code.Count; EmitU16(0);
foreach(var st in iff.ThenStmts) EmitStmt(st); foreach(var st in iff.ThenStmts) EmitStmt(st);
if (iff.ElseStmts.Count>0){ if(iff.ElseStmts.Count>0){
EmitByte(0x21); // JMP unconditional EmitByte(0x21); int jmp=code.Count; EmitU16(0);
int jmpPos = code.Count; EmitU16(0); PatchJump(jz,code.Count);
PatchJump(jzPos, code.Count);
foreach(var st in iff.ElseStmts) EmitStmt(st); foreach(var st in iff.ElseStmts) EmitStmt(st);
PatchJump(jmpPos, code.Count); PatchJump(jmp,code.Count);
} else { } else PatchJump(jz,code.Count);
PatchJump(jzPos, code.Count);
}
break; break;
default:
throw new Exception($"Unhandled stmt {s.GetType().Name}"); case WhileStmt w:
int loopStart=code.Count;
EmitExpr(w.Cond);
EmitByte(0x20); int jzpos=code.Count; EmitU16(0);
foreach(var st in w.Body) EmitStmt(st);
EmitByte(0x21); EmitU16((ushort)loopStart);
PatchJump(jzpos,code.Count);
break;
default: throw new Exception($"Unknown stmt {s.GetType().Name}");
} }
} }
/// <summary>
/// Overwrites the two bytes at <paramref name="pos"/> in the code buffer with
/// <paramref name="target"/>, low byte first (the same order <c>EmitU16</c> uses).
/// </summary>
/// <param name="pos">Index of the first of the two placeholder bytes.</param>
/// <param name="target">Value to store; must fit in 16 bits.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="target"/> does not fit in 16 bits; storing it would silently wrap.
/// </exception>
void PatchJump(int pos, int target)
{
    if (target < 0 || target > ushort.MaxValue)
    {
        throw new InvalidOperationException($"Jump target {target} does not fit in 16 bits");
    }

    code[pos] = (byte)(target & 0xFF);
    code[pos + 1] = (byte)(target >> 8);
}
/// <summary>
/// Emits bytecode for <paramref name="e"/>. Integer literals emit opcode 0x01 followed by
/// a 16-bit constant-pool index; variable references emit 0x02 followed by a 16-bit symbol
/// index; binary expressions emit both operands (left first) and then one operator opcode
/// in the range 0x10-0x19.
/// </summary>
/// <exception cref="KeyNotFoundException">A variable reference names an undeclared variable.</exception>
/// <exception cref="Exception">The node type or the operator is not supported.</exception>
void EmitExpr(Expr e)
{
    switch (e)
    {
        case IntExpr ie:
        {
            int constIndex = AddConst(ie.Value);
            EmitByte(0x01);
            EmitU16((ushort)constIndex);
            break;
        }

        case VarExpr ve:
        {
            // Look the symbol up before emitting anything so a failure names the variable.
            if (!syms.TryGetValue(ve.Name, out var sym))
            {
                throw new KeyNotFoundException($"Undeclared variable '{ve.Name}'");
            }

            EmitByte(0x02);
            EmitU16((ushort)sym.Index);
            break;
        }

        case BinaryExpr be:
        {
            EmitExpr(be.L);
            EmitExpr(be.R);
            byte opcode = be.Op switch
            {
                TokType.PLUS => 0x10,
                TokType.MINUS => 0x11,
                TokType.MUL => 0x12,
                TokType.DIV => 0x13,
                TokType.LT => 0x14,
                TokType.GT => 0x15,
                TokType.LE => 0x16,
                TokType.GE => 0x17,
                TokType.EQ => 0x18,
                TokType.NEQ => 0x19,
                _ => throw new Exception("bad op")
            };
            EmitByte(opcode);
            break;
        }

        default:
            throw new Exception("bad expr");
    }
}
@ -279,19 +290,15 @@ public class BytecodeEmitter {
/// <summary>
/// Returns the constant-pool index of <paramref name="v"/>, appending it to the pool
/// first when it is not already present, so equal constants share one slot.
/// </summary>
int AddConst(int v)
{
    int existing = consts.IndexOf(v);
    if (existing >= 0)
    {
        return existing;
    }

    consts.Add(v);
    return consts.Count - 1;
}
/// <summary>Appends one byte to the code buffer.</summary>
void EmitByte(byte b)
{
    code.Add(b);
}
/// <summary>Appends a 16-bit value to the code buffer, low byte first.</summary>
void EmitU16(ushort v)
{
    code.Add((byte)(v & 0xFF));
    code.Add((byte)(v >> 8));
}
/// <summary>
/// Serialises the emitted program. Layout, in order: the ASCII magic "STBC", a 16-bit
/// version (1), a 16-bit constant count followed by the constants, a 16-bit symbol count
/// followed by one type byte per symbol (ordered by symbol index), and a 16-bit code
/// length followed by the code bytes.
/// </summary>
/// <returns>The complete binary image.</returns>
/// <exception cref="OverflowException">
/// The constant pool, symbol table or code is too large for its 16-bit count field.
/// </exception>
public byte[] BuildBinary()
{
    using var ms = new MemoryStream();
    using var w = new BinaryWriter(ms);

    w.Write(Encoding.ASCII.GetBytes("STBC"));
    w.Write((ushort)1);

    // Checked casts: an unchecked cast would truncate the count and write a corrupt image.
    w.Write(checked((ushort)consts.Count));
    foreach (var c in consts)
    {
        w.Write(c);
    }

    w.Write(checked((ushort)syms.Count));
    // Place each type tag at its symbol's index so the table order matches the emitted indices.
    var types = new byte[syms.Count];
    foreach (var kv in syms)
    {
        types[kv.Value.Index] = (byte)kv.Value.Type;
    }
    w.Write(types);

    w.Write(checked((ushort)code.Count));
    w.Write(code.ToArray());

    w.Flush();
    return ms.ToArray();
}
} }

Binary file not shown.

Binary file not shown.

View File

@ -1,14 +1,10 @@
PROGRAM Demo
VAR
    a int16 := 0;
END_VAR
WHILE a < 5 DO
    a := a + 1;
END_WHILE;
END_PROGRAM

View File

@ -13,7 +13,7 @@ using System.Reflection;
[assembly: System.Reflection.AssemblyCompanyAttribute("Compiler")] [assembly: System.Reflection.AssemblyCompanyAttribute("Compiler")]
[assembly: System.Reflection.AssemblyConfigurationAttribute("Debug")] [assembly: System.Reflection.AssemblyConfigurationAttribute("Debug")]
[assembly: System.Reflection.AssemblyFileVersionAttribute("1.0.0.0")] [assembly: System.Reflection.AssemblyFileVersionAttribute("1.0.0.0")]
[assembly: System.Reflection.AssemblyInformationalVersionAttribute("1.0.0+0e549b6e0a943de51ec80a7d726000f16f5498dd")] [assembly: System.Reflection.AssemblyInformationalVersionAttribute("1.0.0+384f9c719795a70259bf409ac57d221baf7a89b1")]
[assembly: System.Reflection.AssemblyProductAttribute("Compiler")] [assembly: System.Reflection.AssemblyProductAttribute("Compiler")]
[assembly: System.Reflection.AssemblyTitleAttribute("Compiler")] [assembly: System.Reflection.AssemblyTitleAttribute("Compiler")]
[assembly: System.Reflection.AssemblyVersionAttribute("1.0.0.0")] [assembly: System.Reflection.AssemblyVersionAttribute("1.0.0.0")]

View File

@ -1 +1 @@
f8051df38f7774448e56868f6a91a5557659f77e4a8558098bdfd3e3ba758ea4 42e892e3dd3396a02f45555f279acee4d0083d66ab01ce9b1baf7c02d466a01c

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
out.bin

Binary file not shown.