// Listing metadata: 305 lines, 12 KiB, C#
using System;
|
|
using System.IO;
|
|
using System.Text;
|
|
using System.Collections.Generic;
|
|
|
|
/// <summary>Command-line entry point: compiles a Structured Text source file into an STBC binary.</summary>
class Program {
    /// <summary>
    /// Reads the source file named by <c>args[0]</c>, parses and compiles it, and writes the
    /// resulting binary to the path named by <c>args[1]</c>.
    /// </summary>
    /// <param name="args">Input path followed by output path.</param>
    /// <returns>0 on success; 1 when arguments are missing or when reading, compiling, or writing fails.</returns>
    static int Main(string[] args) {
        if (args.Length < 2) {
            Console.WriteLine("Usage: StEmitter <input.st> <output.bin>");
            return 1;
        }

        try {
            var input = File.ReadAllText(args[0]);
            var parser = new StParser(input);
            var prog = parser.ParseProgram();

            var emitter = new BytecodeEmitter();
            emitter.Compile(prog);
            File.WriteAllBytes(args[1], emitter.BuildBinary());

            Console.WriteLine($"Wrote {args[1]}: consts={emitter.ConstantsCount}, vars={emitter.VarCount}, code={emitter.CodeLength}");
            return 0;
        }
        catch (Exception ex) {
            // Top-level boundary: the lexer, parser and emitter all signal bad input by throwing,
            // and file access can fail too. Report the message and exit non-zero instead of
            // letting the process die with an unhandled-exception stack trace.
            Console.Error.WriteLine($"Error: {ex.Message}");
            return 1;
        }
    }
}
|
|
|
|
// === AST ===
|
|
/// <summary>
/// Declared variable types. The numeric value of each member is what
/// <c>BytecodeEmitter.BuildBinary</c> writes as the variable's type byte.
/// </summary>
public enum VarType {
    Int8 = 1,
    Int16 = 2,
    Int32 = 3,
    Byte = 4,
    Bool = 5
}
|
|
/// <summary>Common base type of every syntax-tree node.</summary>
public abstract class StNode
{
}
|
|
/// <summary>Root of the syntax tree: the declared variables followed by the program statements.</summary>
public class ProgramNode : StNode {
    /// <summary>Variable declarations, in source order.</summary>
    public List<VarDecl> Vars = new List<VarDecl>();

    /// <summary>Top-level statements, in source order.</summary>
    public List<Stmt> Stmts = new List<Stmt>();
}
|
|
/// <summary>A single variable declaration: name, type and optional initializer.</summary>
public class VarDecl : StNode {
    /// <summary>Variable name as produced by the lexer.</summary>
    public string Name;

    /// <summary>Declared type of the variable.</summary>
    public VarType Type;

    /// <summary>Initializer expression, or null when the declaration has none.</summary>
    public Expr? Init;
}
|
|
|
|
/// <summary>Base type of all statement nodes.</summary>
public abstract class Stmt : StNode
{
}
|
|
/// <summary>Assignment statement: stores the value of <see cref="Expr"/> into the variable <see cref="Target"/>.</summary>
public class AssignStmt : Stmt {
    /// <summary>Name of the variable being assigned.</summary>
    public string Target;

    /// <summary>Expression whose value is assigned.</summary>
    public Expr Expr;
}
|
|
/// <summary>Conditional statement with a then-branch and an optional (possibly empty) else-branch.</summary>
public class IfStmt : Stmt {
    /// <summary>Condition expression.</summary>
    public Expr Cond;

    /// <summary>Statements of the then-branch.</summary>
    public List<Stmt> ThenStmts = new List<Stmt>();

    /// <summary>Statements of the else-branch; empty when there is no ELSE part.</summary>
    public List<Stmt> ElseStmts = new List<Stmt>();
}
|
|
/// <summary>Loop statement: a condition and the statements forming the loop body.</summary>
public class WhileStmt : Stmt {
    /// <summary>Loop condition expression.</summary>
    public Expr Cond;

    /// <summary>Statements of the loop body.</summary>
    public List<Stmt> Body = new List<Stmt>();
}
|
|
|
|
/// <summary>Base type of all expression nodes.</summary>
public abstract class Expr : StNode
{
}
|
|
/// <summary>Integer literal expression.</summary>
public class IntExpr : Expr {
    /// <summary>The literal's value.</summary>
    public int Value;

    /// <summary>Creates a literal node holding <paramref name="v"/>.</summary>
    public IntExpr(int v) {
        Value = v;
    }
}
|
|
/// <summary>Expression that reads a variable by name.</summary>
public class VarExpr : Expr {
    /// <summary>Name of the referenced variable.</summary>
    public string Name;

    /// <summary>Creates a reference to the variable named <paramref name="n"/>.</summary>
    public VarExpr(string n) {
        Name = n;
    }
}
|
|
/// <summary>Binary expression: two operands joined by an operator identified by its token type.</summary>
public class BinaryExpr : Expr {
    /// <summary>Left operand.</summary>
    public Expr L;

    /// <summary>Right operand.</summary>
    public Expr R;

    /// <summary>Token type of the operator.</summary>
    public TokType Op;

    /// <summary>Creates the node for <paramref name="l"/> <paramref name="op"/> <paramref name="r"/>.</summary>
    public BinaryExpr(Expr l, TokType op, Expr r) {
        L = l;
        Op = op;
        R = r;
    }
}
|
|
|
|
// === TOKENIZER ===
|
|
/// <summary>Kinds of token produced by the lexer.</summary>
public enum TokType {
    // Identifiers, literals and punctuation
    IDENT,
    INT,
    ASSIGN,
    SEMI,
    LPAREN,
    RPAREN,

    // Arithmetic operators
    PLUS,
    MINUS,
    MUL,
    DIV,

    // Comparison operators
    LT,
    GT,
    LE,
    GE,
    EQ,
    NEQ,

    // IF statement keywords
    IF,
    THEN,
    ELSE,
    END_IF,

    // WHILE statement keywords
    WHILE,
    DO,
    END_WHILE,

    // Program structure keywords
    PROGRAM,
    VAR,
    END_VAR,
    END_PROGRAM,

    // End of input
    EOF
}
|
|
/// <summary>A lexical token: its kind plus the text associated with it.</summary>
public class Token {
    /// <summary>Kind of the token.</summary>
    public TokType Type;

    /// <summary>Text carried by the token.</summary>
    public string Text;

    /// <summary>Creates a token of kind <paramref name="t"/> carrying the text <paramref name="s"/>.</summary>
    public Token(TokType t, string s) {
        Type = t;
        Text = s;
    }
}
|
|
|
|
/// <summary>
/// Hand-written lexer for the Structured Text subset handled by this tool.
/// Each call to <see cref="NextToken"/> skips whitespace and returns the next token.
/// </summary>
public class StLexer {
    private readonly string src;
    private int i;

    /// <summary>Creates a lexer positioned at the start of <paramref name="s"/>.</summary>
    public StLexer(string s) { src = s; }

    // Current character without consuming it; '\0' once the input is exhausted.
    char Peek() => i < src.Length ? src[i] : '\0';

    // Consumes and returns the current character; '\0' once the input is exhausted.
    char Next() => i < src.Length ? src[i++] : '\0';

    // Integer literals are limited to ASCII digits: char.IsDigit also accepts other Unicode
    // decimal digits, which int parsing in the parser would then reject.
    static bool IsAsciiDigit(char c) => c >= '0' && c <= '9';

    /// <summary>Scans and returns the next token.</summary>
    /// <returns>
    /// The next token, or a token of type <see cref="TokType.EOF"/> with empty text at end of input.
    /// Identifier and keyword text is upper-cased, so names are effectively case-insensitive.
    /// </returns>
    /// <exception cref="Exception">A character that cannot start any token was found.</exception>
    public Token NextToken() {
        while (char.IsWhiteSpace(Peek())) {
            Next();
        }
        if (Peek() == '\0') {
            return new Token(TokType.EOF, "");
        }

        // Identifiers and keywords: a letter or '_' followed by letters, digits or '_'.
        if (char.IsLetter(Peek()) || Peek() == '_') {
            var sb = new StringBuilder();
            while (char.IsLetterOrDigit(Peek()) || Peek() == '_') {
                sb.Append(Next());
            }
            var s = sb.ToString().ToUpperInvariant();
            return s switch {
                "PROGRAM" => new Token(TokType.PROGRAM, s),
                "VAR" => new Token(TokType.VAR, s),
                "END_VAR" => new Token(TokType.END_VAR, s),
                "END_PROGRAM" => new Token(TokType.END_PROGRAM, s),
                "IF" => new Token(TokType.IF, s),
                "THEN" => new Token(TokType.THEN, s),
                "ELSE" => new Token(TokType.ELSE, s),
                "END_IF" => new Token(TokType.END_IF, s),
                "WHILE" => new Token(TokType.WHILE, s),
                "DO" => new Token(TokType.DO, s),
                "END_WHILE" => new Token(TokType.END_WHILE, s),
                _ => new Token(TokType.IDENT, s)
            };
        }

        // Unsigned integer literal.
        if (IsAsciiDigit(Peek())) {
            var sb = new StringBuilder();
            while (IsAsciiDigit(Peek())) {
                sb.Append(Next());
            }
            return new Token(TokType.INT, sb.ToString());
        }

        if (Peek() == ':') {
            Next();
            if (Peek() == '=') {
                Next();
                return new Token(TokType.ASSIGN, ":=");
            }
            // ':' is only valid as part of ":=". Report it here: falling through would drop the
            // colon and either lex the following operator or blame the following character.
            throw new Exception("Unexpected ':'");
        }

        if (Peek() == '<') {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.LE, "<="); }
            if (Peek() == '>') { Next(); return new Token(TokType.NEQ, "<>"); }
            return new Token(TokType.LT, "<");
        }

        if (Peek() == '>') {
            Next();
            if (Peek() == '=') { Next(); return new Token(TokType.GE, ">="); }
            return new Token(TokType.GT, ">");
        }

        if (Peek() == '=') {
            Next();
            return new Token(TokType.EQ, "=");
        }

        // Remaining single-character tokens.
        char c = Next();
        return c switch {
            ';' => new Token(TokType.SEMI, ";"),
            '(' => new Token(TokType.LPAREN, "("),
            ')' => new Token(TokType.RPAREN, ")"),
            '+' => new Token(TokType.PLUS, "+"),
            '-' => new Token(TokType.MINUS, "-"),
            '*' => new Token(TokType.MUL, "*"),
            '/' => new Token(TokType.DIV, "/"),
            _ => throw new Exception($"Unexpected '{c}'")
        };
    }
}
|
|
|
|
// === PARSER ===
|
|
/// <summary>
/// Recursive-descent parser that turns Structured Text source into a <see cref="ProgramNode"/>.
/// Grammar accepted (keywords are case-insensitive because the lexer upper-cases them):
/// <code>
/// program  := PROGRAM [IDENT] [VAR vardecl* END_VAR] stmt* END_PROGRAM
/// vardecl  := IDENT IDENT [":=" expr] ";"
/// stmt     := IDENT ":=" expr ";"
///           | IF expr THEN stmt* [ELSE stmt*] END_IF ";"
///           | WHILE expr DO stmt* END_WHILE ";"
/// expr     := addsub (("&lt;" | "&gt;" | "&lt;=" | "&gt;=" | "=" | "&lt;&gt;") addsub)*
/// addsub   := muldiv (("+" | "-") muldiv)*
/// muldiv   := primary (("*" | "/") primary)*
/// primary  := INT | IDENT | "(" expr ")"
/// </code>
/// All syntax errors are reported by throwing <see cref="Exception"/>.
/// </summary>
public class StParser {
    StLexer lex;
    Token cur;

    /// <summary>Creates a parser over <paramref name="s"/> and reads the first token.</summary>
    public StParser(string s) {
        lex = new StLexer(s);
        cur = lex.NextToken();
    }

    // Advances to the next token.
    void Next() => cur = lex.NextToken();

    // Requires the current token to be of type t and consumes it.
    void Expect(TokType t) {
        if (cur.Type != t) {
            throw new Exception($"Expected {t}, got {cur.Type}");
        }
        Next();
    }

    /// <summary>Parses a whole program, from PROGRAM through END_PROGRAM.</summary>
    /// <returns>The program's variable declarations and statements.</returns>
    /// <exception cref="Exception">The input does not match the grammar.</exception>
    public ProgramNode ParseProgram() {
        var p = new ProgramNode();
        Expect(TokType.PROGRAM);

        // The program name is optional and is not recorded.
        if (cur.Type == TokType.IDENT) {
            Next();
        }

        if (cur.Type == TokType.VAR) {
            Next();
            while (cur.Type == TokType.IDENT) {
                p.Vars.Add(ParseVarDecl());
            }
            Expect(TokType.END_VAR);
        }

        // Stop at EOF too, so a missing END_PROGRAM is reported by the Expect below.
        while (cur.Type != TokType.END_PROGRAM && cur.Type != TokType.EOF) {
            p.Stmts.Add(ParseStmt());
        }
        Expect(TokType.END_PROGRAM);
        return p;
    }

    // vardecl := IDENT IDENT [":=" expr] ";"  (the second identifier is the type name)
    VarDecl ParseVarDecl() {
        string name = cur.Text;
        Expect(TokType.IDENT);

        string tname = cur.Text.ToLowerInvariant();
        Expect(TokType.IDENT);

        VarType vt = tname switch {
            "int8" => VarType.Int8,
            "int16" => VarType.Int16,
            "int32" => VarType.Int32,
            "byte" => VarType.Byte,
            "bool" => VarType.Bool,
            _ => throw new Exception($"Unknown type {tname}")
        };

        Expr? init = null;
        if (cur.Type == TokType.ASSIGN) {
            Next();
            init = ParseExpr();
        }
        Expect(TokType.SEMI);
        return new VarDecl { Name = name, Type = vt, Init = init };
    }

    // Dispatches on the first token of a statement.
    Stmt ParseStmt() {
        if (cur.Type == TokType.IF) return ParseIf();
        if (cur.Type == TokType.WHILE) return ParseWhile();
        if (cur.Type == TokType.IDENT) {
            var n = cur.Text;
            Next();
            Expect(TokType.ASSIGN);
            var e = ParseExpr();
            Expect(TokType.SEMI);
            return new AssignStmt { Target = n, Expr = e };
        }
        throw new Exception($"Unexpected token {cur.Type} in stmt");
    }

    // IF expr THEN stmt* [ELSE stmt*] END_IF ";"
    // An unterminated block ends in ParseStmt throwing on EOF, so the loops cannot spin forever.
    IfStmt ParseIf() {
        Next(); // IF
        var cond = ParseExpr();
        Expect(TokType.THEN);

        var node = new IfStmt { Cond = cond };
        while (cur.Type != TokType.ELSE && cur.Type != TokType.END_IF) {
            node.ThenStmts.Add(ParseStmt());
        }
        if (cur.Type == TokType.ELSE) {
            Next();
            while (cur.Type != TokType.END_IF) {
                node.ElseStmts.Add(ParseStmt());
            }
        }
        Expect(TokType.END_IF);
        Expect(TokType.SEMI);
        return node;
    }

    // WHILE expr DO stmt* END_WHILE ";"
    WhileStmt ParseWhile() {
        Next(); // WHILE
        var cond = ParseExpr();
        Expect(TokType.DO);

        var ws = new WhileStmt { Cond = cond };
        while (cur.Type != TokType.END_WHILE) {
            ws.Body.Add(ParseStmt());
        }
        Expect(TokType.END_WHILE);
        Expect(TokType.SEMI);
        return ws;
    }

    // Entry point for expressions; comparison has the lowest precedence.
    Expr ParseExpr() => ParseCompare();

    // Left-associative chain of comparison operators.
    Expr ParseCompare() {
        var l = ParseAddSub();
        while (cur.Type is TokType.LT or TokType.GT or TokType.LE or TokType.GE or TokType.EQ or TokType.NEQ) {
            var op = cur.Type;
            Next();
            var r = ParseAddSub();
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // Left-associative chain of '+' and '-'.
    Expr ParseAddSub() {
        var l = ParseMulDiv();
        while (cur.Type == TokType.PLUS || cur.Type == TokType.MINUS) {
            var op = cur.Type;
            Next();
            var r = ParseMulDiv();
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // Left-associative chain of '*' and '/'.
    Expr ParseMulDiv() {
        var l = ParsePrimary();
        while (cur.Type == TokType.MUL || cur.Type == TokType.DIV) {
            var op = cur.Type;
            Next();
            var r = ParsePrimary();
            l = new BinaryExpr(l, op, r);
        }
        return l;
    }

    // primary := INT | IDENT | "(" expr ")"
    Expr ParsePrimary() {
        if (cur.Type == TokType.INT) {
            // Parse culture-independently and turn an oversized or malformed literal into the
            // same kind of error the rest of the parser raises, instead of letting
            // OverflowException/FormatException escape from int.Parse.
            if (!int.TryParse(cur.Text,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out int v)) {
                throw new Exception($"Invalid or out-of-range integer literal '{cur.Text}'");
            }
            Next();
            return new IntExpr(v);
        }
        if (cur.Type == TokType.IDENT) {
            string n = cur.Text;
            Next();
            return new VarExpr(n);
        }
        if (cur.Type == TokType.LPAREN) {
            Next();
            var e = ParseExpr();
            Expect(TokType.RPAREN);
            return e;
        }
        throw new Exception($"Unexpected token {cur.Type}");
    }
}
|
|
|
|
// === BYTECODE ===
|
|
/// <summary>
/// Compiles a <see cref="ProgramNode"/> into stack-style bytecode and serializes it, together with
/// the constant pool and variable table, into the "STBC" binary produced by <see cref="BuildBinary"/>.
/// Every operand (constant index, variable index, jump target) is an unsigned 16-bit little-endian value.
/// </summary>
public class BytecodeEmitter {
    // Opcode values. The mnemonics are inferred from how this emitter uses each opcode;
    // the interpreter that defines them is not part of this file.
    const byte OpPushConst = 0x01; // operand: constant-pool index
    const byte OpLoadVar = 0x02;   // operand: variable index
    const byte OpStoreVar = 0x03;  // operand: variable index
    const byte OpJz = 0x20;        // operand: absolute code offset; used to skip a branch or loop body
    const byte OpJmp = 0x21;       // operand: absolute code offset
    const byte OpEnd = 0xF0;       // emitted once, after the last statement

    List<int> consts = new();
    Dictionary<int, int> constIndex = new(); // constant value -> index in consts
    Dictionary<string, Symbol> syms = new();
    List<byte> code = new();

    class Symbol { public string Name; public VarType Type; public int Index; }

    /// <summary>Number of distinct constants in the pool.</summary>
    public int ConstantsCount => consts.Count;

    /// <summary>Number of declared variables.</summary>
    public int VarCount => syms.Count;

    /// <summary>Number of bytecode bytes emitted so far.</summary>
    public int CodeLength => code.Count;

    /// <summary>
    /// Compiles <paramref name="p"/>: registers every variable, emits the initializers in
    /// declaration order, then the statements, then the end opcode.
    /// </summary>
    /// <param name="p">Program to compile.</param>
    /// <exception cref="Exception">
    /// A variable is declared twice, or an index or offset does not fit in 16 bits.
    /// </exception>
    /// <exception cref="KeyNotFoundException">An undeclared variable is referenced.</exception>
    public void Compile(ProgramNode p) {
        // Register all symbols first so that initializers may refer to any declared variable.
        foreach (var v in p.Vars) {
            var sym = new Symbol { Name = v.Name, Type = v.Type, Index = syms.Count };
            // A silently overwritten duplicate would leave a gap in the index sequence and
            // make BuildBinary index past the end of its type table.
            if (!syms.TryAdd(v.Name, sym)) {
                throw new Exception($"Duplicate variable '{v.Name}'");
            }
        }

        foreach (var v in p.Vars) {
            if (v.Init != null) {
                EmitExpr(v.Init);
                EmitByte(OpStoreVar);
                EmitU16(ToU16(Lookup(v.Name).Index, "variable index"));
            }
        }

        foreach (var s in p.Stmts) {
            EmitStmt(s);
        }

        EmitByte(OpEnd);
    }

    // Emits the code for one statement; jump operands are written as 0 and patched afterwards.
    void EmitStmt(Stmt s) {
        switch (s) {
            case AssignStmt a:
                EmitExpr(a.Expr);
                EmitByte(OpStoreVar);
                EmitU16(ToU16(Lookup(a.Target).Index, "variable index"));
                break;

            case IfStmt iff:
                EmitExpr(iff.Cond);
                EmitByte(OpJz);
                int jz = code.Count;
                EmitU16(0);
                foreach (var st in iff.ThenStmts) EmitStmt(st);
                if (iff.ElseStmts.Count > 0) {
                    // The then-branch jumps over the else-branch; the conditional jump lands on it.
                    EmitByte(OpJmp);
                    int jmp = code.Count;
                    EmitU16(0);
                    PatchJump(jz, code.Count);
                    foreach (var st in iff.ElseStmts) EmitStmt(st);
                    PatchJump(jmp, code.Count);
                } else {
                    PatchJump(jz, code.Count);
                }
                break;

            case WhileStmt w:
                int loopStart = code.Count;
                EmitExpr(w.Cond);
                EmitByte(OpJz);
                int jzpos = code.Count;
                EmitU16(0);
                foreach (var st in w.Body) EmitStmt(st);
                // Jump back to re-evaluate the condition; the conditional jump exits past this point.
                EmitByte(OpJmp);
                EmitU16(ToU16(loopStart, "jump target"));
                PatchJump(jzpos, code.Count);
                break;

            default:
                throw new Exception($"Unknown stmt {s.GetType().Name}");
        }
    }

    // Overwrites the 16-bit little-endian operand at pos with the absolute offset target.
    void PatchJump(int pos, int target) {
        ushort t = ToU16(target, "jump target");
        code[pos] = (byte)(t & 0xFF);
        code[pos + 1] = (byte)(t >> 8);
    }

    // Emits code for an expression: both operands first, then the operator opcode.
    void EmitExpr(Expr e) {
        switch (e) {
            case IntExpr ie:
                EmitByte(OpPushConst);
                EmitU16(ToU16(AddConst(ie.Value), "constant index"));
                break;

            case VarExpr ve:
                EmitByte(OpLoadVar);
                EmitU16(ToU16(Lookup(ve.Name).Index, "variable index"));
                break;

            case BinaryExpr be:
                EmitExpr(be.L);
                EmitExpr(be.R);
                byte op = be.Op switch {
                    TokType.PLUS => (byte)0x10,
                    TokType.MINUS => (byte)0x11,
                    TokType.MUL => (byte)0x12,
                    TokType.DIV => (byte)0x13,
                    TokType.LT => (byte)0x14,
                    TokType.GT => (byte)0x15,
                    TokType.LE => (byte)0x16,
                    TokType.GE => (byte)0x17,
                    TokType.EQ => (byte)0x18,
                    TokType.NEQ => (byte)0x19,
                    _ => throw new Exception("bad op")
                };
                EmitByte(op);
                break;

            default:
                throw new Exception("bad expr");
        }
    }

    // Resolves a variable name, naming the offender when it was never declared.
    Symbol Lookup(string name) =>
        syms.TryGetValue(name, out var sym)
            ? sym
            : throw new KeyNotFoundException($"Undeclared variable '{name}'");

    // Converts to an unsigned 16-bit operand, failing loudly instead of truncating.
    static ushort ToU16(int value, string what) {
        if (value < 0 || value > ushort.MaxValue) {
            throw new Exception($"{what} {value} does not fit in 16 bits");
        }
        return (ushort)value;
    }

    // Returns the pool index of v, adding it when it is not yet present (values are deduplicated).
    int AddConst(int v) {
        if (constIndex.TryGetValue(v, out int existing)) {
            return existing;
        }
        int added = consts.Count;
        consts.Add(v);
        constIndex[v] = added;
        return added;
    }

    void EmitByte(byte b) => code.Add(b);

    // Appends v as two bytes, low byte first.
    void EmitU16(ushort v) {
        code.Add((byte)(v & 0xFF));
        code.Add((byte)(v >> 8));
    }

    /// <summary>
    /// Serializes the compiled program. Layout, in order: the ASCII bytes "STBC"; u16 version (1);
    /// u16 constant count followed by each constant as a 32-bit integer; u16 variable count followed
    /// by one type byte per variable in index order; u16 code length followed by the code bytes.
    /// Multi-byte values are written by <see cref="BinaryWriter"/>, i.e. little-endian.
    /// </summary>
    /// <returns>The complete binary image.</returns>
    /// <exception cref="Exception">A section count or the code length does not fit in 16 bits.</exception>
    public byte[] BuildBinary() {
        using var ms = new MemoryStream();
        using (var w = new BinaryWriter(ms, Encoding.ASCII, leaveOpen: true)) {
            w.Write(Encoding.ASCII.GetBytes("STBC"));
            w.Write((ushort)1);

            w.Write(ToU16(consts.Count, "constant count"));
            foreach (var c in consts) w.Write(c);

            w.Write(ToU16(syms.Count, "variable count"));
            var types = new byte[syms.Count];
            foreach (var kv in syms) types[kv.Value.Index] = (byte)kv.Value.Type;
            foreach (var b in types) w.Write(b);

            w.Write(ToU16(code.Count, "code length"));
            w.Write(code.ToArray());
        } // disposing the writer flushes it before the stream is read back
        return ms.ToArray();
    }
}
|