// Build (or look up) the DFA state reached from this one on character ch.
// Returns null when no NFA node has an arc on ch; otherwise returns an
// existing equivalent state if one was already constructed, or registers
// and recursively builds a brand-new one.
internal Dfa Target(char ch)
{
    // Collect every NFA successor reachable on ch into a candidate state.
    Dfa candidate = new Dfa(m_tks);
    NList cur = m_nfa;
    while (!cur.AtEnd)
    {
        cur.m_node.AddTarget(ch, candidate);
        cur = cur.m_next;
    }
    // Nothing reachable on ch: there is no target state.
    if (candidate.m_nfa.AtEnd)
        return null;
    candidate.Closure();
    // Reuse an already-built state when the NFA sets coincide.
    int total = m_tks.states.Count; // loop does not modify states
    for (int i = 0; i < total; i++)
    {
        Dfa existing = (Dfa)m_tks.states[i];
        if (existing.SameAs(candidate))
            return existing;
    }
    // Genuinely new: register it and build its outgoing arcs recursively.
    candidate.AddActions();
    return candidate;
}
// helper for building DFA: add to 'next' every NFA node reachable from
// this node through an arc that matches ch.
public void AddTarget(char ch, Dfa next)
{
    foreach (object o in m_arcs)
    {
        Arc arc = (Arc)o;
        if (arc.Match(ch))
            next.AddNfaNode(arc.m_next);
    }
}
// Re-attach the owning YyLexer to every Dfa reachable through h (recursing
// into each node's transition map); the back-reference is not serialised.
// The null check doubles as a visited marker, so cycles terminate.
public static void SetTokens(YyLexer tks, Hashtable h)
{
    foreach (Dfa dfa in h.Values)
    {
        if (dfa.m_tokens != null)
            continue; // already visited
        dfa.m_tokens = tks;
        SetTokens(tks, dfa.m_map);
    }
}
// Compare two DFA states by walking their ordered NFA node lists in step.
// Equal iff both lists hold the same nodes in the same order.
internal bool SameAs(Dfa dfa)
{
    NList a = m_nfa;
    NList b = dfa.m_nfa;
    // advance both lists while their nodes coincide
    while (a.m_node == b.m_node && !a.AtEnd)
    {
        a = a.m_next;
        b = b.m_next;
    }
    // both ended together iff the final (terminator) nodes still match
    return a.m_node == b.m_node;
}
// Structural equality of two DFA states: their NFA node lists must agree
// element-by-element, in order.
internal bool SameAs(Dfa dfa)
{
    NList mine, theirs;
    for (mine = m_nfa, theirs = dfa.m_nfa;
         mine.m_node == theirs.m_node && !mine.AtEnd;
         mine = mine.m_next, theirs = theirs.m_next)
    {
        // walk both lists in lock-step while they agree
    }
    return mine.m_node == theirs.m_node;
}
// needed after deserialisation: restore each Dfa's back-reference to the
// owning YyLexer, recursing through the transition maps. A non-null
// m_tokens marks an already-visited node, so cycles are safe.
public static void SetTokens(YyLexer tks, Hashtable h)
{
    foreach (Dfa d in h.Values)
    {
        if (d.m_tokens == null)
        {
            d.m_tokens = tks;
            SetTokens(tks, d.m_map);
        }
    }
}
// Register this newly-created DFA state with the lexer and construct its
// transition map by probing every known character class member.
internal void AddActions()
{
    m_tks.states.Add(this);
    foreach (Charset cs in m_tks.m_tokens.cats.Values)
    {
        foreach (char ch in cs.m_chars.Keys)
        {
            Dfa dest = Target(ch);
            if (dest != null)
                m_map[ch] = dest;
        }
    }
}
// Deserializing: rebuild the lexer tables from the serialised byte array.
// A non-empty tokens table means the work was already done.
// NOTE(review): unlike the other GetDfa in this file, this version calls
// no VersionCheck and reads no reswds table — presumably an earlier
// format; confirm which one this assembly actually ships.
public void GetDfa()
{
    if (tokens.Count > 0)
        return;
    Serialiser s = new Serialiser(arr);
    m_encoding = (Encoding)s.Deserialise();
    toupper = (bool)s.Deserialise();
    cats = (Hashtable)s.Deserialise();
    m_gencat = (UnicodeCategory)s.Deserialise();
    usingEOF = (bool)s.Deserialise();
    starts = (Hashtable)s.Deserialise();
    Dfa.SetTokens(this, starts); // restore back-references lost in serialisation
    tokens = (Hashtable)s.Deserialise();
}
// Run the actions attached to a terminal DFA state against the text
// matched so far; returns false when nothing was matched or every action
// REJECTed.
bool TryActions(Dfa dfa, ref TOKEN tok)
{
    int len = m_pch - m_startMatch;
    if (len == 0)
        return false;
    // Capture the matched text; an {EOF} rule can point past the buffer end.
    yytext = (m_startMatch + len <= m_buf.Length)
        ? m_buf.Substring(m_startMatch, len)
        : m_buf.Substring(m_startMatch);
    // m_actions is a priority-ordered list of old-style actions; we keep
    // trying because any of them may REJECT.
    Dfa.Action cur = dfa.m_actions;
    bool reject = true;
    while (reject && cur != null)
    {
        int act = cur.a_act;
        reject = false;
        cur = cur.a_next;
        if (cur == null && dfa.m_tokClass != "")
        {
            // the final entry may be a new-style token-class construction
            if (m_debug)
                Console.WriteLine("creating a " + dfa.m_tokClass);
            tok = (TOKEN)Tfactory.create(dfa.m_tokClass, this);
        }
        else
        {
            tok = m_tokens.OldAction(this, ref yytext, act, ref reject);
            if (m_debug && !reject)
                Console.WriteLine("Old action " + act);
        }
    }
    return !reject;
}
internal NList m_nfa = new NList(); // nfa nodes in m_state order

// This routine is called for a brand-new DFA node: register it with the
// lexer, then follow every possible character arc from here to populate
// the transition map.
internal void AddActions()
{
    m_tks.states.Add(this);
    foreach (Charset cs in m_tks.m_tokens.cats.Values)
    {
        foreach (char ch in cs.m_chars.Keys)
        {
            Dfa dest = Target(ch);
            if (dest != null)
                m_map[ch] = dest;
        }
    }
}
// Match this Dfa against a given string starting at index ix.
// Returns the number of characters matched (writing the winning rule's
// action into 'action'), or -1 on failure. Greedy: tries to extend the
// match first, and only falls back to this state's own action when the
// longer match fails.
public int Match(string str, int ix, ref int action)
{
    bool dead = ix >= str.Length; // input exhausted?
    Dfa next = null;
    if (!dead)
    {
        next = (Dfa)m_map[m_tokens.Filter(str[ix])];
        dead = next == null; // no arc for this character
    }
    int sub = 0;
    if (!dead)
    {
        sub = next.Match(str, ix + 1, ref action);
        dead = sub < 0; // rest of the string failed to match
    }
    if (dead)
    {
        // failing to extend is still okay at a terminal state
        if (m_actions != null)
        {
            action = m_actions.a_act;
            return 0;
        }
        return -1;
    }
    return sub + 1;
}
// Rebuild the lexer tables from the serialised blob (after
// deserialisation); a non-empty tokens table means this already ran.
// Fields are read back in exactly the order they were written.
public void GetDfa()
{
    if (tokens.Count > 0)
        return; // already initialised
    Serialiser f = new Serialiser(arr);
    f.VersionCheck();
    m_encoding = (Encoding)f.Deserialise();
    toupper = (bool)f.Deserialise();
    cats = (Hashtable)f.Deserialise();
    m_gencat = (UnicodeCategory)f.Deserialise();
    usingEOF = (bool)f.Deserialise();
    starts = (Hashtable)f.Deserialise();
    Dfa.SetTokens(this, starts); // restore back-references lost in serialisation
    tokens = (Hashtable)f.Deserialise();
    reswds = (Hashtable)f.Deserialise();
}
// Two-way (de)serialisation hook: a null Serialiser asks for a blank
// instance; s.Encode writes the persistent fields; otherwise they are
// read back in the same fixed order.
public static object Serialise(object o, Serialiser s)
{
    if (s == null)
        return new Dfa(); // factory request: supply a blank instance
    Dfa dfa = (Dfa)o;
    if (s.Encode)
    {
        // write the four persistent fields in a fixed order
        s.Serialise(dfa.m_state);
        s.Serialise(dfa.m_map);
        s.Serialise(dfa.m_actions);
        s.Serialise(dfa.m_tokClass);
        return null;
    }
    // read them back in exactly the same order
    dfa.m_state = (int)s.Deserialise();
    dfa.m_map = (Hashtable)s.Deserialise();
    dfa.m_actions = (Action)s.Deserialise();
    dfa.m_tokClass = (string)s.Deserialise();
    return dfa;
}
// Construct or look up the DFA state reached on ch: propose a state from
// the NFA successors, close it, de-duplicate against known states, and
// only build it out when genuinely new.
internal Dfa Target(char ch)
{
    Dfa proposed = new Dfa(m_tks);
    for (NList p = m_nfa; !p.AtEnd; p = p.m_next)
        p.m_node.AddTarget(ch, proposed);
    if (proposed.m_nfa.AtEnd)
        return null; // nothing reachable on ch
    proposed.Closure();
    // De-duplicate against already-constructed states.
    for (int i = 0; i < m_tks.states.Count; ++i)
    {
        Dfa known = (Dfa)m_tks.states[i];
        if (known.SameAs(proposed))
            return known;
    }
    proposed.AddActions(); // brand new: build it recursively
    return proposed;
}
// Run the matched state's action list over the text matched so far.
// Returns false when nothing was matched or every action REJECTed.
private bool TryActions(Dfa dfa, ref TOKEN tok)
{
    int matched = m_pch - m_startMatch;
    if (matched == 0)
        return false;
    if (m_startMatch + matched <= m_buf.Length)
        yytext = m_buf.Substring(m_startMatch, matched);
    else
        yytext = m_buf.Substring(m_startMatch); // {EOF} rules can overshoot
    // Priority-ordered list of old-style actions; any of them may REJECT,
    // in which case the next one gets a chance.
    Dfa.Action entry = dfa.m_actions;
    bool reject = true;
    while (reject && entry != null)
    {
        int code = entry.a_act;
        reject = false;
        entry = entry.a_next;
        if (entry == null && dfa.m_tokClass != "")
        {
            // the last slot may be a new-style token-class construction
            if (m_debug)
                Console.WriteLine("creating a " + dfa.m_tokClass);
            tok = (TOKEN)Tfactory.create(dfa.m_tokClass, this);
        }
        else
        {
            tok = m_tokens.OldAction(this, ref yytext, code, ref reject);
            if (m_debug && !reject)
                Console.WriteLine("Old action " + code);
        }
    }
    return !reject;
}
// Serialiser hook: a null s requests a blank Dfa; s.Encode writes the
// five persistent fields; otherwise they are read back in the same order.
public static object Serialise(object o, Serialiser s)
{
    if (s == null)
        return new Dfa();
    Dfa target = (Dfa)o;
    if (s.Encode)
    {
        s.Serialise(target.m_state);
        s.Serialise(target.m_map);
        s.Serialise(target.m_actions);
        s.Serialise(target.m_tokClass);
        s.Serialise(target.m_reswds);
        return null;
    }
    target.m_state = (int)s.Deserialise();
    target.m_map = (Hashtable)s.Deserialise();
    target.m_actions = (Dfa.Action)s.Deserialise();
    target.m_tokClass = (string)s.Deserialise();
    target.m_reswds = (int)s.Deserialise();
    return target;
}
// Recursively match the lexer's input against the DFA (maximal munch):
// peek one character, follow the arc if there is one, and recurse. On
// failure, fall back to the most recent state that carried actions (a
// terminal): Restore(mark) rewinds the input there and TryActions runs
// the rule. dfa.m_reswds >= 0 triggers a reserved-word check on the
// freshly built token. All Console output is debug tracing only.
// NOTE(review): mirrors the other Match(ref TOKEN, Dfa) overload in this
// file — the trace strings must stay in sync between the two.
private bool Match(ref TOKEN tok, Dfa dfa) { char ch = this.PeekChar(); int pch = this.m_pch; int mark = 0; if (this.m_debug) { Console.Write("state {0} with ", (object)dfa.m_state); if (char.IsLetterOrDigit(ch) || char.IsPunctuation(ch)) { Console.WriteLine(ch); } else { Console.WriteLine("#" + (object)(int)ch); } } if (dfa.m_actions != null) { mark = this.Mark(); } Dfa dfa1; if ((dfa1 = (Dfa)dfa.m_map[(object)this.m_tokens.Filter(ch)]) == null) { if (this.m_debug) { Console.Write("{0} no arc", (object)dfa.m_state); } if (dfa.m_actions != null) { if (this.m_debug) { Console.WriteLine(" terminal"); } return(this.TryActions(dfa, ref tok)); } if (this.m_debug) { Console.WriteLine(" fails"); } return(false); } this.Advance(); if (!this.Match(ref tok, dfa1)) { if (this.m_debug) { Console.WriteLine("back to {0} with {1}", (object)dfa.m_state, (object)ch); } if (dfa.m_actions != null) { if (this.m_debug) { Console.WriteLine("{0} succeeds", (object)dfa.m_state); } this.Restore(mark); return(this.TryActions(dfa, ref tok)); } if (this.m_debug) { Console.WriteLine("{0} fails", (object)dfa.m_state); } return(false); } if (dfa.m_reswds >= 0) { ((ResWds)this.m_tokens.reswds[(object)dfa.m_reswds]).Check(this, ref tok); } if (this.m_debug) { Console.Write("{0} matched ", (object)dfa.m_state); if (this.m_pch <= this.m_buf.Length) { Console.WriteLine(this.m_buf.Substring(pch, this.m_pch - pch)); } else { Console.WriteLine(this.m_buf.Substring(pch)); } } return(true); }
// Main driver of lexer generation. Reads the .lex script line by line:
// handles %directives (%lexer, %encoding, %namespace, %define, %token,
// %node, %{ code copy, %declare{ action variables), <startstate>
// prefixes, then a regex plus its action (brace-enclosed actions may
// span lines via ToBraceIfFound). An action beginning with '%' names a
// token class: an undeclared class gets a generated class definition,
// and an inline initialiser is rewritten into a constructor via
// NewConstructor (yylval is shielded from FixActions by a temporary
// yymval rename). Afterwards a Dfa is built per start state, a warning
// is issued for lexers that can loop on bad input, and Emit writes the
// generated source.
// NOTE(review): this method contains a string literal with an embedded
// newline ("Unknown directive \n...") — edit the error-message region
// with care.
void _Create() { m_outFile.WriteLine("using System;using Tools;"); m_tokens = new YyLexer(erh); string buf = ""; string str = ""; string name=""; string startsym; Nfa nfa; int p,q,max; Console.WriteLine("Reading Input File"); while (!m_inFile.Eof()) { buf = m_inFile.ReadLine(); startsym = "YYINITIAL"; max = buf.Length; p = 0; if (!White(buf,ref p,max)) continue; if (buf[p]=='%') { // directive // %lexer if(buf.Length>=p+6 && "%lexer".Equals(buf.Substring(p,6))) { m_lexerseen = true; p+=6; if (!White(buf,ref p, max)) continue; q = p; NonWhite(buf,ref p, max); if (q!=p) m_outname = buf.Substring(q,p-q); continue; } // %encoding if (buf.Length>=p+9 && "%encoding".Equals(buf.Substring(p,9))) { p+=9; White(buf,ref p, max); q = p; NonWhite(buf,ref p, max); m_tokens.InputEncoding = buf.Substring(q,p-q); continue; } // %namespace if (buf.Length>=p+10 && "%namespace".Equals(buf.Substring(p,10))) { p+=10; White(buf,ref p,max); q = p; NonWhite(buf,ref p,max); m_outFile.WriteLine("namespace "+buf.Substring(q,p-q)+" {"); m_namespace = true; continue; } // %define if(buf.Length>=p+7 && "%define".Equals(buf.Substring(p,7))) { p+=7; White(buf,ref p,max); q = p; if (!NonWhite(buf,ref p,max)) { erh.Error(new CSToolsException(44,"Bad define")); continue; } name=buf.Substring(q,p-q); p++; if (White(buf,ref p,max)) defines[name]=buf.Substring(p,max-p); } else // % token/node if (buf.Length>=p+6 && "%token".Equals(buf.Substring(p,6))) EmitClassDefin(buf,ref p,max,m_inFile,"TOKEN", out str,out name,true); else if (buf.Length>=p+5 && "%node".Equals(buf.Substring(p,5))) EmitClassDefin(buf,ref p,max,m_inFile,"NODE",out str,out name,true); else if (buf.Length>=p+2 && "%{".Equals(buf.Substring(p,2))) CopyCode(); else if (buf.Length>=p+9 && "%declare{".Equals(buf.Substring(p,9))) { p += 8; m_actvars = ToBraceIfFound(ref buf,ref p,ref max,m_inFile); m_actvars = m_actvars.Substring(1,m_actvars.Length-2); } else m_tokens.erh.Error(new CSToolsException(8,"Unknown directive 
"+buf.Substring(p,max-p))); continue; } else if (buf[p]=='<') { // startstate q = p++; while (p<max && buf[p]!='>') p++; if (p++ ==max) { m_tokens.erh.Error(new CSToolsException(25,"Bad startsymbol")); continue; } startsym = buf.Substring(q+1,p-q-2); White(buf, ref p, max); } q=p; // can't simply look for nonwhite space here because embedded spaces GetRegex(buf,ref p,max); string trgx = buf.Substring(q,p-q); if (m_tokens.toupper) trgx = trgx.ToUpper(); Regex rgx = new Regex(this,q,trgx); Nfa nfa1= new Nfa(this,rgx); if (!m_startstates.Contains(startsym)) m_startstates[startsym] = new Nfa(this); nfa = (Nfa)m_startstates[startsym]; nfa.AddEps(nfa1); White(buf,ref p,max); m_actions[nfa1.m_end.m_state] = nfa1.m_end; // handle multiline actions enclosed in {} nfa1.m_end.m_sTerminal = ToBraceIfFound(ref buf,ref p, ref max,m_inFile); // examine action string if (nfa1.m_end.m_sTerminal.Length>0 && nfa1.m_end.m_sTerminal[0] == '%') { string tokClass,b = nfa1.m_end.m_sTerminal; q = 1; max = b.Length; int n; for (n=0;q<max&&b[q]!=' '&&b[q]!='\t'&&b[q]!='\n'&&b[q]!='{'&&b[q]!=':';q++,n++) // extract the class name ; tokClass = b.Substring(1,n); // new-style auto token construction object ob = m_tokens.tokens[tokClass]; TokClassDef t = (TokClassDef)ob; bool isNew = (t==null); // check for initialisation action following %name string init = b.Substring(n+1,b.Length-n-1); string bas1 = "TOKEN"; bool haveInit = false; for (int j=0;j<init.Length;j++) if (init[j]=='{') { haveInit = true; break; } else if (init[j]==':') { bas1 = ""; for (;init[j]==' '||init[j]=='\r';j++) ; for (;init[j]!=' '&&init[j]!='\t'&&init[j]!='{'&&init[j]!='\n';j++) bas1 += init[j]; break; } if (isNew && tokClass!="TOKEN") { // this token class has not been declared. 
Do so now bool isNode = (m_tokens.tokens[bas1]!=null); t = new TokClassDef(this,tokClass,bas1); // updates TOKEN.tokens m_outFile.WriteLine("//%{0}+{1}",tokClass,t.m_yynum); m_outFile.Write("public class {0} : {1}",tokClass,bas1); m_outFile.WriteLine("{ public override string yyname { get { return \""+tokClass+"\";}}"); m_outFile.WriteLine("public override int yynum { get { return "+t.m_yynum+"; }}"); m_outFile.WriteLine(" public "+tokClass+"(Lexer yyl):base(yyl) {}}"); } if (haveInit && init.IndexOf("%except")<0) { init = init.Replace("yylval","yymval"); // yuk: avoid the next line munging yylval init = FixActions(init); init = init.Replace("yymval","yylval"); nfa1.m_end.m_sTerminal = "%"+NewConstructor(t,init); } } } if (!m_lexerseen) m_tokens.erh.Error(new CSToolsException(26,"No %lexer directive detected: possibly incorrect text encoding?")); Console.WriteLine("Constructing DFAs"); foreach (string s in m_startstates.Keys) { Dfa d = new Dfa((Nfa)m_startstates[s]); m_tokens.starts[s] = d; if (d.m_actions!=null) Console.WriteLine("Warning: This lexer script generates an infinite token stream on bad input"); } Console.WriteLine("Output phase"); Emit(m_actions,m_actvars,m_namespace,m_showDfa); Console.WriteLine("End of Create"); object o = m_tokens.starts["YYINITIAL"]; if (o == null) Console.WriteLine("Warning: No lexer"); else if (((Dfa)o).m_actions!=null) // repeat the above warning Console.WriteLine("Warning: This lexer script generates an infinite token stream on bad input"); }
// Match a Dfa against the lexer's input, recursively following arcs
// (maximal munch): peek one character, take the arc if present, recurse.
// When the longer match fails, fall back to the deepest state that had
// actions (a terminal), rewind the input there with Restore(mark) and
// run TryActions (which can still fail on REJECT). A non-negative
// m_reswds triggers a reserved-word check on the built token.
// 'depth' counts recursion depth; the default keeps existing two-argument
// callers working.
// Fix: removed the dead, empty "if (depth > 16) { }" statement (an
// unfinished recursion guard with no effect) and the dangling
// commented-out "ch==0 ||" fragment inside the arc-lookup condition.
bool Match(ref TOKEN tok, Dfa dfa, int depth = 0)
{
    char ch = PeekChar();
    int op = m_pch, mark = 0;
    Dfa next;

    if (m_debug)
    {
        Console.Write("state {0} with ", dfa.m_state);
        if (char.IsLetterOrDigit(ch) || char.IsPunctuation(ch))
        {
            Console.WriteLine(ch);
        }
        else
        {
            Console.WriteLine("#" + (int)ch);
        }
    }
    if (dfa.m_actions != null)
    {
        mark = Mark(); // this state is a terminal: remember a restart point
    }
    if ((next = ((Dfa)dfa.m_map[m_tokens.Filter(ch)])) == null)
    {
        if (m_debug)
        {
            Console.Write("{0} no arc", dfa.m_state);
        }
        if (dfa.m_actions != null)
        {
            if (m_debug)
            {
                Console.WriteLine(" terminal");
            }
            return(TryActions(dfa, ref tok)); // fails on REJECT
        }
        if (m_debug)
        {
            Console.WriteLine(" fails");
        }
        return(false);
    }
    Advance();
    if (!Match(ref tok, next, depth + 1))
    {
        // rest of string fails
        if (m_debug)
        {
            Console.WriteLine("back to {0} with {1}", dfa.m_state, ch);
        }
        if (dfa.m_actions != null)
        {
            // this is still okay at a terminal
            if (m_debug)
            {
                Console.WriteLine("{0} succeeds", dfa.m_state);
            }
            Restore(mark);
            return(TryActions(dfa, ref tok));
        }
        if (m_debug)
        {
            Console.WriteLine("{0} fails", dfa.m_state);
        }
        return(false);
    }
    if (dfa.m_reswds >= 0)
    {
        ((ResWds)m_tokens.reswds[dfa.m_reswds]).Check(this, ref tok);
    }
    if (m_debug)
    {
        Console.Write("{0} matched ", dfa.m_state);
        if (m_pch <= m_buf.Length)
        {
            Console.WriteLine(m_buf.Substring(op, m_pch - op));
        }
        else
        {
            Console.WriteLine(m_buf.Substring(op));
        }
    }
    return(true);
}
// helper for building DFa
/// <exclude/>
public void AddTarget(char ch, Dfa next)
{
    int j = 0;
    // scan every outgoing arc; matching ones contribute their NFA target
    while (j < m_arcs.Count)
    {
        Arc arc = (Arc)m_arcs[j];
        if (arc.Match(ch))
            next.AddNfaNode(arc.m_next);
        j++;
    }
}
/// <exclude/>
internal Dfa Target(char ch)
{
    // construct or lookup the target for a new arc
    Dfa fresh = new Dfa(m_tks);
    NList walk = m_nfa;
    for (; !walk.AtEnd; walk = walk.m_next)
        walk.m_node.AddTarget(ch, fresh);
    // check we actually got something
    if (fresh.m_nfa.AtEnd)
        return null;
    fresh.Closure();
    // now check we haven't got it already
    int stateCount = m_tks.states.Count; // unchanged by this loop
    for (int k = 0; k < stateCount; k++)
    {
        Dfa seen = (Dfa)m_tks.states[k];
        if (seen.SameAs(fresh))
            return seen;
    }
    // this is a brand new Dfa node so recursively build it
    fresh.AddActions();
    return fresh;
}
/// <exclude/>
internal bool SameAs(Dfa dfa)
{
    // Two states are the same iff their ordered NFA node lists agree.
    NList left = m_nfa, right = dfa.m_nfa;
    for (; left.m_node == right.m_node && !left.AtEnd;
         left = left.m_next, right = right.m_next)
    {
        // all the work happens in the loop header
    }
    return left.m_node == right.m_node;
}
// Apply the terminal state's action list to the text matched so far;
// false means no text was matched or every action REJECTed.
bool TryActions(Dfa dfa, ref TOKEN tok)
{
    int len = m_pch - m_startMatch;
    if (len == 0)
        return false;
    // matched text; an {EOF} rule may run past the end of the buffer
    yytext = (m_startMatch + len <= m_buf.Length)
        ? m_buf.Substring(m_startMatch, len)
        : m_buf.Substring(m_startMatch);
    // priority-ordered list of old-style actions; any of them may REJECT
    bool reject = true;
    for (Dfa.Action node = dfa.m_actions; reject && node != null; )
    {
        int act = node.a_act;
        reject = false;
        node = node.a_next;
        if (node == null && dfa.m_tokClass != "")
        {
            // the last entry may be a new-style token-class construction
            if (m_debug)
                Console.WriteLine("creating a " + dfa.m_tokClass);
            tok = (TOKEN)Tfactory.create(dfa.m_tokClass, this);
        }
        else
        {
            tok = m_tokens.OldAction(this, ref yytext, act, ref reject);
            if (m_debug && !reject)
                Console.WriteLine("Old action " + act);
        }
    }
    return !reject;
}
// match a Dfa against lexer's input
// Recursive maximal-munch matcher: peek a character, follow the arc when
// one exists, and recurse on the target state. If the longer match fails,
// fall back to this state when it carries actions (a terminal): Restore
// rewinds the input to the saved mark and TryActions runs the rule (which
// may still fail via REJECT). dfa.m_reswds >= 0 runs a reserved-word
// check over the built token. Console output is debug tracing only.
// NOTE(review): mirrors the decompiled Match(ref TOKEN, Dfa) elsewhere in
// this file — keep the trace strings in sync.
bool Match(ref TOKEN tok,Dfa dfa) { char ch=PeekChar(); int op=m_pch, mark=0; Dfa next; if (m_debug) { Console.Write("state {0} with ",dfa.m_state); if (char.IsLetterOrDigit(ch)||char.IsPunctuation(ch)) Console.WriteLine(ch); else Console.WriteLine("#"+(int)ch); } if (dfa.m_actions!=null) { mark = Mark(); } if (// ch==0 || (next=((Dfa)dfa.m_map[m_tokens.Filter(ch)]))==null) { if (m_debug) Console.Write("{0} no arc",dfa.m_state); if (dfa.m_actions!=null) { if (m_debug) Console.WriteLine(" terminal"); return TryActions(dfa,ref tok); // fails on REJECT } if (m_debug) Console.WriteLine(" fails"); return false; } Advance(); if (!Match(ref tok, next)) { // rest of string fails if (m_debug) Console.WriteLine("back to {0} with {1}",dfa.m_state,ch); if (dfa.m_actions!=null) { // this is still okay at a terminal if (m_debug) Console.WriteLine("{0} succeeds",dfa.m_state); Restore(mark); return TryActions(dfa,ref tok); } if (m_debug) Console.WriteLine("{0} fails",dfa.m_state); return false; } if (dfa.m_reswds>=0) { ((ResWds)m_tokens.reswds[dfa.m_reswds]).Check(this,ref tok); } if (m_debug) { Console.Write("{0} matched ",dfa.m_state); if (m_pch<=m_buf.Length) Console.WriteLine(m_buf.Substring(op,m_pch-op)); else Console.WriteLine(m_buf.Substring(op)); } return true; }
// Debug dump of this DFA state: prints the state number, then (when
// present) the priority-ordered action list and token class, then each
// distinct target state together with every character that leads to it.
// The second enumerator (idy) re-walks m_map to group characters by
// their shared target Dfa; 'amap' marks characters already printed so
// each arc is shown only once. Printable ASCII (32..127) is shown
// literally, anything else as " #<code> ".
// NOTE(review): the unguarded inner for(;;) relies on pD being present
// in m_map — true here because pD was just read from m_map, but fragile
// if this code is copied elsewhere.
public void Print() { Console.Write("{0}:", m_state); if (m_actions != null) { Console.Write(" ("); for (Action a = m_actions; a != null; a = a.a_next) { Console.Write("{0} <", a.a_act); } if (m_tokClass != "") { Console.Write(m_tokClass); } Console.Write(">)"); } Console.WriteLine(); Hashtable amap = new Hashtable(); // char->bool IDictionaryEnumerator idx = m_map.GetEnumerator(); for (int count = m_map.Count; count-- > 0;) { idx.MoveNext(); char j = (char)idx.Key; Dfa pD = (Dfa)idx.Value; if (!amap.Contains(j)) { amap[j] = true; Console.Write(" {0} ", pD.m_state); int ij = (int)j; if (ij >= 32 && ij < 128) { Console.Write(j); } else { Console.Write(" #{0} ", ij); } IDictionaryEnumerator idy = m_map.GetEnumerator(); for (;;) { idy.MoveNext(); Dfa pD1 = (Dfa)idy.Value; if (pD1 == pD) { break; } } for (int count1 = count; count1 > 0; count1--) { idy.MoveNext(); j = (char)idy.Key; Dfa pD1 = (Dfa)idy.Value; if (pD == pD1) { amap[j] = true; ij = (int)j; if (ij >= 32 && ij < 128) { Console.Write(j); } else { Console.Write(" #{0} ", ij); } } } Console.WriteLine(); } } }
// Debug dump of this DFA state (decompiled twin of the hand-written
// Print in this file): prints the state number, the action list and
// token class when present, then each distinct target state with every
// character leading to it. enumerator2 re-scans m_map from the start to
// find the current target, then continues from there collecting the
// remaining characters that share it; 'hashtable' marks characters
// already printed. Printable ASCII (32..127) is shown literally,
// anything else as " #<code> ".
// NOTE(review): the unguarded do/while assumes dfa1 occurs in m_map —
// guaranteed here since dfa1 was just read from it.
public void Print() { Console.Write("{0}:", (object)this.m_state); if (this.m_actions != null) { Console.Write(" ("); for (Dfa.Action action = this.m_actions; action != null; action = action.a_next) { Console.Write("{0} <", (object)action.a_act); } if (this.m_tokClass != "") { Console.Write(this.m_tokClass); } Console.Write(">)"); } Console.WriteLine(); Hashtable hashtable = new Hashtable(); IDictionaryEnumerator enumerator1 = this.m_map.GetEnumerator(); int count = this.m_map.Count; while (count-- > 0) { enumerator1.MoveNext(); char key1 = (char)enumerator1.Key; Dfa dfa1 = (Dfa)enumerator1.Value; if (!hashtable.Contains((object)key1)) { hashtable[(object)key1] = (object)true; Console.Write(" {0} ", (object)dfa1.m_state); int num1 = (int)key1; if (num1 >= 32 && num1 < 128) { Console.Write(key1); } else { Console.Write(" #{0} ", (object)num1); } IDictionaryEnumerator enumerator2 = this.m_map.GetEnumerator(); do { enumerator2.MoveNext(); }while ((Dfa)enumerator2.Value != dfa1); for (int index = count; index > 0; --index) { enumerator2.MoveNext(); char key2 = (char)enumerator2.Key; Dfa dfa2 = (Dfa)enumerator2.Value; if (dfa1 == dfa2) { hashtable[(object)key2] = (object)true; int num2 = (int)key2; if (num2 >= 32 && num2 < 128) { Console.Write(key2); } else { Console.Write(" #{0} ", (object)num2); } } } Console.WriteLine(); } } }