/// <summary>
/// Split <code>&lt;ID&gt; = &lt;e:expr&gt; ;</code> into 4 chunks for tokenizing by
/// <see cref="Tokenize(string)"/>.
/// </summary>
internal virtual IList<Chunk> Split(string pattern)
{
    int p = 0;
    int n = pattern.Length;
    IList<Chunk> chunks = new List<Chunk>();
    // find all start and stop indexes first, then collect
    IList<int> starts = new List<int>();
    IList<int> stops = new List<int>();
    while (p < n)
    {
        if (p == pattern.IndexOf(escape + start, p))
        {
            // escaped start delimiter: skip without recording a tag start
            p += escape.Length + start.Length;
        }
        else if (p == pattern.IndexOf(escape + stop, p))
        {
            // escaped stop delimiter: skip without recording a tag stop
            p += escape.Length + stop.Length;
        }
        else if (p == pattern.IndexOf(start, p))
        {
            starts.Add(p);
            p += start.Length;
        }
        else if (p == pattern.IndexOf(stop, p))
        {
            stops.Add(p);
            p += stop.Length;
        }
        else
        {
            p++;
        }
    }
    if (starts.Count > stops.Count)
    {
        throw new ArgumentException("unterminated tag in pattern: " + pattern);
    }
    if (starts.Count < stops.Count)
    {
        throw new ArgumentException("missing start tag in pattern: " + pattern);
    }
    int ntags = starts.Count;
    for (int i = 0; i < ntags; i++)
    {
        if (starts[i] >= stops[i])
        {
            throw new ArgumentException("tag delimiters out of order in pattern: " + pattern);
        }
    }
    // collect into chunks now
    if (ntags == 0)
    {
        string text = Sharpen.Runtime.Substring(pattern, 0, n);
        chunks.Add(new TextChunk(text));
    }
    if (ntags > 0 && starts[0] > 0)
    {
        // copy text up to first tag into chunks
        string text = Sharpen.Runtime.Substring(pattern, 0, starts[0]);
        chunks.Add(new TextChunk(text));
    }
    for (int i = 0; i < ntags; i++)
    {
        // copy inside of <tag>
        string tag = Sharpen.Runtime.Substring(pattern, starts[i] + start.Length, stops[i]);
        string ruleOrToken = tag;
        string label = null;
        int colon = tag.IndexOf(':');
        if (colon >= 0)
        {
            label = Sharpen.Runtime.Substring(tag, 0, colon);
            ruleOrToken = Sharpen.Runtime.Substring(tag, colon + 1, tag.Length);
        }
        chunks.Add(new TagChunk(label, ruleOrToken));
        if (i + 1 < ntags)
        {
            // copy from end of <tag> to start of next
            string text = Sharpen.Runtime.Substring(pattern, stops[i] + stop.Length, starts[i + 1]);
            chunks.Add(new TextChunk(text));
        }
    }
    if (ntags > 0)
    {
        int afterLastTag = stops[ntags - 1] + stop.Length;
        if (afterLastTag < n)
        {
            // copy text from end of last tag to end
            string text = Sharpen.Runtime.Substring(pattern, afterLastTag, n);
            chunks.Add(new TextChunk(text));
        }
    }
    // strip out the escape sequences from text chunks but not tags
    for (int i = 0; i < chunks.Count; i++)
    {
        Chunk c = chunks[i];
        if (c is TextChunk)
        {
            TextChunk tc = (TextChunk)c;
            string unescaped = tc.Text.Replace(escape, string.Empty);
            if (unescaped.Length < tc.Text.Length)
            {
                chunks[i] = new TextChunk(unescaped);
            }
        }
    }
    return chunks;
}
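// A minimal usage sketch (not part of the original source), assuming the
// default delimiters start = "<" and stop = ">". The expected chunk contents
// below are inferred from the splitting logic above, not captured from a run.
internal void SplitExample()
{
    IList<Chunk> chunks = Split("<ID> = <e:expr> ;");
    // Expected: TagChunk(ID), TextChunk(" = "), TagChunk(e:expr), TextChunk(" ;")
    System.Diagnostics.Debug.Assert(chunks.Count == 4);
}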
/// <summary>Parses the _revisions dict from a document into an array of revision ID strings.</summary>
internal static IList<string> ParseCouchDBRevisionHistory(IDictionary<string, object> docProperties)
{
    var revisions = docProperties.Get("_revisions").AsDictionary<string, object>();
    if (revisions == null)
    {
        return new List<string>();
    }
    // "ids" lists the revision suffixes newest-first; "start" is the
    // generation number of the newest revision
    var ids = revisions["ids"].AsList<string>();
    if (ids == null || ids.Count == 0)
    {
        return new List<string>();
    }
    var revIDs = new List<string>(ids);
    var start = Convert.ToInt64(revisions.Get("start"));
    for (var i = 0; i < revIDs.Count; i++)
    {
        // prefix each suffix with its (descending) generation number
        revIDs[i] = start-- + "-" + revIDs[i];
    }
    return revIDs;
}
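// A hedged sketch (not part of the original source) of the CouchDB
// _revisions shape this method consumes and the history it yields. The
// dictionary literal and revision suffixes below are illustrative values.
internal static void ParseCouchDBRevisionHistoryExample()
{
    var docProperties = new Dictionary<string, object>
    {
        ["_revisions"] = new Dictionary<string, object>
        {
            ["start"] = 3,
            ["ids"] = new List<string> { "deadbeef", "c0ffee" }
        }
    };
    var history = ParseCouchDBRevisionHistory(docProperties);
    // Expected: ["3-deadbeef", "2-c0ffee"]
    System.Diagnostics.Debug.Assert(history.Count == 2);
}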
/// <summary>
/// Serialize state descriptors, edge descriptors, and decision→state map
/// into a list of ints.
/// </summary>
/// <remarks>
/// Serialize state descriptors, edge descriptors, and decision→state map
/// into a list of ints:
///
///   grammar-type, (ANTLRParser.LEXER, ...)
///   max token type,
///   num states,
///   state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
///   num rules,
///   rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
///   (args are token type,actionIndex in lexer else 0,0)
///   num modes,
///   mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
///   num sets
///   set-0-interval-count intervals, set-1-interval-count intervals, ...
///   num total edges,
///   src, trg, edge-type, edge arg1, edge arg2, edge arg3 (all three args always present), ...
///   num decisions,
///   decision-0-start-state, decision-1-start-state, ...
///
/// Convenient to pack into unsigned shorts to store as a string.
/// </remarks>
public virtual List<int> Serialize()
{
    List<int> data = new List<int>();
    data.Add(ATNDeserializer.SerializedVersion);
    SerializeUUID(data, ATNDeserializer.SerializedUuid);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.Add((int)atn.grammarType);
    data.Add(atn.maxTokenType);
    int nedges = 0;
    IDictionary<IntervalSet, int> setIndices = new Dictionary<IntervalSet, int>();
    IList<IntervalSet> sets = new List<IntervalSet>();
    // dump states, count edges and collect sets while doing so
    List<int> nonGreedyStates = new List<int>();
    List<int> sllStates = new List<int>();
    List<int> precedenceStates = new List<int>();
    data.Add(atn.states.Count);
    foreach (ATNState s in atn.states)
    {
        if (s == null)
        {
            // might be optimized away
            data.Add((int)StateType.InvalidType);
            continue;
        }
        StateType stateType = s.StateType;
        if (s is DecisionState)
        {
            DecisionState decisionState = (DecisionState)s;
            if (decisionState.nonGreedy)
            {
                nonGreedyStates.Add(s.stateNumber);
            }
            if (decisionState.sll)
            {
                sllStates.Add(s.stateNumber);
            }
        }
        if (s is RuleStartState && ((RuleStartState)s).isPrecedenceRule)
        {
            precedenceStates.Add(s.stateNumber);
        }
        data.Add((int)stateType);
        if (s.ruleIndex == -1)
        {
            data.Add(char.MaxValue);
        }
        else
        {
            data.Add(s.ruleIndex);
        }
        if (s.StateType == StateType.LoopEnd)
        {
            data.Add(((LoopEndState)s).loopBackState.stateNumber);
        }
        else if (s is BlockStartState)
        {
            data.Add(((BlockStartState)s).endState.stateNumber);
        }
        if (s.StateType != StateType.RuleStop)
        {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.NumberOfTransitions;
        }
        for (int i = 0; i < s.NumberOfTransitions; i++)
        {
            Transition t = s.Transition(i);
            TransitionType edgeType = Transition.serializationTypes.Get(t.GetType());
            if (edgeType == TransitionType.Set || edgeType == TransitionType.NotSet)
            {
                SetTransition st = (SetTransition)t;
                if (!setIndices.ContainsKey(st.set))
                {
                    sets.Add(st.set);
                    setIndices.Put(st.set, sets.Count - 1);
                }
            }
        }
    }
    // non-greedy states
    data.Add(nonGreedyStates.Count);
    for (int i = 0; i < nonGreedyStates.Count; i++)
    {
        data.Add(nonGreedyStates[i]);
    }
    // SLL decisions
    data.Add(sllStates.Count);
    for (int i = 0; i < sllStates.Count; i++)
    {
        data.Add(sllStates[i]);
    }
    // precedence states
    data.Add(precedenceStates.Count);
    for (int i = 0; i < precedenceStates.Count; i++)
    {
        data.Add(precedenceStates[i]);
    }
    int nrules = atn.ruleToStartState.Length;
    data.Add(nrules);
    for (int r = 0; r < nrules; r++)
    {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.Add(ruleStartState.stateNumber);
        bool leftFactored = ruleNames[ruleStartState.ruleIndex].IndexOf(ATNSimulator.RuleVariantDelimiter) >= 0;
        data.Add(leftFactored ? 1 : 0);
        if (atn.grammarType == ATNType.Lexer)
        {
            if (atn.ruleToTokenType[r] == TokenConstants.Eof)
            {
                data.Add(char.MaxValue);
            }
            else
            {
                data.Add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.Count;
    data.Add(nmodes);
    if (nmodes > 0)
    {
        foreach (ATNState modeStartState in atn.modeToStartState)
        {
            data.Add(modeStartState.stateNumber);
        }
    }
    int nsets = sets.Count;
    data.Add(nsets);
    foreach (IntervalSet set in sets)
    {
        // EOF is pulled out of the interval list and flagged separately
        bool containsEof = set.Contains(TokenConstants.Eof);
        if (containsEof && set.GetIntervals()[0].b == TokenConstants.Eof)
        {
            data.Add(set.GetIntervals().Count - 1);
        }
        else
        {
            data.Add(set.GetIntervals().Count);
        }
        data.Add(containsEof ? 1 : 0);
        foreach (Interval I in set.GetIntervals())
        {
            if (I.a == TokenConstants.Eof)
            {
                if (I.b == TokenConstants.Eof)
                {
                    continue;
                }
                data.Add(0);
            }
            else
            {
                data.Add(I.a);
            }
            data.Add(I.b);
        }
    }
    data.Add(nedges);
    foreach (ATNState s in atn.states)
    {
        if (s == null)
        {
            // might be optimized away
            continue;
        }
        if (s.StateType == StateType.RuleStop)
        {
            continue;
        }
        for (int i = 0; i < s.NumberOfTransitions; i++)
        {
            Transition t = s.Transition(i);
            if (atn.states[t.target.stateNumber] == null)
            {
                throw new InvalidOperationException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            TransitionType edgeType = Transition.serializationTypes.Get(t.GetType());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch (edgeType)
            {
                case TransitionType.Rule:
                {
                    trg = ((RuleTransition)t).followState.stateNumber;
                    arg1 = ((RuleTransition)t).target.stateNumber;
                    arg2 = ((RuleTransition)t).ruleIndex;
                    arg3 = ((RuleTransition)t).precedence;
                    break;
                }
                case TransitionType.Precedence:
                {
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition)t;
                    arg1 = ppt.precedence;
                    break;
                }
                case TransitionType.Predicate:
                {
                    PredicateTransition pt = (PredicateTransition)t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                }
                case TransitionType.Range:
                {
                    arg1 = ((RangeTransition)t).from;
                    arg2 = ((RangeTransition)t).to;
                    if (arg1 == TokenConstants.Eof)
                    {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                }
                case TransitionType.Atom:
                {
                    arg1 = ((AtomTransition)t).label;
                    if (arg1 == TokenConstants.Eof)
                    {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                }
                case TransitionType.Action:
                {
                    ActionTransition at = (ActionTransition)t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1)
                    {
                        arg2 = unchecked((int)(0xFFFF));
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                }
                case TransitionType.Set:
                case TransitionType.NotSet:
                {
                    arg1 = setIndices.Get(((SetTransition)t).set);
                    break;
                }
                case TransitionType.Wildcard:
                {
                    break;
                }
            }
            data.Add(src);
            data.Add(trg);
            data.Add((int)edgeType);
            data.Add(arg1);
            data.Add(arg2);
            data.Add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.Count;
    data.Add(ndecisions);
    foreach (DecisionState decStartState in atn.decisionToState)
    {
        data.Add(decStartState.stateNumber);
    }
    //
    // LEXER ACTIONS
    //
    if (atn.grammarType == ATNType.Lexer)
    {
        data.Add(atn.lexerActions.Length);
        foreach (ILexerAction action in atn.lexerActions)
        {
            data.Add((int)action.ActionType);
            switch (action.ActionType)
            {
                case LexerActionType.Channel:
                {
                    int channel = ((LexerChannelAction)action).Channel;
                    data.Add(channel != -1 ? channel : unchecked((int)(0xFFFF)));
                    data.Add(0);
                    break;
                }
                case LexerActionType.Custom:
                {
                    int ruleIndex = ((LexerCustomAction)action).RuleIndex;
                    int actionIndex = ((LexerCustomAction)action).ActionIndex;
                    data.Add(ruleIndex != -1 ? ruleIndex : unchecked((int)(0xFFFF)));
                    data.Add(actionIndex != -1 ? actionIndex : unchecked((int)(0xFFFF)));
                    break;
                }
                case LexerActionType.Mode:
                {
                    int mode = ((LexerModeAction)action).Mode;
                    data.Add(mode != -1 ? mode : unchecked((int)(0xFFFF)));
                    data.Add(0);
                    break;
                }
                case LexerActionType.PushMode:
                {
                    int mode = ((LexerPushModeAction)action).Mode;
                    data.Add(mode != -1 ? mode : unchecked((int)(0xFFFF)));
                    data.Add(0);
                    break;
                }
                case LexerActionType.Type:
                {
                    int type = ((LexerTypeAction)action).Type;
                    data.Add(type != -1 ? type : unchecked((int)(0xFFFF)));
                    data.Add(0);
                    break;
                }
                case LexerActionType.More:
                case LexerActionType.PopMode:
                case LexerActionType.Skip:
                {
                    // these actions carry no operands
                    data.Add(0);
                    data.Add(0);
                    break;
                }
                default:
                {
                    string message = string.Format(CultureInfo.CurrentCulture, "The specified lexer action type {0} is not valid.", action.ActionType);
                    throw new ArgumentException(message);
                }
            }
        }
    }
    // don't adjust the first value since that's the version number;
    // every other value is shifted by +2 so the common values -1 and 0
    // avoid 0xFFFF and 0x0000 when packed into chars (the deserializer
    // reverses this offset)
    for (int i = 1; i < data.Count; i++)
    {
        if (data[i] < char.MinValue || data[i] > char.MaxValue)
        {
            throw new NotSupportedException("Serialized ATN data element out of range.");
        }
        int value = (data[i] + 2) & unchecked((int)(0xFFFF));
        data[i] = value;
    }
    return data;
}
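// A hedged round-trip sketch (not part of the original source). The
// serialized ints (each offset by +2 above, except the version in data[0])
// are packed into a char array and handed back to ATNDeserializer, which is
// assumed here to expose a Deserialize(char[]) overload that reverses the
// offset and rebuilds an equivalent ATN.
public virtual ATN SerializeRoundTripExample()
{
    List<int> data = Serialize();
    char[] serialized = new char[data.Count];
    for (int i = 0; i < data.Count; i++)
    {
        // every element was range-checked above, so the narrowing cast is safe
        serialized[i] = (char)data[i];
    }
    return new ATNDeserializer().Deserialize(serialized);
}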