// Checks GetOperation on a trie holding "hello" and "hello!" (reported as tokens 0 and 1)
// when the buffer cannot supply a full token from the requested offset:
//   - "hi"     at offset 0 -> no match, token is -1
//   - " hello" at offset 1 -> match, token is 0
//   - " hello" at offset 2 -> no match, token is -1
public void VerifySimpleTrieNotEnoughBufferLeft()
{
    SimpleTrie trie = new SimpleTrie();
    trie.AddToken(Encoding.UTF8.GetBytes("hello"));
    trie.AddToken(Encoding.UTF8.GetBytes("hello!"));

    byte[] tooShort = Encoding.UTF8.GetBytes("hi");
    byte[] padded = Encoding.UTF8.GetBytes(" hello");
    int matched;
    int offset;

    // Buffer is shorter than every registered token.
    offset = 0;
    bool foundInShort = trie.GetOperation(tooShort, tooShort.Length, ref offset, out matched);
    Assert.IsFalse(foundInShort);
    Assert.AreEqual(-1, matched);

    // Exactly "hello" remains after the leading space.
    offset = 1;
    bool foundAfterSpace = trie.GetOperation(padded, padded.Length, ref offset, out matched);
    Assert.IsTrue(foundAfterSpace);
    Assert.AreEqual(0, matched);

    // Only "ello" remains, which is not a registered token.
    offset = 2;
    bool foundMidToken = trie.GetOperation(padded, padded.Length, ref offset, out matched);
    Assert.IsFalse(foundMidToken);
    Assert.AreEqual(-1, matched);
}
// Verifies that a trie assembled with Append from two source tries recognizes the tokens of both.
//
// The first trie holds "hello" and "hello!", the second holds "hi" and "there!". Both are appended,
// in that order, to an initially empty trie, and every lookup is made against the combined trie.
//
// NOTE(review): the expected indices 0..3 assume Append numbers the appended tokens sequentially
// after the tokens already present in the target trie — confirm against SimpleTrie.Append.
public void VerifySimpleTrieCombine()
{
    byte[] hello = Encoding.UTF8.GetBytes("hello");
    byte[] helloBang = Encoding.UTF8.GetBytes("hello!");
    byte[] hi = Encoding.UTF8.GetBytes("hi");
    byte[] there = Encoding.UTF8.GetBytes("there!");

    SimpleTrie t = new SimpleTrie();
    t.AddToken(hello);
    t.AddToken(helloBang);

    // These tokens must go into the second trie; otherwise it stays empty and
    // Append(t2) contributes nothing to the combined trie.
    SimpleTrie t2 = new SimpleTrie();
    t2.AddToken(hi);
    t2.AddToken(there);

    SimpleTrie combined = new SimpleTrie();
    combined.Append(t);
    combined.Append(t2);

    byte[] source1 = Encoding.UTF8.GetBytes("hello there");
    byte[] source2 = Encoding.UTF8.GetBytes("hello! there");
    byte[] source3 = Encoding.UTF8.GetBytes("hi there");
    byte[] source4 = Encoding.UTF8.GetBytes("there!");

    int token;

    // All lookups target the combined trie: that is the object under test.
    int pos = 0;
    Assert.IsTrue(combined.GetOperation(source1, source1.Length, ref pos, out token));
    Assert.AreEqual(0, token);

    pos = 0;
    Assert.IsTrue(combined.GetOperation(source2, source2.Length, ref pos, out token));
    Assert.AreEqual(1, token);

    pos = 0;
    Assert.IsTrue(combined.GetOperation(source3, source3.Length, ref pos, out token));
    Assert.AreEqual(2, token);

    pos = 0;
    Assert.IsTrue(combined.GetOperation(source4, source4.Length, ref pos, out token));
    Assert.AreEqual(3, token);
}
// Checks token recognition at offset 0 for a trie holding "hello", "hello!" and "hi"
// (reported as tokens 0, 1 and 2 respectively).
//
// Each input in the table below must match and yield the token at the same index of the
// expectation table; the final input "he" is only a prefix of a token and must not match.
public void VerifySimpleTrieAtBegin()
{
    SimpleTrie trie = new SimpleTrie();
    trie.AddToken(Encoding.UTF8.GetBytes("hello"));
    trie.AddToken(Encoding.UTF8.GetBytes("hello!"));
    trie.AddToken(Encoding.UTF8.GetBytes("hi"));

    string[] matchingInputs = { "hello there", "hello1 there", "hello! there", "hi there", "hi" };
    int[] expectedTokens = { 0, 0, 1, 2, 2 };
    int matched;

    for (int i = 0; i < matchingInputs.Length; ++i)
    {
        byte[] buffer = Encoding.UTF8.GetBytes(matchingInputs[i]);
        int offset = 0;

        Assert.IsTrue(trie.GetOperation(buffer, buffer.Length, ref offset, out matched));
        Assert.AreEqual(expectedTokens[i], matched);
    }

    // "he" starts like "hello" but the buffer ends before any token completes.
    byte[] prefixOnly = Encoding.UTF8.GetBytes("he");
    int start = 0;

    Assert.IsFalse(trie.GetOperation(prefixOnly, prefixOnly.Length, ref start, out matched));
    Assert.AreEqual(-1, matched);
}
/// <summary>
/// Creates a matcher for <paramref name="pattern"/>. The pattern's UTF-8 bytes are tokenized with
/// <c>Trie</c>, and the resulting token stream is translated into a compiled regular expression
/// that is anchored at the start of the input ("^") and stored in <c>_regex</c>.
/// </summary>
/// <param name="pattern">The globbing pattern; also stored in <c>Pattern</c>.</param>
public GlobbingPatternMatcher(string pattern)
{
    Pattern = pattern;

    List<Tuple<int, GlobbingPatternToken>> tokens = new List<Tuple<int, GlobbingPatternToken>>();
    byte[] patternBytes = Encoding.UTF8.GetBytes(pattern);

    // Pass 1: split the pattern bytes into (start offset, token kind) pairs.
    int cursor = 0;
    while (cursor != patternBytes.Length)
    {
        int tokenStart = cursor;
        int tokenId;

        if (Trie.GetOperation(patternBytes, patternBytes.Length, ref cursor, out tokenId))
        {
            tokens.Add(Tuple.Create(tokenStart, (GlobbingPatternToken)tokenId));
        }
        else
        {
            // No token here: record a single literal byte at the cursor, then step over it.
            tokens.Add(Tuple.Create(cursor, GlobbingPatternToken.Literal));
            ++cursor;
        }
    }

    // Pass 2: emit the regex. Adjacent literal bytes are merged into one run and escaped as a unit.
    StringBuilder regexText = new StringBuilder("^");
    int literalStart = 0;
    GlobbingPatternToken previous = GlobbingPatternToken.AnyNumberOfPathParts;

    foreach (Tuple<int, GlobbingPatternToken> entry in tokens)
    {
        int position = entry.Item1;
        GlobbingPatternToken kind = entry.Item2;

        if (kind == GlobbingPatternToken.Literal)
        {
            // Remember where a new literal run begins; later bytes of the same run extend it.
            if (previous != GlobbingPatternToken.Literal)
            {
                literalStart = position;
            }
        }
        else
        {
            // A non-literal token ends any literal run in progress.
            if (previous == GlobbingPatternToken.Literal)
            {
                string run = Encoding.UTF8.GetString(patternBytes, literalStart, position - literalStart);
                regexText.Append(Regex.Escape(run));
            }

            switch (kind)
            {
                case GlobbingPatternToken.AnyNumberOfPathParts:
                    regexText.Append(@"(?:[^\\/]*[\\/])*");
                    break;
                case GlobbingPatternToken.OnePathPart:
                    regexText.Append(@"[^\\/]*");
                    break;
                case GlobbingPatternToken.Wildcard:
                    regexText.Append(@"[^\\/]?");
                    break;
                case GlobbingPatternToken.OpenCharSet:
                    regexText.Append("[");
                    break;
                case GlobbingPatternToken.CloseCharSet:
                    regexText.Append("]");
                    break;
                case GlobbingPatternToken.SeparatorChar:
                case GlobbingPatternToken.SeparatorChar2:
                    regexText.Append(@"[\\/]");
                    break;
            }
        }

        previous = kind;
    }

    // A pattern ending in literal bytes still has an unflushed run.
    if (previous == GlobbingPatternToken.Literal)
    {
        string tail = Encoding.UTF8.GetString(patternBytes, literalStart, patternBytes.Length - literalStart);
        regexText.Append(Regex.Escape(tail));
    }

    _regex = new Regex(regexText.ToString(), RegexOptions.Compiled);
}
/// <summary>
/// Tokenizes a C++-style condition starting at <paramref name="currentBufferPosition"/> in the
/// processor's current buffer and returns the result of <c>EvaluateCondition</c> over the collected
/// tokens and <c>processor.EncodingConfig.VariableValues</c>.
/// </summary>
/// <param name="processor">Supplies the buffer, the encoding, the variable trie and the variable values.</param>
/// <param name="bufferLength">
/// Number of usable bytes in the current buffer; refreshed from <c>processor.CurrentBufferLength</c>
/// each time the buffer is advanced.
/// </param>
/// <param name="currentBufferPosition">
/// Read position in the current buffer; moved past consumed bytes and refreshed from
/// <c>processor.CurrentBufferPosition</c> each time the buffer is advanced.
/// </param>
/// <returns>The value returned by <c>EvaluateCondition</c> for the collected tokens.</returns>
/// <remarks>
/// Literals are only accumulated while inside braces. At brace depth 0 an unrecognized byte ends
/// the scan. If the very first byte is a literal, no token has been collected yet; the scan then
/// ends without indexing into the empty token list (which previously threw
/// <see cref="ArgumentOutOfRangeException"/>).
/// </remarks>
public static bool CppStyleEvaluator(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition)
{
    // The explicit indices below are later cast directly to TokenFamily.
    // NOTE(review): presumably they mirror the TokenFamily enum values — confirm against its declaration.
    SimpleTrie trie = new SimpleTrie();

    //Logic
    trie.AddToken(processor.Encoding.GetBytes("&&"), 0);
    trie.AddToken(processor.Encoding.GetBytes("||"), 1);
    trie.AddToken(processor.Encoding.GetBytes("^"), 2);
    trie.AddToken(processor.Encoding.GetBytes("!"), 3);
    trie.AddToken(processor.Encoding.GetBytes(">"), 4);
    trie.AddToken(processor.Encoding.GetBytes(">="), 5);
    trie.AddToken(processor.Encoding.GetBytes("<"), 6);
    trie.AddToken(processor.Encoding.GetBytes("<="), 7);
    trie.AddToken(processor.Encoding.GetBytes("=="), 8);
    trie.AddToken(processor.Encoding.GetBytes("="), 9);
    trie.AddToken(processor.Encoding.GetBytes("!="), 10);

    //Bitwise
    trie.AddToken(processor.Encoding.GetBytes("&"), 11);
    trie.AddToken(processor.Encoding.GetBytes("|"), 12);
    trie.AddToken(processor.Encoding.GetBytes("<<"), 13);
    trie.AddToken(processor.Encoding.GetBytes(">>"), 14);

    //Braces
    trie.AddToken(processor.Encoding.GetBytes("("), 15);
    trie.AddToken(processor.Encoding.GetBytes(")"), 16);

    //Whitespace
    trie.AddToken(processor.Encoding.GetBytes(" "), 17);
    trie.AddToken(processor.Encoding.GetBytes("\t"), 18);

    //EOLs
    trie.AddToken(processor.Encoding.GetBytes("\r\n"), 19);
    trie.AddToken(processor.Encoding.GetBytes("\n"), 20);
    trie.AddToken(processor.Encoding.GetBytes("\r"), 21);

    //Tokens (variables; matches above ReservedTokenMaxIndex are treated as references below)
    trie.Append(processor.EncodingConfig.Variables);

    //Run forward to EOL and collect args
    TokenFamily currentTokenFamily;
    List<byte> currentTokenBytes = new List<byte>();
    List<TokenRef> tokens = new List<TokenRef>();
    int token;

    // Classify the first element of the expression.
    if (!trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
    {
        // Not a known token: start a literal. Nothing is added to `tokens` on this path.
        currentTokenFamily = TokenFamily.Literal;
        currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
    }
    else if (token > ReservedTokenMaxIndex)
    {
        currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
        tokens.Add(new TokenRef { Family = currentTokenFamily });
    }
    else
    {
        currentTokenFamily = (TokenFamily)token;

        if (currentTokenFamily != TokenFamily.WindowsEOL
            && currentTokenFamily != TokenFamily.LegacyMacEOL
            && currentTokenFamily != TokenFamily.UnixEOL)
        {
            tokens.Add(new TokenRef { Family = currentTokenFamily });
        }
        else
        {
            // The line ended immediately: evaluate the (empty) token list.
            return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
        }
    }

    int braceDepth = 0;

    // `tokens` is empty when the first element was a literal, so guard the index.
    if (tokens.Count > 0 && tokens[0].Family == TokenFamily.OpenBrace)
    {
        ++braceDepth;
    }

    bool first = true;

    while ((first || braceDepth > 0) && bufferLength > 0)
    {
        int targetLen = Math.Min(bufferLength, trie.MaxLength);

        for (; currentBufferPosition < bufferLength - targetLen + 1;)
        {
            int oldBufferPos = currentBufferPosition;

            if (trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                if (braceDepth == 0)
                {
                    if (tokens.Count == 0)
                    {
                        // Only a pending literal precedes this token. Treat it like any other
                        // non-separator predecessor: un-consume the token and stop scanning.
                        currentBufferPosition = oldBufferPos;
                        first = false;
                    }
                    else
                    {
                        switch (tokens[tokens.Count - 1].Family)
                        {
                            case TokenFamily.Whitespace:
                            case TokenFamily.Tab:
                            case TokenFamily.CloseBrace:
                            case TokenFamily.WindowsEOL:
                            case TokenFamily.UnixEOL:
                            case TokenFamily.LegacyMacEOL:
                                TokenFamily thisFamily = (TokenFamily)token;

                                // Leave an EOL unconsumed; the EOL branch below still ends the evaluation.
                                if (thisFamily == TokenFamily.WindowsEOL
                                    || thisFamily == TokenFamily.UnixEOL
                                    || thisFamily == TokenFamily.LegacyMacEOL)
                                {
                                    currentBufferPosition = oldBufferPos;
                                }

                                break;
                            default:
                                // Previous token was not a separator: un-consume this token and stop.
                                currentBufferPosition = oldBufferPos;
                                first = false;
                                break;
                        }
                    }

                    if (!first)
                    {
                        break;
                    }
                }

                //We matched an item, so whatever this is, it's not a literal, end the current literal if that's
                // what we currently have
                if (currentTokenFamily == TokenFamily.Literal)
                {
                    string literal = processor.Encoding.GetString(currentTokenBytes.ToArray());
                    tokens.Add(new TokenRef { Family = TokenFamily.Literal, Literal = literal });
                    currentTokenBytes.Clear();
                }

                //If we have a token from the args...
                if (token > ReservedTokenMaxIndex)
                {
                    currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
                    tokens.Add(new TokenRef { Family = currentTokenFamily });
                }
                //If we have a normal token...
                else
                {
                    currentTokenFamily = (TokenFamily)token;

                    if (currentTokenFamily != TokenFamily.WindowsEOL
                        && currentTokenFamily != TokenFamily.LegacyMacEOL
                        && currentTokenFamily != TokenFamily.UnixEOL)
                    {
                        if (currentTokenFamily == TokenFamily.OpenBrace)
                        {
                            ++braceDepth;
                        }
                        else if (currentTokenFamily == TokenFamily.CloseBrace)
                        {
                            --braceDepth;
                        }

                        tokens.Add(new TokenRef { Family = currentTokenFamily });
                    }
                    else
                    {
                        // End of line terminates the expression.
                        return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
                    }
                }
            }
            else if (braceDepth > 0)
            {
                // Inside braces, unrecognized bytes accumulate into the current literal.
                currentTokenFamily = TokenFamily.Literal;
                currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
            }
            else
            {
                // Outside braces, an unrecognized byte ends the expression.
                first = false;
                break;
            }
        }

        // Hand the consumed bytes back to the processor and pick up its refreshed buffer state.
        processor.AdvanceBuffer(currentBufferPosition);
        currentBufferPosition = processor.CurrentBufferPosition;
        bufferLength = processor.CurrentBufferLength;
    }

    return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
}