// Moves to the next token.
// Returns false if the end of the buffer was reached without finding a token.
private bool SeekToToken(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition, out int token)
{
    bool bufferAdvanceFailed = false;

    while (bufferLength >= _trie.MinLength)
    {
        for (; currentBufferPosition < bufferLength - _trie.MinLength + 1; ++currentBufferPosition)
        {
            if (_trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                if (bufferAdvanceFailed || (currentBufferPosition != bufferLength))
                {
                    return true;
                }
            }
        }

        if (bufferAdvanceFailed)
        {
            break;
        }

        bufferAdvanceFailed = !processor.AdvanceBuffer(bufferLength - _trie.MaxLength + 1);
        currentBufferPosition = processor.CurrentBufferPosition;
        bufferLength = processor.CurrentBufferLength;
    }

    //If we run out of places to look, assert that the end of the buffer is the end
    token = EndTokenBaseIndex;
    currentBufferPosition = bufferLength;
    return false; // no terminator found
}
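// The scans in this file are driven by TokenTrie.GetOperation, which reports whether one of the
// registered byte sequences starts at the current position and, on a match, advances the ref
// position just past it (on a miss the position is left alone). A minimal, self-contained sketch
// of that contract, assuming the TokenTrie type and the AddToken/GetOperation members used above;
// the sample tokens and buffer are illustrative only, and it needs System, System.Text, and the
// assembly that defines TokenTrie. The engine's loops above also stop short of the buffer tail and
// refill it so a token can span reads; this sketch works on a complete in-memory buffer, so that
// windowing is omitted.
TokenTrie sketchTrie = new TokenTrie();
sketchTrie.AddToken(Encoding.UTF8.GetBytes("<!--"), 0);
sketchTrie.AddToken(Encoding.UTF8.GetBytes("-->"), 1);

byte[] sketchBuffer = Encoding.UTF8.GetBytes("a <!-- b --> c");
int sketchPosition = 0;

while (sketchPosition < sketchBuffer.Length)
{
    if (sketchTrie.GetOperation(sketchBuffer, sketchBuffer.Length, ref sketchPosition, out int sketchToken))
    {
        // sketchToken is the index passed to AddToken; sketchPosition now sits just past the match.
        Console.WriteLine($"token {sketchToken} ends at {sketchPosition}");
    }
    else
    {
        // No registered token starts here; step over the byte as literal content.
        sketchPosition++;
    }
}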
private void ScanToCloseCondition(IProcessorState processorState, List<byte> conditionBytes, ref int bufferLength, ref int currentBufferPosition)
{
    int previousPosition = currentBufferPosition;

    while (bufferLength >= _closeConditionTrie.MinLength)
    {
        //Try to get at least the max length of the trie into the buffer
        if (bufferLength - currentBufferPosition < _closeConditionTrie.MaxLength)
        {
            conditionBytes.AddRange(processorState.CurrentBuffer.Skip(previousPosition).Take(currentBufferPosition - previousPosition));
            processorState.AdvanceBuffer(currentBufferPosition);
            currentBufferPosition = processorState.CurrentBufferPosition;
            bufferLength = processorState.CurrentBufferLength;
            previousPosition = 0;
        }

        int sz = bufferLength == processorState.CurrentBuffer.Length ? _closeConditionTrie.MaxLength : _closeConditionTrie.MinLength;

        for (; currentBufferPosition < bufferLength - sz + 1; ++currentBufferPosition)
        {
            if (bufferLength == 0)
            {
                currentBufferPosition = 0;
                return;
            }

            int token;
            if (_closeConditionTrie.GetOperation(processorState.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                conditionBytes.AddRange(processorState.CurrentBuffer.Skip(previousPosition).Take(currentBufferPosition - previousPosition - _closeConditionTrie.Tokens[token].Length));
                return;
            }
        }
    }

    //Ran out of places to check and haven't reached the actual match, consume all the way to the end
    currentBufferPosition = bufferLength;
}
private bool SeekToTerminator(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition, out int token)
{
    while (bufferLength >= _trie.MinLength)
    {
        for (; currentBufferPosition < bufferLength - _trie.MinLength + 1; ++currentBufferPosition)
        {
            if (_trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                return true;
            }
        }

        processor.AdvanceBuffer(bufferLength - _trie.MaxLength + 1);
        currentBufferPosition = processor.CurrentBufferPosition;
        bufferLength = processor.CurrentBufferLength;
    }

    //If we run out of places to look, assert that the end of the buffer is the end
    token = 1;
    currentBufferPosition = bufferLength;
    return true;
}
// Moves to the next token.
// Returns false if the end of the buffer was reached without finding a token.
private bool SeekToToken(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition, out int token)
{
    bool bufferAdvanceFailed = false;
    ITokenTrieEvaluator evaluator = _trie.CreateEvaluator();

    while (true)
    {
        for (; currentBufferPosition < bufferLength; ++currentBufferPosition)
        {
            if (evaluator.Accept(processor.CurrentBuffer[currentBufferPosition], ref currentBufferPosition, out token))
            {
                if (bufferAdvanceFailed || (currentBufferPosition != bufferLength))
                {
                    return true;
                }
            }
        }

        if (bufferAdvanceFailed)
        {
            if (evaluator.TryFinalizeMatchesInProgress(ref currentBufferPosition, out token))
            {
                return true;
            }

            break;
        }

        bufferAdvanceFailed = !processor.AdvanceBuffer(bufferLength - evaluator.BytesToKeepInBuffer);
        currentBufferPosition = evaluator.BytesToKeepInBuffer;
        bufferLength = processor.CurrentBufferLength;
    }

    //If we run out of places to look, assert that the end of the buffer is the end
    token = EndTokenBaseIndex;
    currentBufferPosition = bufferLength;
    return false; // no terminator found
}
public int HandleMatch(IProcessorState processor, int bufferLength, ref int currentBufferPosition, int token, Stream target)
{
    bool flag;
    if (processor.Config.Flags.TryGetValue(Region.OperationName, out flag) && !flag)
    {
        target.Write(Tokens[token].Value, Tokens[token].Start, Tokens[token].Length);
        return Tokens[token].Length;
    }

    processor.WhitespaceHandler(ref bufferLength, ref currentBufferPosition, wholeLine: _definition._wholeLine, trim: _definition._trimWhitespace);

    if (_startAndEndAreSame)
    {
        token = _waitingForEnd ? 1 : 0;
    }

    //If we're resuming from a region that has been included (we've found the end now)
    // just process the end
    if (_waitingForEnd && token == 1)
    {
        _waitingForEnd = false;
        return 0;
    }

    if (token != 0)
    {
        return 0;
    }

    //If we're including the region, set that we're waiting for the end and return
    // control to the processor
    if (_includeRegion)
    {
        _waitingForEnd = true;
        return 0;
    }

    //If we've made it here, we're skipping stuff, skip all the way to the end of the
    // end token
    int i = currentBufferPosition;
    int j = 0;

    for (; j < _endToken.Length; ++j)
    {
        if (i + j == bufferLength)
        {
            processor.AdvanceBuffer(i + j);
            bufferLength = processor.CurrentBufferLength;
            i = -j;
        }

        //TODO: This should be using one of the tries rather than looking for the byte run directly
        if (processor.CurrentBuffer[i + j] != _endToken.Value[j])
        {
            ++i;
            j = -1;
        }
    }

    i += j;
    processor.WhitespaceHandler(ref bufferLength, ref i, wholeLine: _definition._wholeLine, trim: _definition._trimWhitespace);
    currentBufferPosition = i;

    return 0;
}
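// The skip loop above is a naive byte-run search for _endToken (the TODO notes it should use a
// trie): on any mismatch the candidate start (i) slides forward by one byte and the pattern index
// (j) restarts. A self-contained sketch of the same technique over an in-memory buffer clarifies
// the i/j reset; this is a hypothetical helper, not part of the engine, and buffer refills are
// omitted since the whole input is in memory.
static int IndexAfterByteRun(byte[] buffer, byte[] pattern)
{
    int i = 0; // candidate start of the match
    int j = 0; // position within the pattern

    for (; j < pattern.Length; ++j)
    {
        if (i + j == buffer.Length)
        {
            return -1; // ran out of input without finding the pattern
        }

        if (buffer[i + j] != pattern[j])
        {
            ++i;    // slide the candidate start forward by one byte
            j = -1; // the for loop's ++j restarts the pattern at index 0
        }
    }

    return i + j; // index of the first byte after the matched run
}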
public static bool Evaluate(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition, out bool faulted)
{
    faulted = false;
    TokenTrie trie = new TokenTrie();

    //Logic
    trie.AddToken(processor.Encoding.GetBytes("&&"), 0);
    trie.AddToken(processor.Encoding.GetBytes("||"), 1);
    trie.AddToken(processor.Encoding.GetBytes("^"), 2);
    trie.AddToken(processor.Encoding.GetBytes("!"), 3);
    trie.AddToken(processor.Encoding.GetBytes(">"), 4);
    trie.AddToken(processor.Encoding.GetBytes(">="), 5);
    trie.AddToken(processor.Encoding.GetBytes("<"), 6);
    trie.AddToken(processor.Encoding.GetBytes("<="), 7);
    trie.AddToken(processor.Encoding.GetBytes("=="), 8);
    trie.AddToken(processor.Encoding.GetBytes("="), 9);
    trie.AddToken(processor.Encoding.GetBytes("!="), 10);

    //Bitwise
    trie.AddToken(processor.Encoding.GetBytes("&"), 11);
    trie.AddToken(processor.Encoding.GetBytes("|"), 12);
    trie.AddToken(processor.Encoding.GetBytes("<<"), 13);
    trie.AddToken(processor.Encoding.GetBytes(">>"), 14);

    //Braces
    trie.AddToken(processor.Encoding.GetBytes("("), 15);
    trie.AddToken(processor.Encoding.GetBytes(")"), 16);

    //Whitespace
    trie.AddToken(processor.Encoding.GetBytes(" "), 17);
    trie.AddToken(processor.Encoding.GetBytes("\t"), 18);

    //EOLs
    trie.AddToken(processor.Encoding.GetBytes("\r\n"), 19);
    trie.AddToken(processor.Encoding.GetBytes("\n"), 20);
    trie.AddToken(processor.Encoding.GetBytes("\r"), 21);

    //Quotes
    trie.AddToken(processor.Encoding.GetBytes("\""), 22);
    trie.AddToken(processor.Encoding.GetBytes("'"), 23);

    //Tokens
    trie.Append(processor.EncodingConfig.Variables);

    //Run forward to EOL and collect args
    TokenFamily currentTokenFamily;
    List<byte> currentTokenBytes = new List<byte>();
    List<TokenRef> tokens = new List<TokenRef>();

    if (!trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out int token))
    {
        currentTokenFamily = TokenFamily.Literal;
        currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
    }
    else if (token > ReservedTokenMaxIndex)
    {
        currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
        tokens.Add(new TokenRef { Family = currentTokenFamily });
    }
    else
    {
        currentTokenFamily = (TokenFamily)token;

        if (currentTokenFamily != TokenFamily.WindowsEOL && currentTokenFamily != TokenFamily.LegacyMacEOL && currentTokenFamily != TokenFamily.UnixEOL)
        {
            tokens.Add(new TokenRef { Family = currentTokenFamily });
        }
        else
        {
            return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
        }
    }

    int braceDepth = 0;
    if (tokens[0].Family == TokenFamily.OpenBrace)
    {
        ++braceDepth;
    }

    bool first = true;
    QuotedRegionKind inQuoteType = QuotedRegionKind.None;

    while ((first || braceDepth > 0) && bufferLength > 0)
    {
        int targetLen = Math.Min(bufferLength, trie.MaxLength);

        for (; currentBufferPosition < bufferLength - targetLen + 1;)
        {
            int oldBufferPos = currentBufferPosition;

            if (trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                if (braceDepth == 0)
                {
                    switch (tokens[tokens.Count - 1].Family)
                    {
                        case TokenFamily.Whitespace:
                        case TokenFamily.Tab:
                        case TokenFamily.CloseBrace:
                        case TokenFamily.WindowsEOL:
                        case TokenFamily.UnixEOL:
                        case TokenFamily.LegacyMacEOL:
                            TokenFamily thisFamily = (TokenFamily)token;
                            if (thisFamily == TokenFamily.WindowsEOL || thisFamily == TokenFamily.UnixEOL || thisFamily == TokenFamily.LegacyMacEOL)
                            {
                                currentBufferPosition = oldBufferPos;
                            }
                            break;
                        default:
                            currentBufferPosition = oldBufferPos;
                            first = false;
                            break;
                    }

                    if (!first)
                    {
                        break;
                    }
                }

                //We matched an item, so whatever this is, it's not a literal.
                //If the current token is a literal, end it.
                if (currentTokenFamily == TokenFamily.Literal)
                {
                    string literal = processor.Encoding.GetString(currentTokenBytes.ToArray());
                    tokens.Add(new TokenRef { Family = TokenFamily.Literal, Literal = literal });
                    currentTokenBytes.Clear();
                }

                TokenFamily foundTokenFamily = (TokenFamily)token;

                if (foundTokenFamily == TokenFamily.QuotedLiteral || foundTokenFamily == TokenFamily.SingleQuotedLiteral)
                {
                    QuotedRegionKind incomingQuoteKind;

                    switch (foundTokenFamily)
                    {
                        case TokenFamily.QuotedLiteral:
                            incomingQuoteKind = QuotedRegionKind.DoubleQuoteRegion;
                            break;
                        case TokenFamily.SingleQuotedLiteral:
                            incomingQuoteKind = QuotedRegionKind.SingleQuoteRegion;
                            break;
                        default:
                            incomingQuoteKind = QuotedRegionKind.None;
                            break;
                    }

                    if (inQuoteType == QuotedRegionKind.None)
                    {
                        // starting quote found
                        currentTokenBytes.AddRange(trie.Tokens[token].Value);
                        inQuoteType = incomingQuoteKind;
                    }
                    else if (incomingQuoteKind == inQuoteType)
                    {
                        // end quote found
                        currentTokenBytes.AddRange(trie.Tokens[token].Value);
                        tokens.Add(new TokenRef { Family = TokenFamily.Literal, Literal = processor.Encoding.GetString(currentTokenBytes.ToArray()) });
                        currentTokenBytes.Clear();
                        inQuoteType = QuotedRegionKind.None;
                    }
                    else
                    {
                        // this is a different quote type; treat it like a non-match and just add the token to currentTokenBytes
                        currentTokenBytes.AddRange(trie.Tokens[token].Value);
                    }
                }
                else if (inQuoteType != QuotedRegionKind.None)
                {
                    // we're inside a quoted literal; the token found by the trie should not be processed, just included with the literal
                    currentTokenBytes.AddRange(trie.Tokens[token].Value);
                }
                else if (token > ReservedTokenMaxIndex)
                {
                    currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
                    tokens.Add(new TokenRef { Family = currentTokenFamily });
                }
                else
                {
                    //If we have a normal token...
                    currentTokenFamily = (TokenFamily)token;

                    if (currentTokenFamily != TokenFamily.WindowsEOL && currentTokenFamily != TokenFamily.LegacyMacEOL && currentTokenFamily != TokenFamily.UnixEOL)
                    {
                        switch (currentTokenFamily)
                        {
                            case TokenFamily.OpenBrace:
                                ++braceDepth;
                                break;
                            case TokenFamily.CloseBrace:
                                --braceDepth;
                                break;
                        }

                        tokens.Add(new TokenRef { Family = currentTokenFamily });
                    }
                    else
                    {
                        return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
                    }
                }
            }
            else if (inQuoteType != QuotedRegionKind.None)
            {
                // we're in a quoted literal but did not match a token at the current position,
                // so just add the current byte to currentTokenBytes
                currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
            }
            else if (braceDepth > 0)
            {
                currentTokenFamily = TokenFamily.Literal;
                currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
            }
            else
            {
                first = false;
                break;
            }
        }

        processor.AdvanceBuffer(currentBufferPosition);
        currentBufferPosition = processor.CurrentBufferPosition;
        bufferLength = processor.CurrentBufferLength;
    }

#if DEBUG
    Debug.Assert(
        inQuoteType == QuotedRegionKind.None,
        $"Malformed predicate due to unmatched quotes. InitialBuffer = {processor.Encoding.GetString(processor.CurrentBuffer)} currentTokenFamily = {currentTokenFamily} | TokenFamily.QuotedLiteral = {TokenFamily.QuotedLiteral} | TokenFamily.SingleQuotedLiteral = {TokenFamily.SingleQuotedLiteral}");
#endif

    return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
}
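// Variable tokens appended via trie.Append(processor.EncodingConfig.Variables) get indices above
// ReservedTokenMaxIndex and are recorded as TokenFamily.Reference OR'd with the raw token index
// (see the `token > ReservedTokenMaxIndex` branches above). A small sketch of decoding such a
// family value back into the variable's token index; it assumes Reference is a high-order flag
// bit that does not overlap the index bits, which is an assumption about the enum's layout, not
// something shown in this file.
TokenFamily family = TokenFamily.Reference | (TokenFamily)25; // 25 is a hypothetical variable token index
bool isReference = (family & TokenFamily.Reference) != 0;
int variableTokenIndex = (int)(family & ~TokenFamily.Reference);
Console.WriteLine($"isReference={isReference}, index={variableTokenIndex}");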
public static bool CppStyleEvaluator(IProcessorState processor, ref int bufferLength, ref int currentBufferPosition)
{
    TokenTrie trie = new TokenTrie();

    //Logic
    trie.AddToken(processor.Encoding.GetBytes("&&"), 0);
    trie.AddToken(processor.Encoding.GetBytes("||"), 1);
    trie.AddToken(processor.Encoding.GetBytes("^"), 2);
    trie.AddToken(processor.Encoding.GetBytes("!"), 3);
    trie.AddToken(processor.Encoding.GetBytes(">"), 4);
    trie.AddToken(processor.Encoding.GetBytes(">="), 5);
    trie.AddToken(processor.Encoding.GetBytes("<"), 6);
    trie.AddToken(processor.Encoding.GetBytes("<="), 7);
    trie.AddToken(processor.Encoding.GetBytes("=="), 8);
    trie.AddToken(processor.Encoding.GetBytes("="), 9);
    trie.AddToken(processor.Encoding.GetBytes("!="), 10);

    //Bitwise
    trie.AddToken(processor.Encoding.GetBytes("&"), 11);
    trie.AddToken(processor.Encoding.GetBytes("|"), 12);
    trie.AddToken(processor.Encoding.GetBytes("<<"), 13);
    trie.AddToken(processor.Encoding.GetBytes(">>"), 14);

    //Braces
    trie.AddToken(processor.Encoding.GetBytes("("), 15);
    trie.AddToken(processor.Encoding.GetBytes(")"), 16);

    //Whitespace
    trie.AddToken(processor.Encoding.GetBytes(" "), 17);
    trie.AddToken(processor.Encoding.GetBytes("\t"), 18);

    //EOLs
    trie.AddToken(processor.Encoding.GetBytes("\r\n"), 19);
    trie.AddToken(processor.Encoding.GetBytes("\n"), 20);
    trie.AddToken(processor.Encoding.GetBytes("\r"), 21);

    //Tokens
    trie.Append(processor.EncodingConfig.Variables);

    //Run forward to EOL and collect args
    TokenFamily currentTokenFamily;
    List<byte> currentTokenBytes = new List<byte>();
    List<TokenRef> tokens = new List<TokenRef>();
    int token;

    if (!trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
    {
        currentTokenFamily = TokenFamily.Literal;
        currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
    }
    else if (token > ReservedTokenMaxIndex)
    {
        currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
        tokens.Add(new TokenRef { Family = currentTokenFamily });
    }
    else
    {
        currentTokenFamily = (TokenFamily)token;

        if (currentTokenFamily != TokenFamily.WindowsEOL && currentTokenFamily != TokenFamily.LegacyMacEOL && currentTokenFamily != TokenFamily.UnixEOL)
        {
            tokens.Add(new TokenRef { Family = currentTokenFamily });
        }
        else
        {
            return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
        }
    }

    int braceDepth = 0;
    if (tokens[0].Family == TokenFamily.OpenBrace)
    {
        ++braceDepth;
    }

    bool first = true;

    while ((first || braceDepth > 0) && bufferLength > 0)
    {
        int targetLen = Math.Min(bufferLength, trie.MaxLength);

        for (; currentBufferPosition < bufferLength - targetLen + 1;)
        {
            int oldBufferPos = currentBufferPosition;

            if (trie.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                if (braceDepth == 0)
                {
                    switch (tokens[tokens.Count - 1].Family)
                    {
                        case TokenFamily.Whitespace:
                        case TokenFamily.Tab:
                        case TokenFamily.CloseBrace:
                        case TokenFamily.WindowsEOL:
                        case TokenFamily.UnixEOL:
                        case TokenFamily.LegacyMacEOL:
                            TokenFamily thisFamily = (TokenFamily)token;
                            if (thisFamily == TokenFamily.WindowsEOL || thisFamily == TokenFamily.UnixEOL || thisFamily == TokenFamily.LegacyMacEOL)
                            {
                                currentBufferPosition = oldBufferPos;
                            }
                            break;
                        default:
                            currentBufferPosition = oldBufferPos;
                            first = false;
                            break;
                    }

                    if (!first)
                    {
                        break;
                    }
                }

                //We matched an item, so whatever this is, it's not a literal, end the current literal if that's
                // what we currently have
                if (currentTokenFamily == TokenFamily.Literal)
                {
                    string literal = processor.Encoding.GetString(currentTokenBytes.ToArray());
                    tokens.Add(new TokenRef { Family = TokenFamily.Literal, Literal = literal });
                    currentTokenBytes.Clear();
                }

                //If we have a token from the args...
                if (token > ReservedTokenMaxIndex)
                {
                    if (currentTokenFamily == TokenFamily.Literal)
                    {
                        TokenRef previous = tokens[tokens.Count - 1];
                        previous.Literal += processor.Encoding.GetString(currentTokenBytes.ToArray());
                        currentTokenBytes = processor.Encoding.GetBytes(previous.Literal).ToList();
                        tokens.RemoveAt(tokens.Count - 1);
                    }
                    else
                    {
                        currentTokenFamily = TokenFamily.Reference | (TokenFamily)token;
                        tokens.Add(new TokenRef { Family = currentTokenFamily });
                    }
                }
                //If we have a normal token...
                else
                {
                    currentTokenFamily = (TokenFamily)token;

                    if (currentTokenFamily != TokenFamily.WindowsEOL && currentTokenFamily != TokenFamily.LegacyMacEOL && currentTokenFamily != TokenFamily.UnixEOL)
                    {
                        if (currentTokenFamily == TokenFamily.OpenBrace)
                        {
                            ++braceDepth;
                        }
                        else if (currentTokenFamily == TokenFamily.CloseBrace)
                        {
                            --braceDepth;
                        }

                        tokens.Add(new TokenRef { Family = currentTokenFamily });
                    }
                    else
                    {
                        return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
                    }
                }
            }
            else if (braceDepth > 0)
            {
                currentTokenFamily = TokenFamily.Literal;
                currentTokenBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);
            }
            else
            {
                first = false;
                break;
            }
        }

        processor.AdvanceBuffer(currentBufferPosition);
        currentBufferPosition = processor.CurrentBufferPosition;
        bufferLength = processor.CurrentBufferLength;
    }

    return EvaluateCondition(tokens, processor.EncodingConfig.VariableValues);
}
private void FindEnd(IProcessorState processorState, ref int bufferLength, ref int currentBufferPosition)
{
    int depth = 1;
    bool inElement = true;

    while (bufferLength >= _structureTrie.MinLength)
    {
        //Try to get at least the max length of the trie into the buffer
        if (bufferLength - currentBufferPosition < _structureTrie.MaxLength)
        {
            processorState.AdvanceBuffer(currentBufferPosition);
            currentBufferPosition = processorState.CurrentBufferPosition;
            bufferLength = processorState.CurrentBufferLength;
        }

        int sz = bufferLength == processorState.CurrentBuffer.Length ? _structureTrie.MaxLength : _structureTrie.MinLength;

        for (; currentBufferPosition < bufferLength - sz + 1; ++currentBufferPosition)
        {
            if (bufferLength == 0)
            {
                currentBufferPosition = 0;
                return;
            }

            int token;
            if (_structureTrie.GetOperation(processorState.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
            {
                if (token == _mapping.OpenOpenElementToken)
                {
                    ++depth;
                    inElement = true;
                }
                else if (token == _mapping.SelfClosingElementEndToken)
                {
                    --depth;
                    inElement = false;
                }
                else if (token == _mapping.CloseElementTagToken)
                {
                    if (inElement)
                    {
                        inElement = false;
                    }
                    else
                    {
                        --depth;
                    }
                }
                else if (token == _mapping.OpenCloseElementToken)
                {
                    inElement = false;
                }

                if (depth == 0)
                {
                    return;
                }
            }
        }
    }

    //Ran out of places to check and haven't reached the actual match, consume all the way to the end
    currentBufferPosition = bufferLength;
}
public int HandleMatch(IProcessorState processor, int bufferLength, ref int currentBufferPosition, int token, Stream target)
{
    bool flag;
    if (processor.Config.Flags.TryGetValue(OperationName, out flag) && !flag)
    {
        target.Write(Tokens[token].Value, Tokens[token].Start, Tokens[token].Length);
        return Tokens[token].Length;
    }

    List<byte> pathBytes = new List<byte>();
    while (!_endTokenMatcher.GetOperation(processor.CurrentBuffer, bufferLength, ref currentBufferPosition, out token))
    {
        pathBytes.Add(processor.CurrentBuffer[currentBufferPosition++]);

        if (bufferLength - currentBufferPosition < _endTokenMatcher.MinLength)
        {
            processor.AdvanceBuffer(currentBufferPosition);
            bufferLength = processor.CurrentBufferLength;
            currentBufferPosition = 0;

            if (bufferLength == 0)
            {
                break;
            }
        }
    }

    byte[] pathBytesArray = pathBytes.ToArray();
    string sourceLocation = processor.Encoding.GetString(pathBytesArray).Trim();

    const int pageSize = 65536;
    //Start off with a 64K buffer, we'll keep adding chunks to this
    byte[] composite = new byte[pageSize];
    int totalLength;

    using (Stream data = _source.SourceStreamOpener(sourceLocation))
    {
        int index = composite.Length - pageSize;
        int nRead = data.Read(composite, index, pageSize);

        //As long as we're reading whole pages, keep allocating more space ahead
        while (nRead == pageSize)
        {
            byte[] newBuffer = new byte[composite.Length + pageSize];
            Buffer.BlockCopy(composite, 0, newBuffer, 0, composite.Length);
            composite = newBuffer;

            //Advance the write position to the newly allocated page so earlier pages aren't overwritten
            index = composite.Length - pageSize;
            nRead = data.Read(composite, index, pageSize);
        }

        totalLength = composite.Length - (pageSize - nRead);
    }

    byte[] bom;
    Encoding realEncoding = EncodingUtil.Detect(composite, totalLength, out bom);

    if (!Equals(realEncoding, processor.Encoding))
    {
        composite = Encoding.Convert(realEncoding, processor.Encoding, composite, bom.Length, totalLength - bom.Length);
        totalLength = composite.Length;

        //The converted bytes no longer carry the original BOM
        bom = Array.Empty<byte>();
    }

    //Write the included content without its BOM
    target.Write(composite, bom.Length, totalLength - bom.Length);
    return composite.Length;
}
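// The include step above detects the source file's encoding, strips its BOM, and re-encodes the
// payload to the processor's encoding before writing it out. A self-contained sketch of that
// conversion using only the BCL; EncodingUtil.Detect is replaced here with a simplified UTF-8-BOM
// check, which is an assumption for illustration and not the engine's detection logic, and the
// helper name is hypothetical.
static byte[] ReencodeWithoutBom(byte[] source, Encoding target)
{
    // Simplified detection: recognize a UTF-8 BOM only; real detection handles more encodings.
    byte[] utf8Bom = { 0xEF, 0xBB, 0xBF };
    bool hasUtf8Bom = source.Length >= utf8Bom.Length
        && source[0] == utf8Bom[0] && source[1] == utf8Bom[1] && source[2] == utf8Bom[2];

    Encoding detected = hasUtf8Bom ? Encoding.UTF8 : Encoding.ASCII;
    int bomLength = hasUtf8Bom ? utf8Bom.Length : 0;

    // Encoding.Convert re-encodes the payload (minus the BOM) into the target encoding.
    return Encoding.Convert(detected, target, source, bomLength, source.Length - bomLength);
}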