/// <summary>
/// Reads the text enclosed in a pair of double quotes. The parser must
/// be sitting on the opening quote when this is called. Neither quote
/// is part of the returned string. When the method returns, the parser
/// rests on the closing quote, or at the end of the text when no
/// closing quote exists.
/// </summary>
/// <param name="parser">Parser positioned at the opening quote</param>
/// <returns>The text found between the quotes</returns>
private string ExtractQuote(TextParser parser)
{
    // Step over the opening quote; the quoted content begins here
    parser.MoveAhead();
    int contentStart = parser.Position;

    // Advance until a closing quote or the end of the text is reached
    for (; !parser.EndOfText; parser.MoveAhead())
    {
        if (parser.Peek() == '"')
        {
            break;
        }
    }

    int contentEnd = parser.Position;
    return parser.Extract(contentStart, contentEnd);
}
/// <summary>
/// Reads the text enclosed by a matching pair of open/close delimiter
/// characters, honouring nested pairs. The parser must be sitting on
/// an occurrence of the open character when this is called. The outer
/// delimiters are not part of the returned string. When the method
/// returns, the parser rests on the matching close character, or at
/// the end of the text when the block is never closed.
/// </summary>
/// <param name="parser">Parser positioned at the open delimiter</param>
/// <param name="openChar">Character that starts a block</param>
/// <param name="closeChar">Character that ends a block</param>
/// <returns>The text found between the outer delimiters</returns>
private string ExtractBlock(TextParser parser, char openChar, char closeChar)
{
    // The open delimiter under the parser is the first nesting level
    int nesting = 1;

    // Step over the open delimiter; the block content begins here
    parser.MoveAhead();
    int contentStart = parser.Position;

    for (; !parser.EndOfText; parser.MoveAhead())
    {
        if (parser.Peek() == openChar)
        {
            // A nested block begins
            nesting++;
            continue;
        }

        if (parser.Peek() == closeChar)
        {
            // A block ends; stop on the one that closes the outer block
            nesting--;
            if (nesting == 0)
            {
                break;
            }
            continue;
        }

        if (parser.Peek() == '"')
        {
            // Skip quoted text so delimiters inside it are not counted;
            // the loop step then moves past the closing quote
            ExtractQuote(parser);
        }
    }

    return parser.Extract(contentStart, parser.Position);
}
/// <summary>
/// Parses a query segment and converts it to an expression
/// tree.
/// </summary>
/// <remarks>
/// Recognized input, as handled by this method:
/// the words AND, OR and NEAR select the conjunction applied to the
/// next term; NOT or a leading '-' excludes the next term; '+' and
/// quoted text request a literal match; '~' requests a thesaurus
/// match; a trailing '*' on a word makes it a literal term; text in
/// parentheses is parsed recursively with the same default
/// conjunction; text in angle brackets is parsed recursively with
/// NEAR as the default conjunction. Keywords are matched with an
/// ordinal, case-insensitive comparison so the outcome does not
/// depend on the current culture.
/// </remarks>
/// <param name="query">Query segment to convert</param>
/// <param name="defaultConjunction">Implicit conjunction type</param>
/// <returns>Root node of expression tree (null when no node was added)</returns>
private INode ParseNode(string query, ConjunctionTypes defaultConjunction)
{
    TermForms termForm = TermForms.Inflectional;
    bool termExclude = false;
    ConjunctionTypes conjunction = defaultConjunction;
    bool resetState = true;
    INode root = null;
    INode node;
    string term;

    TextParser parser = new TextParser(query);
    while (!parser.EndOfText)
    {
        if (resetState)
        {
            // Modifiers apply to a single term only; restore the defaults
            termForm = TermForms.Inflectional;
            termExclude = false;
            conjunction = defaultConjunction;
            resetState = false;
        }

        parser.MovePastWhitespace();
        if (!parser.EndOfText && !Punctuation.Contains(parser.Peek()))
        {
            // Extract query term: runs up to the next punctuation
            // character, whitespace, or the end of the text
            int start = parser.Position;
            parser.MoveAhead();
            while (!parser.EndOfText &&
                !Punctuation.Contains(parser.Peek()) &&
                !Char.IsWhiteSpace(parser.Peek()))
            {
                parser.MoveAhead();
            }

            // Allow trailing wildcard; it becomes part of the term
            if (parser.Peek() == '*')
            {
                parser.MoveAhead();
                termForm = TermForms.Literal;
            }

            // Interpret token. Keywords only adjust the pending state;
            // any other word is added to the tree as a term.
            term = parser.Extract(start, parser.Position);
            if (string.Equals(term, "AND", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.And;
            }
            else if (string.Equals(term, "OR", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.Or;
            }
            else if (string.Equals(term, "NEAR", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.Near;
            }
            else if (string.Equals(term, "NOT", StringComparison.OrdinalIgnoreCase))
            {
                termExclude = true;
            }
            else
            {
                root = AddNode(root, term, termForm, termExclude, conjunction);
                resetState = true;
            }

            // The parser already sits on the character after the term
            continue;
        }

        switch (parser.Peek())
        {
            case '"':
                // Match next term exactly
                termForm = TermForms.Literal;
                // Extract quoted term; parser is left on the closing quote
                term = ExtractQuote(parser);
                root = AddNode(root, term.Trim(), termForm, termExclude, conjunction);
                resetState = true;
                break;

            case '(':
                // Parse parentheses block as a sub-expression
                term = ExtractBlock(parser, '(', ')');
                node = ParseNode(term, defaultConjunction);
                root = AddNode(root, node, conjunction, true);
                resetState = true;
                break;

            case '<':
                // Parse angle brackets block; its terms default to NEAR
                term = ExtractBlock(parser, '<', '>');
                node = ParseNode(term, ConjunctionTypes.Near);
                root = AddNode(root, node, conjunction);
                resetState = true;
                break;

            case '-':
                // Match when next term is not present
                termExclude = true;
                break;

            case '+':
                // Match next term exactly
                termForm = TermForms.Literal;
                break;

            case '~':
                // Match synonyms of next term
                termForm = TermForms.Thesaurus;
                break;

            default:
                // Any other punctuation is ignored
                break;
        }

        // Advance past the punctuation or closing delimiter just handled
        parser.MoveAhead();
    }

    return root;
}
/// <summary>
/// Parses a query segment and converts it to an expression
/// tree.
/// </summary>
/// <remarks>
/// The words AND, OR, NEAR and NOT are treated as operators rather
/// than search terms. They are recognized with an ordinal,
/// case-insensitive comparison, so recognition does not vary with the
/// current culture. The characters '-', '+' and '~' modify the term
/// that follows them, quoted text is added as a literal term, and
/// parenthesized or angle-bracketed text is parsed recursively.
/// </remarks>
/// <param name="query">Query segment to convert</param>
/// <param name="defaultConjunction">Implicit conjunction type</param>
/// <returns>Root node of expression tree (null when no node was added)</returns>
private INode ParseNode(string query, ConjunctionTypes defaultConjunction)
{
    TermForms termForm = TermForms.Inflectional;
    bool termExclude = false;
    ConjunctionTypes conjunction = defaultConjunction;
    bool resetState = true;
    INode root = null;
    INode node;
    string term;

    TextParser parser = new TextParser(query);
    while (!parser.EndOfText)
    {
        if (resetState)
        {
            // Reset modifiers so they affect only one term
            termForm = TermForms.Inflectional;
            termExclude = false;
            conjunction = defaultConjunction;
            resetState = false;
        }

        parser.MovePastWhitespace();
        if (!parser.EndOfText && !Punctuation.Contains(parser.Peek()))
        {
            // Extract query term
            int start = parser.Position;
            parser.MoveAhead();
            while (!parser.EndOfText && !Punctuation.Contains(parser.Peek()) && !Char.IsWhiteSpace(parser.Peek()))
            {
                parser.MoveAhead();
            }

            // Allow trailing wildcard (kept as part of the extracted term)
            if (parser.Peek() == '*')
            {
                parser.MoveAhead();
                termForm = TermForms.Literal;
            }

            // Interpret token: operator keywords update the pending
            // modifiers, anything else becomes a term node
            term = parser.Extract(start, parser.Position);
            if (String.Equals(term, "AND", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.And;
            }
            else if (String.Equals(term, "OR", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.Or;
            }
            else if (String.Equals(term, "NEAR", StringComparison.OrdinalIgnoreCase))
            {
                conjunction = ConjunctionTypes.Near;
            }
            else if (String.Equals(term, "NOT", StringComparison.OrdinalIgnoreCase))
            {
                termExclude = true;
            }
            else
            {
                root = AddNode(root, term, termForm, termExclude, conjunction);
                resetState = true;
            }

            // Already positioned after the term; skip the advance below
            continue;
        }
        else if (parser.Peek() == '"')
        {
            // Match next term exactly
            termForm = TermForms.Literal;
            // Extract quoted term (parser ends on the closing quote)
            term = ExtractQuote(parser);
            root = AddNode(root, term.Trim(), termForm, termExclude, conjunction);
            resetState = true;
        }
        else if (parser.Peek() == '(')
        {
            // Parse parentheses block
            term = ExtractBlock(parser, '(', ')');
            node = ParseNode(term, defaultConjunction);
            root = AddNode(root, node, conjunction, true);
            resetState = true;
        }
        else if (parser.Peek() == '<')
        {
            // Parse angle brackets block with NEAR as its default conjunction
            term = ExtractBlock(parser, '<', '>');
            node = ParseNode(term, ConjunctionTypes.Near);
            root = AddNode(root, node, conjunction);
            resetState = true;
        }
        else if (parser.Peek() == '-')
        {
            // Match when next term is not present
            termExclude = true;
        }
        else if (parser.Peek() == '+')
        {
            // Match next term exactly
            termForm = TermForms.Literal;
        }
        else if (parser.Peek() == '~')
        {
            // Match synonyms of next term
            termForm = TermForms.Thesaurus;
        }

        // Advance to next character
        parser.MoveAhead();
    }

    return root;
}
/// <summary>
/// Returns the contents of a double-quoted string. The caller must
/// leave the parser on the opening quote. The surrounding quotes are
/// excluded from the result. Afterwards the parser is on the closing
/// quote, or at the end of the text if the string is unterminated.
/// </summary>
/// <param name="parser">TextParser positioned at the opening quote</param>
/// <returns>The text between the quotes</returns>
private string ExtractQuote(TextParser parser)
{
    // Skip the opening quote and remember where the content starts
    parser.MoveAhead();
    int firstChar = parser.Position;

    // Keep moving until the text runs out or a closing quote is found
    while (!(parser.EndOfText || parser.Peek() == '"'))
    {
        parser.MoveAhead();
    }

    return parser.Extract(firstChar, parser.Position);
}
/// <summary>
/// Returns the text between an open delimiter and its matching close
/// delimiter, taking nested delimiter pairs into account. The caller
/// must leave the parser on an occurrence of the open character. The
/// outer delimiters are excluded from the result. Afterwards the
/// parser is on the matching close character, or at the end of the
/// text if no matching close character exists.
/// </summary>
/// <param name="parser">TextParser positioned at the open delimiter</param>
/// <param name="openChar">Start-of-block delimiter</param>
/// <param name="closeChar">End-of-block delimiter</param>
/// <returns>The text between the outer delimiters</returns>
private string ExtractBlock(TextParser parser, char openChar, char closeChar)
{
    // One block is already open: the delimiter the parser starts on
    int openBlocks = 1;
    bool closed = false;

    // Skip the open delimiter and remember where the content starts
    parser.MoveAhead();
    int firstChar = parser.Position;

    while (!closed && !parser.EndOfText)
    {
        if (parser.Peek() == openChar)
        {
            // Entering a nested block
            openBlocks++;
        }
        else if (parser.Peek() == closeChar)
        {
            // Leaving a block; done once the outermost one is closed
            openBlocks--;
            closed = (openBlocks == 0);
        }
        else if (parser.Peek() == '"')
        {
            // Delimiters inside quoted text must not be counted
            ExtractQuote(parser);
        }

        // Stay on the matching close delimiter; otherwise move on
        if (!closed)
        {
            parser.MoveAhead();
        }
    }

    return parser.Extract(firstChar, parser.Position);
}