/// <summary>
        /// Extracts a block of text delimited by the specified open and close
        /// characters. It is assumed the parser is positioned at an
        /// occurrence of the open character. The open and closing characters
        /// are not included in the returned string. On return, the parser is
        /// positioned at the closing character or at the end of the text if
        /// the closing character was not found.
        /// </summary>
        /// <param name="parser">TextParser object</param>
        /// <param name="openChar">Start-of-block delimiter</param>
        /// <param name="closeChar">End-of-block delimiter</param>
        /// <returns>The extracted text</returns>
        private string ExtractBlock(TextParser parser, char openChar, char closeChar)
        {
            // Step over the opening delimiter; the block content begins here
            parser.MoveAhead();
            int contentStart = parser.Position;

            // The opening delimiter just consumed accounts for one nesting level
            int nesting = 1;

            // Scan until the matching close delimiter or the end of the text.
            // Leaving the loop via break skips the MoveAhead in the loop header,
            // so the parser stays positioned on the closing delimiter.
            for (; !parser.EndOfText; parser.MoveAhead())
            {
                char current = parser.Peek();

                if (current == openChar)
                {
                    // Nested block opens
                    nesting++;
                }
                else if (current == closeChar)
                {
                    // Nested block closes; level zero means our own block ended
                    nesting--;
                    if (nesting == 0)
                    {
                        break;
                    }
                }
                else if (current == '"')
                {
                    // Skip over quoted text so delimiters inside it are not counted
                    ExtractQuote(parser);
                }
            }

            return parser.Extract(contentStart, parser.Position);
        }
        /// <summary>
        /// Parses a query segment and converts it to an expression
        /// tree.
        /// </summary>
        /// <param name="query">Query segment to convert</param>
        /// <param name="defaultConjunction">Implicit conjunction type</param>
        /// <returns>Root node of expression tree</returns>
        private INode ParseNode(string query, ConjunctionTypes defaultConjunction)
        {
            // Modifiers that apply to the next term added to the tree. They are
            // accumulated from prefix characters and keywords, and restored to
            // their defaults (via resetState) once a term or block has been added.
            TermForms        termForm    = TermForms.Inflectional;
            bool             termExclude = false;
            ConjunctionTypes conjunction = defaultConjunction;
            bool             resetState  = true;
            INode            root        = null;
            INode            node;
            string           term;

            TextParser parser = new TextParser(query);

            while (!parser.EndOfText)
            {
                if (resetState)
                {
                    // Reset modifiers
                    termForm    = TermForms.Inflectional;
                    termExclude = false;
                    conjunction = defaultConjunction;
                    resetState  = false;
                }

                parser.MovePastWhitespace();
                if (!parser.EndOfText &&
                    !Punctuation.Contains(parser.Peek()))
                {
                    // Extract query term: everything up to the next whitespace
                    // or punctuation character
                    int start = parser.Position;
                    parser.MoveAhead();
                    while (!parser.EndOfText &&
                           !Punctuation.Contains(parser.Peek()) &&
                           !Char.IsWhiteSpace(parser.Peek()))
                    {
                        parser.MoveAhead();
                    }

                    // Allow trailing wildcard: the '*' is kept as part of the
                    // term and the term is matched literally
                    if (parser.Peek() == '*')
                    {
                        parser.MoveAhead();
                        termForm = TermForms.Literal;
                    }

                    // Interpret token. Operator keywords are fixed ASCII words, so
                    // they are compared ordinally (ignoring case) rather than with
                    // the current culture's linguistic rules; this keeps keyword
                    // recognition independent of the thread culture.
                    term = parser.Extract(start, parser.Position);
                    if (String.Equals(term, "AND", StringComparison.OrdinalIgnoreCase))
                    {
                        conjunction = ConjunctionTypes.And;
                    }
                    else if (String.Equals(term, "OR", StringComparison.OrdinalIgnoreCase))
                    {
                        conjunction = ConjunctionTypes.Or;
                    }
                    else if (String.Equals(term, "NEAR", StringComparison.OrdinalIgnoreCase))
                    {
                        conjunction = ConjunctionTypes.Near;
                    }
                    else if (String.Equals(term, "NOT", StringComparison.OrdinalIgnoreCase))
                    {
                        termExclude = true;
                    }
                    else
                    {
                        // Ordinary search term: add it with the pending modifiers
                        root       = AddNode(root, term, termForm, termExclude, conjunction);
                        resetState = true;
                    }

                    // The parser already sits on the character after the token,
                    // so skip the MoveAhead at the bottom of the loop
                    continue;
                }
                else if (parser.Peek() == '"')
                {
                    // Match next term exactly
                    termForm = TermForms.Literal;
                    // Extract quoted term; the closing quote is stepped over by
                    // the MoveAhead at the bottom of the loop
                    term       = ExtractQuote(parser);
                    root       = AddNode(root, term.Trim(), termForm, termExclude, conjunction);
                    resetState = true;
                }
                else if (parser.Peek() == '(')
                {
                    // Parse parentheses block recursively with the same default
                    // conjunction.
                    // NOTE(review): termExclude is not passed to AddNode here, so a
                    // preceding '-' or NOT is not forwarded for a block - confirm
                    // this is intended.
                    term       = ExtractBlock(parser, '(', ')');
                    node       = ParseNode(term, defaultConjunction);
                    root       = AddNode(root, node, conjunction, true);
                    resetState = true;
                }
                else if (parser.Peek() == '<')
                {
                    // Parse angle brackets block; terms inside default to NEAR
                    term       = ExtractBlock(parser, '<', '>');
                    node       = ParseNode(term, ConjunctionTypes.Near);
                    root       = AddNode(root, node, conjunction);
                    resetState = true;
                }
                else if (parser.Peek() == '-')
                {
                    // Match when next term is not present
                    termExclude = true;
                }
                else if (parser.Peek() == '+')
                {
                    // Match next term exactly
                    termForm = TermForms.Literal;
                }
                else if (parser.Peek() == '~')
                {
                    // Match synonyms of next term
                    termForm = TermForms.Thesaurus;
                }

                // Advance to next character (any other punctuation is skipped)
                parser.MoveAhead();
            }
            return(root);
        }
Пример #3
0
 /// <summary>
 /// Extracts a block of text delimited by double quotes. It is
 /// assumed the parser is positioned at the first quote. The
 /// quotes are not included in the returned string. On return,
 /// the parser is positioned at the closing quote or at the end of
 /// the text if the closing quote was not found.
 /// </summary>
 /// <param name="parser">TextParser object</param>
 /// <returns>The extracted text</returns>
 private string ExtractQuote(TextParser parser)
 {
     // Step over the opening quote; the quoted content begins here
     parser.MoveAhead();
     int contentStart = parser.Position;

     // Advance until the closing quote or the end of the text is reached
     for (; !parser.EndOfText; parser.MoveAhead())
     {
         if (parser.Peek() == '"')
             break;
     }

     // The parser now rests on the closing quote (or at end of text)
     return parser.Extract(contentStart, parser.Position);
 }
Пример #4
0
        /// <summary>
        /// Parses a query segment and converts it to an expression
        /// tree.
        /// </summary>
        /// <param name="query">Query segment to convert</param>
        /// <param name="defaultConjunction">Implicit conjunction type</param>
        /// <returns>Root node of expression tree</returns>
        private INode ParseNode(string query, ConjunctionTypes defaultConjunction)
        {
            // Pending modifiers for the next term. Prefix characters and keywords
            // update them; they return to their defaults (via resetState) after a
            // term or block has been added to the tree.
            TermForms termForm = TermForms.Inflectional;
            bool termExclude = false;
            ConjunctionTypes conjunction = defaultConjunction;
            bool resetState = true;
            INode root = null;
            INode node;
            string term;

            TextParser parser = new TextParser(query);
            while (!parser.EndOfText)
            {
                if (resetState)
                {
                    // Reset modifiers
                    termForm = TermForms.Inflectional;
                    termExclude = false;
                    conjunction = defaultConjunction;
                    resetState = false;
                }

                parser.MovePastWhitespace();
                if (!parser.EndOfText &&
                    !Punctuation.Contains(parser.Peek()))
                {
                    // Extract query term: runs until whitespace or punctuation
                    int start = parser.Position;
                    parser.MoveAhead();
                    while (!parser.EndOfText &&
                        !Punctuation.Contains(parser.Peek()) &&
                        !Char.IsWhiteSpace(parser.Peek()))
                        parser.MoveAhead();

                    // Allow trailing wildcard: '*' stays in the term, which is
                    // then matched literally
                    if (parser.Peek() == '*')
                    {
                        parser.MoveAhead();
                        termForm = TermForms.Literal;
                    }

                    // Interpret token. The operator keywords are fixed ASCII
                    // words, so compare them ordinally (ignoring case) instead of
                    // using culture-sensitive rules that vary with the current
                    // thread culture.
                    term = parser.Extract(start, parser.Position);
                    if (String.Equals(term, "AND", StringComparison.OrdinalIgnoreCase))
                        conjunction = ConjunctionTypes.And;
                    else if (String.Equals(term, "OR", StringComparison.OrdinalIgnoreCase))
                        conjunction = ConjunctionTypes.Or;
                    else if (String.Equals(term, "NEAR", StringComparison.OrdinalIgnoreCase))
                        conjunction = ConjunctionTypes.Near;
                    else if (String.Equals(term, "NOT", StringComparison.OrdinalIgnoreCase))
                        termExclude = true;
                    else
                    {
                        // Ordinary search term: add it with the pending modifiers
                        root = AddNode(root, term, termForm, termExclude, conjunction);
                        resetState = true;
                    }

                    // Parser is already past the token; skip the MoveAhead below
                    continue;
                }
                else if (parser.Peek() == '"')
                {
                    // Match next term exactly
                    termForm = TermForms.Literal;
                    // Extract quoted term; the MoveAhead at the bottom of the
                    // loop steps over the closing quote
                    term = ExtractQuote(parser);
                    root = AddNode(root, term.Trim(), termForm, termExclude, conjunction);
                    resetState = true;
                }
                else if (parser.Peek() == '(')
                {
                    // Parse parentheses block recursively, keeping the same
                    // default conjunction.
                    // NOTE(review): termExclude is not passed to AddNode for
                    // blocks, so a preceding '-' or NOT is not forwarded here -
                    // confirm this is intended.
                    term = ExtractBlock(parser, '(', ')');
                    node = ParseNode(term, defaultConjunction);
                    root = AddNode(root, node, conjunction, true);
                    resetState = true;
                }
                else if (parser.Peek() == '<')
                {
                    // Parse angle brackets block; its terms default to NEAR
                    term = ExtractBlock(parser, '<', '>');
                    node = ParseNode(term, ConjunctionTypes.Near);
                    root = AddNode(root, node, conjunction);
                    resetState = true;
                }
                else if (parser.Peek() == '-')
                {
                    // Match when next term is not present
                    termExclude = true;
                }
                else if (parser.Peek() == '+')
                {
                    // Match next term exactly
                    termForm = TermForms.Literal;
                }
                else if (parser.Peek() == '~')
                {
                    // Match synonyms of next term
                    termForm = TermForms.Thesaurus;
                }

                // Advance to next character (other punctuation is simply skipped)
                parser.MoveAhead();
            }
            return root;
        }
Пример #5
0
        /// <summary>
        /// Extracts a block of text delimited by the specified open and close
        /// characters. It is assumed the parser is positioned at an
        /// occurrence of the open character. The open and closing characters
        /// are not included in the returned string. On return, the parser is
        /// positioned at the closing character or at the end of the text if
        /// the closing character was not found.
        /// </summary>
        /// <param name="parser">TextParser object</param>
        /// <param name="openChar">Start-of-block delimiter</param>
        /// <param name="closeChar">End-of-block delimiter</param>
        /// <returns>The extracted text</returns>
        private string ExtractBlock(TextParser parser, char openChar, char closeChar)
        {
            // Move past the opening delimiter and remember where the content starts
            parser.MoveAhead();
            int begin = parser.Position;

            // One level is already open: the delimiter we just stepped over
            int level = 1;
            bool closed = false;

            while (!closed && !parser.EndOfText)
            {
                char ch = parser.Peek();

                if (ch == openChar)
                    level++;
                else if (ch == closeChar)
                    closed = (--level == 0);
                else if (ch == '"')
                    ExtractQuote(parser);   // delimiters inside quotes do not count

                // Stay on the closing delimiter once the block has ended
                if (!closed)
                    parser.MoveAhead();
            }

            return parser.Extract(begin, parser.Position);
        }