C# (CSharp) LuceneCommon.Stem примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: LuceneCommon

Метод/Функция: Stem

Примеров на hotexamples.com: 2

C# (CSharp) LuceneCommon.Stem - 2 примера найдено. Это лучшие примеры C# (CSharp) кода для LuceneCommon.Stem, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GetReader(6)

GetSearcher(4)

ReleaseReader(4)

ReleaseSearcher(4)

IsStopWord(2)

Stem(2)

CloseReader(1)

CreateHightLight(1)

DebugHook(1)

PanGuSplitWord(1)

StringToQuery(1)

Пример #1

Показать файл

Файл: QueryDriver.cs Проект: ArsenShnurkov/beagle-1

        // Records the search terms carried by a query part in the response.
        //
        // Prohibited parts are ignored.  An OR part is expanded by recursing into
        // each of its sub-parts.  Any other part that is not a text part is
        // ignored.  For a text part, the text is split on single spaces, stop
        // words are discarded, and two phrases are appended to the response:
        //   - response.ExactText gets the remaining words joined by spaces;
        //   - response.StemmedText gets the same words, lower-cased and stemmed.
        // Both phrases are added even when no words remain (as empty strings).
        //
        // sb is used purely as scratch space: its previous contents are thrown
        // away, and on return it holds the last stemmed phrase that was built.
        static void AddSearchTermInfo(QueryPart part,
                                      SearchTermResponse response, StringBuilder sb)
        {
            if (part.Logic == QueryPartLogic.Prohibited)
            {
                return;
            }

            QueryPart_Or or_part = part as QueryPart_Or;
            if (or_part != null)
            {
                ICollection children = or_part.SubParts;
                foreach (QueryPart child in children)
                {
                    AddSearchTermInfo(child, response, sb);
                }
                return;
            }

            QueryPart_Text text_part = part as QueryPart_Text;
            if (text_part == null)
            {
                return;
            }

            // Keep the words that are not stop words, in their original order.
            ArrayList kept_words = new ArrayList();
            foreach (string word in text_part.Text.Split(' '))
            {
                if (!LuceneCommon.IsStopWord(word))
                {
                    kept_words.Add(word);
                }
            }

            // Exact phrase: the surviving words as typed.  The separator is only
            // written once the builder is non-empty, so leading empty words do
            // not produce a leading space.
            sb.Length = 0;
            foreach (string word in kept_words)
            {
                if (sb.Length > 0)
                {
                    sb.Append(' ');
                }
                sb.Append(word);
            }
            response.ExactText.Add(sb.ToString());

            // Stemmed phrase: same words, lower-cased and run through the stemmer.
            sb.Length = 0;
            foreach (string word in kept_words)
            {
                if (sb.Length > 0)
                {
                    sb.Append(' ');
                }
                sb.Append(LuceneCommon.Stem(word.ToLower()));
            }
            response.StemmedText.Add(sb.ToString());
        }

Пример #2

Показать файл

Файл: SnippetFu.cs Проект: universsky/beagrep

        // Scans text from character offset pos for tokens whose stem equals one
        // of the entries in stemmed_terms, and builds a snippet line shaped like
        // (words)*[(matched word)(words)*]+.
        //
        // Tokens are maximal runs of characters for which IsTokenSeparator is
        // false.  For every match, the text between the start of the leading
        // context and the match is added as a non-match fragment, followed by the
        // matched token itself (tagged with the index of the matching term).
        //
        // Returns:
        //   - the snippet line, as soon as context_length non-matching tokens
        //     have followed a match; in that case the trailing fragment runs to
        //     the end of the last scanned token while pos is left at the start
        //     of that token;
        //   - the snippet line, when the text is exhausted and the line has
        //     Count > 0 (the remaining text is added as the trailing fragment);
        //   - null otherwise.
        // sliding_window is reset before every return.
        private SnippetLine MarkTerms(ArrayList stemmed_terms, string text, ref int pos)
        {
            SnippetLine line = null;

            // One past the end of the previous match; before any match, the
            // offset at which this scan started.
            int resume_from = pos;

            while (pos < text.Length)
            {
                // Skip separators until a token begins.
                if (IsTokenSeparator(text [pos]))
                {
                    ++pos;
                    continue;
                }

                // Advance to the first separator (or end of text) after the token.
                int token_end;
                for (token_end = pos + 1; token_end < text.Length; ++token_end)
                {
                    if (IsTokenSeparator(text [token_end]))
                    {
                        break;
                    }
                }

                string word         = text.Substring(pos, token_end - pos);
                string word_stem    = null;     // computed lazily, at most once per token
                int    matched_term = -1;       // index into stemmed_terms, -1 when none

                // Look for the first term whose stem equals the stem of this token.
                for (int t = 0; t < stemmed_terms.Count; t++)
                {
                    string term = (string)stemmed_terms [t];

                    // A term longer than the raw token is not considered.
                    if (term.Length > token_end - pos)
                    {
                        continue;
                    }

                    // Stem only when some term is short enough to need it.
                    if (word_stem == null)
                    {
                        word_stem = LuceneCommon.Stem(word.ToLower());
                    }

                    if (String.Compare(term, word_stem, true) == 0)
                    {
                        matched_term = t;
                        break;
                    }
                }

                if (matched_term >= 0)
                {
                    if (line == null)
                    {
                        line = new SnippetLine();
                    }

                    // Leading context starts where the window says it does; a
                    // StartValue of -1 is taken to mean no non-matching token
                    // was seen since the last match, so continue from there.
                    int lead_start = sliding_window.StartValue;
                    if (lead_start == -1)
                    {
                        lead_start = resume_from;
                    }
                    sliding_window.Reset();

                    line.AddNonMatchFragment(text.Substring(lead_start, pos - lead_start));
                    line.AddMatchFragment(matched_term, word);
                    resume_from = token_end;
                }
                else
                {
                    // Remember where this non-matching token started.
                    sliding_window.Add(pos);

                    // Enough trailing context after an earlier match: finish here.
                    if (line != null && line.Count > 0 && sliding_window.Count == context_length)
                    {
                        sliding_window.Reset();
                        line.AddNonMatchFragment(text.Substring(resume_from, token_end - resume_from));
                        return line;
                    }
                }

                pos = token_end;
            }

            // Text exhausted before context_length trailing tokens were seen.
            bool has_fragments = line != null && line.Count > 0;
            sliding_window.Reset();
            if (!has_fragments)
            {
                return null;
            }

            // Whatever follows the last match becomes the trailing fragment.
            line.AddNonMatchFragment(text.Substring(resume_from, pos - resume_from));
            return line;
        }