C# (CSharp) StopWords.Contains Examples

Programming Language: C# (CSharp)

Class/Type: StopWords

Method/Function: Contains

Examples at hotexamples.com: 10

C# (CSharp) StopWords.Contains - 10 examples found. These are the top rated real world C# (CSharp) examples of StopWords.Contains extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Contains(10)

Add(9)

UnionWith(6)

IsEmpty(6)

GetStopWords(4)

ReturnCleansedString(4)

IsStopWord(3)

PopulateStopWords(3)

ReadBlackListWords(3)

Any(2)

IsMatch(2)

RemoveStopwords(1)

removeSingleLetterWords(1)

removeNumbers(1)

SaveFile(1)

MarkStopWords(1)

RemoveStopWords(1)

Remove(1)

All(1)

IsStopWordPresentOrNot(1)

GetHashCode(1)

Equals(1)

ContainsKey(1)

removeWordsBasedOnStopWordsList(1)

Example #1

Show file

        /// <summary>
        /// Normalizes the raw analysis words and stores the result in <c>StrippedWords</c>.
        /// </summary>
        /// <remarks>
        /// For every non-empty input word: all <c>Rgx</c> matches are removed and the word is
        /// lower-cased; the word is dropped if it is then blank or contained in <c>StopWords</c>;
        /// otherwise all <c>Apos</c> matches are removed and the word is added to the output.
        /// </remarks>
        /// <param name="rawAnalysisWords">Words to normalize; null or empty entries are skipped.</param>
        private void StripAnalysisText(List<string> rawAnalysisWords)
        {
            List<string> strippedList = new List<string>();

            foreach (string rawWord in rawAnalysisWords)
            {
                if (string.IsNullOrEmpty(rawWord))
                {
                    continue;
                }

                // NOTE(review): assumes Rgx and Apos are System.Text.RegularExpressions.Regex
                // instances. Regex.Replace returns its input unchanged when nothing matches,
                // so a separate IsMatch pre-check would only scan the word a second time.
                string normalized = Rgx.Replace(rawWord, "").ToLower();

                // Cheap blank check first; the stop-word lookup only runs for real words.
                if (string.IsNullOrWhiteSpace(normalized) || StopWords.Contains(normalized))
                {
                    continue;
                }

                // The stop-word lookup intentionally happens before Apos matches are removed.
                // The word is already lower-case, and removing characters cannot change that.
                strippedList.Add(Apos.Replace(normalized, ""));
            }

            StrippedWords = strippedList;
        }

Example #2

Show file

File: SeoAnalyzer.cs Project: asiehmokarian/SeoAnalyzerAppUpdate

        /// <summary>
        /// Counts how often each word that is not a stop word occurs in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">
        /// The text to analyse. It is passed through <c>GetPlainText</c> before words are
        /// extracted with <c>WordRegexPattern</c>.
        /// </param>
        /// <returns>
        /// A dictionary mapping each lower-cased word to its number of occurrences.
        /// Words found in <c>StopWords</c> (compared case-insensitively) are not included.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="text"/> is null or empty.
        /// </exception>
        public Dictionary<string, int> GetWordOccurancesFromText(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                throw new ArgumentNullException(nameof(text));
            }

            text = GetPlainText(text);

            var result = new Dictionary<string, int>();

            foreach (Match word in Regex.Matches(text, WordRegexPattern))
            {
                var key = word.Value.ToLower();
                if (StopWords.Contains(key, StringComparer.OrdinalIgnoreCase))
                {
                    continue;
                }

                // TryGetValue leaves count at 0 for an unseen key, so a single lookup
                // followed by one indexer write covers both the "new" and "existing" cases.
                int count;
                result.TryGetValue(key, out count);
                result[key] = count + 1;
            }

            return result;
        }

Example #3

Show file

File: TfidfExtractor.cs Project: zhangkangen/Learn

        /// <summary>
        /// Builds a per-word weight table for <paramref name="text"/>: each kept word's
        /// occurrence count is scaled by its IDF value and divided by the total count.
        /// </summary>
        /// <param name="text">The text to segment and weigh.</param>
        /// <param name="allowPos">
        /// When <c>IsNotEmpty()</c> reports true for this sequence, words come from
        /// <c>FilterCutByPos</c>; otherwise they come from <c>Segmenter.Cut</c>.
        /// </param>
        /// <returns>A dictionary mapping each kept word to its weight.</returns>
        private IDictionary<string, double> GetWordIfidf(string text, IEnumerable<string> allowPos)
        {
            IEnumerable<string> words;

            if (allowPos.IsNotEmpty())
            {
                words = FilterCutByPos(text, allowPos);
            }
            else
            {
                words = Segmenter.Cut(text);
            }

            // Term frequency: count every word that is non-empty, at least two characters
            // long after trimming, and not a stop word.
            var freq = new Dictionary<string, double>();

            foreach (var word in words)
            {
                bool keep = !string.IsNullOrEmpty(word)
                            && word.Trim().Length >= 2
                            && !StopWords.Contains(word.ToLower());
                if (keep)
                {
                    freq[word] = freq.GetDefault(word, 0.0) + 1.0;
                }
            }

            var total = freq.Values.Sum();

            // Iterate over a snapshot of the keys because the values are rewritten in the loop.
            // GetDefault presumably falls back to MedianIdf for words missing from IdfFreq —
            // TODO confirm against the extension method.
            var keys = freq.Keys.ToList();
            foreach (var key in keys)
            {
                freq[key] *= IdfFreq.GetDefault(key, MedianIdf) / total;
            }

            return freq;
        }

Example #4

Show file

File: Document.cs Project: jakakordez/web-searcher

        /// <summary>
        /// Builds one <c>Posting</c> per distinct indexed word of this document and, when a
        /// database context is supplied, registers the words and postings with it.
        /// </summary>
        /// <param name="dbContext">
        /// Optional database context. When null, nothing is added to any context and the
        /// postings are only returned.
        /// </param>
        /// <param name="words">
        /// Optional filter. When not null, only tokens whose lower-cased text is contained
        /// in this list are indexed.
        /// </param>
        /// <returns>The postings built for this document, one per distinct word.</returns>
        public IEnumerable<Posting> Index(sqliteContext dbContext, List<string> words)
        {
            Dictionary<string, Posting> postings = new Dictionary<string, Posting>();

            // Hash the optional filter once so each token costs one O(1) membership check
            // instead of a linear scan of the list. The default string comparer is used,
            // which is the same equality List<string>.Contains applies.
            HashSet<string> allowedWords = words == null ? null : new HashSet<string>(words);

            int index = 0;

            foreach (var token in Tokens)
            {
                var word = token.ToString().ToLower();

                // The position counter advances for every token, including skipped ones,
                // so recorded positions are 1-based offsets into the full token stream.
                index++;

                if (StopWords.Contains(word))
                {
                    continue;
                }
                if (allowedWords != null && !allowedWords.Contains(word))
                {
                    continue;
                }

                // With a null context the lookup yields null and the Add is a no-op.
                if (dbContext?.IndexWord.Find(word) == null)
                {
                    dbContext?.IndexWord.Add(new IndexWord()
                    {
                        Word = word
                    });
                }

                Posting posting;
                if (!postings.TryGetValue(word, out posting))
                {
                    posting = new Posting()
                    {
                        Word         = word,
                        DocumentName = name,
                        Indexes      = ""
                    };
                    postings.Add(word, posting);
                }

                posting.Frequency++;

                // Indexes is a comma-separated list of token positions.
                if (posting.Indexes != "")
                {
                    posting.Indexes += ",";
                }
                posting.Indexes += index;
            }

            foreach (var entry in postings)
            {
                dbContext?.Posting.Add(entry.Value);
            }

            return postings.Values;
        }

Example #5

Show file

File: MoreLikeThis.cs Project: sycct/lucenenet

        /// <summary>
        /// Decides whether a term should be left out of "more like" comparisons.
        /// </summary>
        /// <remarks>
        /// A term is noise when its length is outside the configured bounds (a bound of zero
        /// or less disables that check) or when it is contained in <c>StopWords</c>.
        /// </remarks>
        /// <param name="term"> The word being considered </param>
        /// <returns> <c>true</c> if the term should be ignored, <c>false</c> if it should be used in further analysis </returns>
        private bool IsNoiseWord(string term)
        {
            int length = term.Length;

            bool outsideLengthBounds =
                (MinWordLen > 0 && length < MinWordLen) ||
                (MaxWordLen > 0 && length > MaxWordLen);

            if (outsideLengthBounds)
            {
                return true;
            }

            if (StopWords == null)
            {
                return false;
            }

            return StopWords.Contains(term);
        }

Example #6

Show file

File: StopSettingStrategy.cs Project: EvanQuan/Chubberino

        /// <summary>
        /// Determines whether a chat message is a stop request.
        /// </summary>
        /// <remarks>
        /// Only messages from moderators other than "streamelements" are considered. Such a
        /// message is a stop request when any of its space-separated words is contained in
        /// <c>StopWords</c> (after lower-casing) or equals <c>TwitchClientManager.Name.Value</c>
        /// ignoring case.
        /// </remarks>
        /// <param name="chatMessage">The message to inspect.</param>
        /// <returns><c>true</c> if the message is a stop request; otherwise <c>false</c>.</returns>
        public Boolean ShouldStop(ChatMessage chatMessage)
        {
            Boolean senderIsEligible =
                chatMessage.IsModerator &&
                !chatMessage.Username.Equals("streamelements", StringComparison.OrdinalIgnoreCase);

            if (!senderIsEligible)
            {
                return false;
            }

            foreach (String word in chatMessage.Message.Split(' '))
            {
                if (StopWords.Contains(word.ToLower()))
                {
                    return true;
                }

                if (word.Equals(TwitchClientManager.Name.Value, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

Example #7

Show file

File: DocumentActions.cs Project: alexandrmazur96/PlagiarismDetection

        /// <summary>
        /// Cleans a text using the stop-word dictionary and returns its stemmed words.
        /// </summary>
        /// <remarks>
        /// Each token produced by splitting on <c>Separators</c> goes through these steps in order:
        /// digits are removed; the token is dropped if it is blank or its lower-cased form is in
        /// <c>StopWords</c>; stop symbols are removed; the token is lower-cased; the token is
        /// stemmed with the stemmer that matches its script.
        /// </remarks>
        /// <param name="text">The raw text that needs to be canonicalized.</param>
        /// <returns>The stemmed, lower-cased words of the text.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <c>Verifications.GetFontType</c> returns a value this method does not handle.
        /// </exception>
        private static List<string> TextPurify(string text)
        {
            // Built once per call instead of once per token: the pattern never changes.
            var stopSymbols = new Regex("[0-9/|_!@#$%^&*()_+=?:;.,{}№><«»'\"`~" + @"\\[\]– -]*");

            var cyrillicStemmer = new RussianStemmer();
            var latinStemmer    = new EnglishStemmer();

            var stemmedLowerCaseTokens = new List<string>();

            // Split the whole text into separate words and process them one by one.
            foreach (var rawToken in text.Split(Separators))
            {
                // Drop every digit from the word.
                var withoutDigits = new string(rawToken.Where(c => !char.IsDigit(c)).ToArray());

                // Skip stop words and empty entries. This check runs before the stop symbols
                // are stripped, so it sees the token with punctuation still attached.
                if (StopWords.Contains(withoutDigits.ToLower()) || string.IsNullOrWhiteSpace(withoutDigits))
                {
                    continue;
                }

                // Remove all stop symbols, then bring the word to lower case.
                var purifiedLowerCaseToken = stopSymbols.Replace(withoutDigits, "").ToLower();

                switch (Verifications.GetFontType(purifiedLowerCaseToken))
                {
                case FontType.Cyrillic:
                    stemmedLowerCaseTokens.Add(cyrillicStemmer.Stem(purifiedLowerCaseToken));
                    break;

                case FontType.Latin:
                    stemmedLowerCaseTokens.Add(latinStemmer.Stem(purifiedLowerCaseToken));
                    break;

                // Tokens of these types are not stemmed and are left out of the result.
                case FontType.Other:
                case FontType.Numbers:
                    break;

                default:
                    throw new ArgumentOutOfRangeException();
                }
            }

            return stemmedLowerCaseTokens;
        }

Example #8

Show file

 /// <summary>
 /// Checks whether a word/flag pair should be kept.
 /// </summary>
 /// <param name="allowPos">The flags that are accepted.</param>
 /// <param name="wp">The pair to test.</param>
 /// <returns>
 /// <c>true</c> when the pair's flag is in <paramref name="allowPos"/>, its trimmed word has
 /// at least two characters, and its lower-cased word is not in <c>StopWords</c>.
 /// </returns>
 public bool PairFilter(IEnumerable<string> allowPos, Pair wp)
 {
     if (!allowPos.Contains(wp.Flag))
     {
         return false;
     }

     if (wp.Word.Trim().Length < 2)
     {
         return false;
     }

     return !StopWords.Contains(wp.Word.ToLower());
 }

Example #9

Show file

File: TextRankExtractor.cs Project: szc982/58HouseSearch

 /// <summary>
 /// Checks whether a word/flag pair should be kept, using <c>DefaultPosFilter</c>
 /// as the set of accepted flags.
 /// </summary>
 /// <param name="wp">The pair to test.</param>
 /// <returns>
 /// <c>true</c> when the pair's flag is in <c>DefaultPosFilter</c>, its trimmed word has
 /// at least two characters, and its lower-cased word is not in <c>StopWords</c>.
 /// </returns>
 public bool PairFilter(Pair wp)
 {
     bool keep = false;

     if (DefaultPosFilter.Contains(wp.Flag) && wp.Word.Trim().Length >= 2)
     {
         keep = !StopWords.Contains(wp.Word.ToLower());
     }

     return keep;
 }

Example #10

Show file

File: Search.cs Project: waynejr2/dotnetmocks2demo

        /// <summary>
        /// Strips special characters, short words and stop words (and optionally HTML)
        /// from the specified string.
        /// </summary>
        /// <param name="content">
        /// The text to clean.
        /// </param>
        /// <param name="removeHtml">
        /// When <c>true</c>, the content is first passed through <c>Utils.StripHtml</c>.
        /// </param>
        /// <returns>
        /// The kept words in lower case, each followed by a single space.
        /// </returns>
        private static string CleanContent(string content, bool removeHtml)
        {
            if (removeHtml)
            {
                content = Utils.StripHtml(content);
            }

            // Characters that are removed from the content before it is split into words.
            var strippedTokens = new[] { "\\", "|", "(", ")", "[", "]", "*", "?", "}", "{", "^", "+" };
            foreach (var token in strippedTokens)
            {
                content = content.Replace(token, string.Empty);
            }

            var separators = new[] { ' ', '\n', '\r' };
            var sb         = new StringBuilder();

            foreach (var rawWord in content.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            {
                var word = rawWord.ToLowerInvariant().Trim();

                // Single-character words and stop words are left out.
                if (word.Length > 1 && !StopWords.Contains(word))
                {
                    sb.AppendFormat("{0} ", word);
                }
            }

            return sb.ToString();
        }