예제 #1
0
 /// <summary>
 /// Initializes a new <c>StemPair</c> by storing the given stem together with
 /// references to a keyword pair and an artifact.
 /// </summary>
 /// <param name="Stem">The stem text to store.</param>
 /// <param name="KeywordRef">The keyword pair to keep a reference to.</param>
 /// <param name="ArtifactRef">The artifact to keep a reference to.</param>
 public StemPair(string Stem, KeywordPair KeywordRef, Artifact ArtifactRef)
 {
     // Members differ from the parameters only by case, so no `this.` qualifier is required.
     stem = Stem;
     keywordRef = KeywordRef;
     artifactRef = ArtifactRef;
 }
예제 #2
0
        /// <summary>
        /// Processes a block of text line by line: for each line, builds a keyword pair from
        /// its English and French parts and records the stems of the English words.
        /// </summary>
        /// <param name="unparsed">
        /// Raw text to process. It is split into lines on "\r\n" or "\n"; each line is handled
        /// as one keyword entry.
        /// </param>
        private void AddKeywordsStems(string unparsed)
        {
            // Separator sets are loop-invariant, so build them once.
            string[] lineSeparators = { "\r\n", "\n" };
            string[] wordSeparators = { " ", "-", "&", "," };

            foreach (string chunk in unparsed.Split(lineSeparators, StringSplitOptions.None))
            {
                // Pull the English and French text out of the line.
                string longKeywordEnglish = ExtractLanguage(LANGUAGE_ENGLISH, chunk);
                string longKeywordFrench = ExtractLanguage(LANGUAGE_FRENCH, chunk);
                KeywordPair keywordPair = new KeywordPair(longKeywordEnglish, longKeywordFrench, this);

                // Only keep the pair in the keyword list when both languages are present.
                if (longKeywordEnglish != "" && longKeywordFrench != "")
                {
                    mKeywords.Add(keywordPair);
                }

                // Stems are derived from the English keyword only, since that is what the
                // stemmer works for. Split the English keyword into separate words first.
                // NOTE(review): stems are still recorded when the pair above was not added to
                // mKeywords (e.g. the French text is empty) - confirm this is intended.
                foreach (string word in longKeywordEnglish.Split(wordSeparators, StringSplitOptions.None))
                {
                    // Lower-case with the invariant culture so the result does not depend on the
                    // current thread culture (e.g. Turkish 'I' -> dotless 'ı' would corrupt the
                    // input to the stemmer and the stop-word lookup).
                    string stem = Stemmer.Stem(word.ToLowerInvariant());

                    // Empty stems (produced by consecutive separators) and stop words are skipped.
                    if (stem != "" && !Stopwords.Contains(stem))
                    {
                        // Allocate the pair only for stems that are actually kept.
                        mStems.Add(new StemPair(stem, keywordPair, this));
                    }
                }
            }
        }