/// <summary>
/// Stems the text of every artifact in a collection using the Snowball algorithm.
/// </summary>
/// <param name="artifacts">Collection whose artifacts are to be stemmed</param>
/// <param name="langauge">Language of the Snowball stemmer to apply</param>
/// <returns>A new collection with one stemmed artifact (same Id) per input artifact</returns>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection artifacts, SnowballStemmerEnum langauge)
{
    TLArtifactsCollection stemmedArtifacts = new TLArtifactsCollection();

    foreach (TLArtifact source in artifacts.Values)
    {
        // The copy is constructed with empty text; the stemmed text is assigned afterwards.
        TLArtifact stemmed = new TLArtifact(source.Id, String.Empty)
        {
            Text = ProcessText(source.Text, langauge)
        };
        stemmedArtifacts.Add(stemmed);
    }

    return stemmedArtifacts;
}
/// <summary>
/// Creates a new Snowball stemmer instance for the requested language.
/// </summary>
/// <param name="language">Language selecting the stemmer implementation</param>
/// <returns>
/// A freshly constructed stemmer for <paramref name="language"/>; the English stemmer
/// is used for English, Default, and any value not listed explicitly.
/// </returns>
public static ISnowballStemmer GetStemmer(SnowballStemmerEnum language)
{
    ISnowballStemmer stemmer;

    switch (language)
    {
        case SnowballStemmerEnum.Czech:
            stemmer = new CzechStemmer();
            break;
        case SnowballStemmerEnum.Danish:
            stemmer = new DanishStemmer();
            break;
        case SnowballStemmerEnum.Dutch:
            stemmer = new DutchStemmer();
            break;
        case SnowballStemmerEnum.Finnish:
            stemmer = new FinnishStemmer();
            break;
        case SnowballStemmerEnum.French:
            stemmer = new FrenchStemmer();
            break;
        case SnowballStemmerEnum.German:
            stemmer = new GermanStemmer();
            break;
        case SnowballStemmerEnum.Hungarian:
            stemmer = new HungarianStemmer();
            break;
        case SnowballStemmerEnum.Italian:
            stemmer = new ItalianStemmer();
            break;
        case SnowballStemmerEnum.Norwegian:
            stemmer = new NorwegianStemmer();
            break;
        case SnowballStemmerEnum.Portuguese:
            stemmer = new PortugalStemmer();
            break;
        case SnowballStemmerEnum.Romanian:
            stemmer = new RomanianStemmer();
            break;
        case SnowballStemmerEnum.Russian:
            stemmer = new RussianStemmer();
            break;
        case SnowballStemmerEnum.Spanish:
            stemmer = new SpanishStemmer();
            break;
        case SnowballStemmerEnum.English:
        case SnowballStemmerEnum.Default:
        default:
            // English doubles as the fallback for unlisted values.
            stemmer = new EnglishStemmer();
            break;
    }

    return stemmer;
}
/// <summary>
/// Maps a stemmer language to a new instance of the matching Snowball stemmer.
/// </summary>
/// <param name="language">Language whose stemmer implementation is wanted</param>
/// <returns>
/// A new stemmer for the given language. English, Default, and any value without
/// its own case all yield the English stemmer.
/// </returns>
public static ISnowballStemmer GetStemmer(SnowballStemmerEnum language)
{
    switch (language)
    {
        case SnowballStemmerEnum.Czech:
            return new CzechStemmer();

        case SnowballStemmerEnum.Danish:
            return new DanishStemmer();

        case SnowballStemmerEnum.Dutch:
            return new DutchStemmer();

        case SnowballStemmerEnum.Finnish:
            return new FinnishStemmer();

        case SnowballStemmerEnum.French:
            return new FrenchStemmer();

        case SnowballStemmerEnum.German:
            return new GermanStemmer();

        case SnowballStemmerEnum.Hungarian:
            return new HungarianStemmer();

        case SnowballStemmerEnum.Italian:
            return new ItalianStemmer();

        case SnowballStemmerEnum.Norwegian:
            return new NorwegianStemmer();

        case SnowballStemmerEnum.Portuguese:
            return new PortugalStemmer();

        case SnowballStemmerEnum.Romanian:
            return new RomanianStemmer();

        case SnowballStemmerEnum.Russian:
            return new RussianStemmer();

        case SnowballStemmerEnum.Spanish:
            return new SpanishStemmer();

        // English and Default share the fallback branch with every unlisted value.
        case SnowballStemmerEnum.English:
        case SnowballStemmerEnum.Default:
        default:
            return new EnglishStemmer();
    }
}
/// <summary>
/// Stems each space-separated term of a string with the Snowball algorithm.
/// </summary>
/// <param name="text">Input string; it is split on the space character only</param>
/// <param name="language">Language of the Snowball stemmer to apply</param>
/// <returns>
/// The stemmed terms joined by single spaces, with leading and trailing
/// whitespace trimmed from the final string
/// </returns>
public static string ProcessText(string text, SnowballStemmerEnum language)
{
    // Split before creating the stemmer, matching the original evaluation order.
    string[] words = text.Split(' ');
    ISnowballStemmer stemmer = SnowballStemmerUtils.GetStemmer(language);

    string[] stems = new string[words.Length];
    for (int i = 0; i < words.Length; i++)
    {
        stems[i] = stemmer.Stem(words[i]);
    }

    // Joining then trimming gives the same text as appending "<stem> " per term and trimming.
    return string.Join(" ", stems).Trim();
}
/// <summary>
/// Runs the Snowball stemmer over every space-delimited term in a string.
/// </summary>
/// <param name="text">String of terms, separated by the space character</param>
/// <param name="language">Stemmer language</param>
/// <returns>Stemmed terms separated by spaces, trimmed of surrounding whitespace</returns>
public static string ProcessText(string text, SnowballStemmerEnum language)
{
    string[] terms = text.Split(new char[] { ' ' });
    ISnowballStemmer stemmer = SnowballStemmerUtils.GetStemmer(language);
    StringBuilder output = new StringBuilder();

    foreach (string term in terms)
    {
        // Each stem is followed by one space; the final trailing space is removed by Trim below.
        output.Append(stemmer.Stem(term)).Append(' ');
    }

    return output.ToString().Trim();
}
/// <summary>
/// Produces a stemmed copy of an artifacts collection using the Snowball algorithm.
/// </summary>
/// <param name="artifacts">Artifacts to stem</param>
/// <param name="langauge">Stemmer language</param>
/// <returns>New collection containing, for each input artifact, an artifact with the same Id and stemmed text</returns>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection artifacts, SnowballStemmerEnum langauge)
{
    TLArtifactsCollection result = new TLArtifactsCollection();

    foreach (TLArtifact original in artifacts.Values)
    {
        // Start from an empty-text artifact carrying the original Id.
        TLArtifact copy = new TLArtifact(original.Id, String.Empty);

        // Replace the placeholder text with the stemmed version of the original text.
        copy.Text = ProcessText(original.Text, langauge);

        result.Add(copy);
    }

    return result;
}