/// <summary>
/// Checks the Spanish Snowball stemmer against a few known word/stem pairs.
/// </summary>
public void Spanish_BaseTest()
{
    // Arrange: the stemmer under test.
    SpanishStemmer sut = new SpanishStemmer();

    // Act & Assert: each word must reduce to its expected stem.
    Assert.AreEqual("acerqu", sut.Stem("acerquen"));
    Assert.AreEqual("Vision", sut.Stem("Visionado"));
    Assert.AreEqual("algu", sut.Stem("algue"));
}
/// <summary>
/// Applies the Spanish Snowball stemmer to every term of the given stream and
/// returns a fresh <see cref="StandardTokenizer"/> over the stemmed text.
/// Each stemmed term is appended space-separated (with a leading space,
/// preserved for compatibility with the original behavior).
/// </summary>
/// <param name="tokenStream">Source token stream; fully consumed by this call.</param>
/// <returns>A new token stream built from the stemmed terms.</returns>
private TokenStream SpanishSteammer(TokenStream tokenStream)
{
    IStemmer stemmer = new SpanishStemmer();
    var termAttr = tokenStream.GetAttribute<ITermAttribute>();

    // StringBuilder avoids the O(n^2) cost of repeated string concatenation
    // that the original "term = term + ..." loop incurred.
    var buffer = new System.Text.StringBuilder();
    while (tokenStream.IncrementToken())
    {
        buffer.Append(' ').Append(stemmer.Stem(termAttr.Term));
    }

    return new StandardTokenizer(Version.LUCENE_30, new System.IO.StringReader(buffer.ToString()));
}
/// <summary>
/// Legacy (pre-attribute API) variant: stems every term of the stream with the
/// Spanish Snowball stemmer by parsing each <c>Token.ToString()</c>, then
/// returns a fresh <see cref="StandardTokenizer"/> over the stemmed text.
/// Each stemmed term is appended space-separated (with a leading space,
/// preserved for compatibility with the original behavior).
/// </summary>
/// <param name="tokenStream">Source token stream; fully consumed by this call.</param>
/// <returns>A new token stream built from the stemmed terms.</returns>
public TokenStream SpanishSteammer(TokenStream tokenStream)
{
    IStemmer stemmer = new SpanishStemmer();

    // StringBuilder avoids the O(n^2) cost of repeated string concatenation
    // that the original "term = term + ..." loop incurred.
    var buffer = new System.Text.StringBuilder();

    Lucene.Net.Analysis.Token current;
    while ((current = tokenStream.Next()) != null)
    {
        // Token.ToString() renders roughly as "(text,start,end,...)"; strip
        // the leading '(' and keep everything up to the first ','.
        // NOTE(review): this breaks if the term text itself contains a comma.
        // Token.TermText() would read the term directly — confirm it exists in
        // the Lucene.Net version in use before switching.
        string rendered = current.ToString();
        string[] parts = rendered.Split(',');
        string word = rendered.Substring(1, parts[0].Length - 1);
        buffer.Append(' ').Append(stemmer.Stem(word));
    }

    return new StandardTokenizer(new System.IO.StringReader(buffer.ToString()));
}