/// <summary>
/// Lazily yields one (stem, count) pair for every run of equal consecutive stems
/// in the ordered stem sequence produced from <paramref name="document"/>.
/// </summary>
/// <param name="stemmer">Stemmer handed to <c>getStemsInOrder</c>.</param>
/// <param name="document">Document passed to <c>getContent</c> before splitting.</param>
/// <param name="splitter">Splits the content returned by <c>getContent</c> into raw terms.</param>
/// <returns>Pairs of stem and the length of its run, in the order returned by <c>getStemsInOrder</c>.</returns>
public static IEnumerable<Tuple<string, int>> GetAllStems(this TermStemmer stemmer, string document, Func<string, IEnumerable<string>> splitter)
{
    var rawTerms = splitter(getContent(document));

    string current = null;
    int occurrences = 0;

    foreach (var stem in getStemsInOrder(stemmer, rawTerms))
    {
        // Same stem as the previous one: extend the current run.
        if (current == stem)
        {
            occurrences++;
            continue;
        }

        // A new stem starts; flush the finished run (nothing to flush before the first stem).
        if (current != null)
        {
            yield return new Tuple<string, int>(current, occurrences);
        }

        current = stem;
        occurrences = 1;
    }

    // Flush the trailing run, if there was any stem at all.
    if (current != null)
    {
        yield return new Tuple<string, int>(current, occurrences);
    }
}
/// <summary>
/// Applies <paramref name="stemmer"/> to every element of <paramref name="collection"/>,
/// discards empty results and returns the remaining stems in sorted order.
/// </summary>
/// <param name="stemmer">Delegate applied to each element.</param>
/// <param name="collection">Raw terms to stem.</param>
/// <returns>A deferred sequence of the non-empty stems, ordered by the default string ordering.</returns>
private static IEnumerable<string> getStemsInOrder(TermStemmer stemmer, IEnumerable<string> collection)
{
    return collection
        .Select(rawTerm => stemmer(rawTerm))
        .Where(stem => stem.Length > 0)
        .OrderBy(stem => stem);
}
/// <summary>
/// Creates an index with an empty stem table and an empty site list.
/// </summary>
/// <param name="stemmer">Stemmer stored on the index.</param>
/// <param name="similarity">Document similarity comparer stored on the index.</param>
public Index(TermStemmer stemmer, ISimilarityComparer<Document> similarity)
{
    // Start with empty containers.
    sites = new List<Document>();
    stems = new Dictionary<string, List<DocumentReference>>();

    // Keep the collaborators supplied by the caller.
    this.similarity = similarity;
    this.stemmer = stemmer;
}
/// <summary>
/// Stems each element of <paramref name="collection"/> with <paramref name="stemmer"/>,
/// keeps only the non-empty stems and returns them sorted.
/// </summary>
/// <param name="stemmer">Delegate applied to each element.</param>
/// <param name="collection">Raw terms to stem.</param>
/// <returns>A deferred, ordered sequence of non-empty stems.</returns>
private static IEnumerable<string> getStemsInOrder(TermStemmer stemmer, IEnumerable<string> collection)
{
    var orderedStems =
        from rawTerm in collection
        select stemmer(rawTerm) into stem
        where stem.Length > 0
        orderby stem
        select stem;

    return orderedStems;
}
/// <summary>
/// Initializes the index: stores the supplied stemmer and similarity comparer
/// and allocates empty collections for stems and sites.
/// </summary>
/// <param name="stemmer">Stemmer assigned to the index.</param>
/// <param name="similarity">Similarity comparer assigned to the index.</param>
public Index(TermStemmer stemmer, ISimilarityComparer<Document> similarity)
{
    this.similarity = similarity;
    this.stemmer = stemmer;

    // Stem text -> list of document references; empty until populated elsewhere.
    stems = new Dictionary<string, List<DocumentReference>>();

    // Documents known to this index; empty until populated elsewhere.
    sites = new List<Document>();
}
/// <summary>
/// Wraps <paramref name="stemmer"/> in a normalizing stemmer: the input is lower-cased,
/// passed through <c>trimSymbols</c>, stemmed, passed through <c>trimSymbols</c> again,
/// and results shorter than three characters are replaced by <see cref="string.Empty"/>.
/// </summary>
/// <param name="stemmer">The underlying stemmer to wrap.</param>
/// <returns>A stemmer that performs the normalization steps around <paramref name="stemmer"/>.</returns>
public static TermStemmer GetStemmer(TermStemmer stemmer)
{
    return s =>
    {
        // Invariant lower-casing: with the culture-sensitive ToLower() the produced stems
        // depend on the current thread culture (e.g. "I" becomes a dotless i under tr-TR),
        // so the same text could yield different index terms on different machines.
        s = s.ToLowerInvariant();
        s = trimSymbols(s);
        s = stemmer(s);
        // Trim again on the stemmer's output.
        s = trimSymbols(s);
        return s.Length < 3 ? string.Empty : s;
    };
}
/// <summary>
/// Builds a stemmer that normalizes a term around the supplied <paramref name="stemmer"/>:
/// lower-case, <c>trimSymbols</c>, stem, <c>trimSymbols</c>, then drop anything shorter
/// than three characters by returning <see cref="string.Empty"/>.
/// </summary>
/// <param name="stemmer">The stemmer applied between the two <c>trimSymbols</c> passes.</param>
/// <returns>The composed stemmer delegate.</returns>
public static TermStemmer GetStemmer(TermStemmer stemmer)
{
    return s =>
    {
        // Use the invariant culture for case folding so stems do not vary with the
        // current thread culture (culture-sensitive ToLower() maps "I" differently
        // under Turkish cultures, for example).
        var normalized = trimSymbols(s.ToLowerInvariant());
        var stem = trimSymbols(stemmer(normalized));

        // Stems shorter than three characters are reported as empty.
        return stem.Length >= 3 ? stem : string.Empty;
    };
}
/// <summary>
/// Builds the ranker from <paramref name="index"/>: fills <c>tf_idf</c> with one entry per
/// stem of the index, replaces every value in <c>lengths</c> by its square root, and
/// stores the index and stemmer.
/// </summary>
/// <param name="index">Index whose stems and site count are read.</param>
/// <param name="stemmer">Stemmer stored on the ranker.</param>
public Ranker(Index index, TermStemmer stemmer)
{
    int totalDocuments = index.SiteCount;

    foreach (var stemEntry in index.GetStems())
    {
        // NOTE(review): getTF_IDF presumably also accumulates into `lengths`,
        // which is why the square-root pass below runs afterwards — confirm.
        var weights = getTF_IDF(stemEntry.Value, totalDocuments);
        tf_idf.Add(stemEntry.Key, weights);
    }

    // Snapshot the keys first so `lengths` is not enumerated while it is being written to.
    Document[] documents = lengths.Keys.ToArray();
    foreach (var document in documents)
    {
        lengths[document] = Math.Sqrt(lengths[document]);
    }

    this.index = index;
    this.stemmer = stemmer;

    // Disabled initialisation kept from the original:
    //this.TF_WT = getTF_WT();
    //this.IDF_WT = getIDF_WT();
    //this.TF_IDF_WT = getTF_IDF_WT();
    //this.NORM_WT = getNORM_WT();
}
/// <summary>
/// Initializes the ranker: adds a <c>tf_idf</c> entry for every stem returned by
/// <paramref name="index"/>, takes the square root of each value stored in
/// <c>lengths</c>, then records the index and stemmer on the instance.
/// </summary>
/// <param name="index">Source of the stems and of the site count.</param>
/// <param name="stemmer">Stemmer kept for later use by the ranker.</param>
public Ranker(Index index, TermStemmer stemmer)
{
    var siteCount = index.SiteCount;

    // Pass 1: one tf_idf entry per stem in the index.
    foreach (var pair in index.GetStems())
    {
        tf_idf.Add(pair.Key, getTF_IDF(pair.Value, siteCount));
    }

    // Pass 2: square-root every stored length. The keys are copied up front
    // because the loop assigns back into `lengths`.
    foreach (var doc in lengths.Keys.ToList())
    {
        var squared = lengths[doc];
        lengths[doc] = Math.Sqrt(squared);
    }

    this.index = index;
    this.stemmer = stemmer;

    // Disabled initialisation kept from the original:
    //this.TF_WT = getTF_WT();
    //this.IDF_WT = getIDF_WT();
    //this.TF_IDF_WT = getTF_IDF_WT();
    //this.NORM_WT = getNORM_WT();
}
/// <summary>
/// Convenience overload of <c>GetAllStems</c> that splits the document content
/// on the space character.
/// </summary>
/// <param name="stemmer">Stemmer forwarded to the three-argument overload.</param>
/// <param name="document">Document forwarded to the three-argument overload.</param>
/// <returns>Whatever the three-argument overload returns for a space-based splitter.</returns>
public static IEnumerable<Tuple<string, int>> GetAllStems(this TermStemmer stemmer, string document)
{
    Func<string, IEnumerable<string>> splitOnSpaces = text => text.Split(' ');
    return GetAllStems(stemmer, document, splitOnSpaces);
}