Пример #1
0
        /// <summary>
        /// Lazily yields each run of identical stems found in <paramref name="document"/>
        /// together with the length of that run.
        /// </summary>
        /// <param name="stemmer">Stemmer applied to every token produced by <paramref name="splitter"/>.</param>
        /// <param name="document">Raw document; it is passed through getContent before splitting.</param>
        /// <param name="splitter">Breaks the document content into individual tokens.</param>
        /// <returns>
        /// A deferred sequence of (stem, count) tuples. The counts are per stem only as long as
        /// getStemsInOrder returns equal stems next to each other.
        /// </returns>
        public static IEnumerable <Tuple <string, int> > GetAllStems(this TermStemmer stemmer, string document, Func <string, IEnumerable <string> > splitter)
        {
            string current     = null;
            int    occurrences = 0;

            foreach (var stem in getStemsInOrder(stemmer, splitter(getContent(document))))
            {
                // Still inside the same run: just extend it.
                if (stem == current)
                {
                    occurrences++;
                    continue;
                }

                // A new run starts; flush the previous one (if there was any).
                if (current != null)
                {
                    yield return Tuple.Create(current, occurrences);
                }

                current     = stem;
                occurrences = 1;
            }

            // Flush the trailing run.
            if (current != null)
            {
                yield return Tuple.Create(current, occurrences);
            }
        }
Пример #2
0
 /// <summary>
 /// Applies <paramref name="stemmer"/> to every element of <paramref name="collection"/>,
 /// discards null or empty results and returns the remaining terms in ordinal order.
 /// </summary>
 /// <param name="stemmer">Delegate that maps a raw token to its stem.</param>
 /// <param name="collection">Raw tokens to stem.</param>
 /// <returns>A deferred, ordinally sorted sequence of non-empty stems.</returns>
 private static IEnumerable <string> getStemsInOrder(TermStemmer stemmer, IEnumerable <string> collection)
 {
     // Sort ordinally rather than with the culture-sensitive default comparer:
     // callers group neighbouring terms with ordinal string equality, and a
     // culture-aware sort may treat ordinally different strings as equal and
     // interleave them, which would split one stem into several groups.
     return(collection
            .Select(e => stemmer(e))
            .Where(term => !string.IsNullOrEmpty(term))
            .OrderBy(term => term, System.StringComparer.Ordinal));
 }
Пример #3
0
        /// <summary>
        /// Creates an index that starts with an empty stem dictionary and an empty document list.
        /// </summary>
        /// <param name="stemmer">Stemmer stored on the index.</param>
        /// <param name="similarity">Document similarity comparer stored on the index.</param>
        public Index(TermStemmer stemmer, ISimilarityComparer<Document> similarity)
        {
            // Fresh, empty storage.
            this.sites = new List<Document>();
            this.stems = new Dictionary<string, List<DocumentReference>>();

            // Collaborators supplied by the caller.
            this.similarity = similarity;
            this.stemmer = stemmer;
        }
 /// <summary>
 /// Stems every element of <paramref name="collection"/> with <paramref name="stemmer"/>,
 /// drops null or empty stems and returns the rest sorted ordinally.
 /// </summary>
 /// <param name="stemmer">Delegate that maps a raw token to its stem.</param>
 /// <param name="collection">Raw tokens to stem.</param>
 /// <returns>A deferred, ordinally sorted sequence of non-empty stems.</returns>
 private static IEnumerable<string> getStemsInOrder(TermStemmer stemmer, IEnumerable<string> collection)
 {
     var terms = from e in collection
                 let term = stemmer(e)
                 where !string.IsNullOrEmpty(term)
                 select term;

     // The default comparer behind "orderby" is culture-sensitive and can rank
     // ordinally different strings as equal. Consumers that detect runs with
     // ordinal equality then see the same stem in several separate runs, so the
     // sort is made ordinal to keep equal stems contiguous.
     return terms.OrderBy(term => term, System.StringComparer.Ordinal);
 }
Пример #5
0
        /// <summary>
        /// Initializes the index with the given stemmer and similarity comparer and
        /// allocates empty containers for stems and documents.
        /// </summary>
        /// <param name="stemmer">Stemmer kept by the index.</param>
        /// <param name="similarity">Comparer for documents kept by the index.</param>
        public Index(TermStemmer stemmer, ISimilarityComparer <Document> similarity)
        {
            // Both containers start out empty.
            this.stems = new Dictionary <string, List <DocumentReference> >();
            this.sites = new List <Document>();

            this.similarity = similarity;
            this.stemmer    = stemmer;
        }
 /// <summary>
 /// Wraps <paramref name="stemmer"/> in a normalizing pipeline: the input is lower-cased,
 /// passed through trimSymbols, stemmed, passed through trimSymbols again, and finally
 /// replaced by <see cref="string.Empty"/> when fewer than three characters remain.
 /// </summary>
 /// <param name="stemmer">The underlying stemmer to wrap.</param>
 /// <returns>A stemmer that applies the normalization steps around <paramref name="stemmer"/>.</returns>
 public static TermStemmer GetStemmer(TermStemmer stemmer)
 {
     return s =>
         {
             // Lower-case with the invariant culture so the produced terms do not
             // depend on the current thread culture (e.g. Turkish dotless i).
             s = s.ToLowerInvariant();
             s = trimSymbols(s);
             s = stemmer(s);
             s = trimSymbols(s);

             // Terms shorter than three characters are discarded.
             return s.Length < 3 ? string.Empty : s;
         };
 }
Пример #7
0
 /// <summary>
 /// Builds a stemmer that normalizes its input around <paramref name="stemmer"/>:
 /// lower-case, trimSymbols, stem, trimSymbols, then return <see cref="string.Empty"/>
 /// if the result has fewer than three characters.
 /// </summary>
 /// <param name="stemmer">The underlying stemmer to wrap.</param>
 /// <returns>The normalizing stemmer.</returns>
 public static TermStemmer GetStemmer(TermStemmer stemmer)
 {
     return(s =>
     {
         // Culture-invariant lower-casing: ToLower() follows the current thread
         // culture, so the same text could yield different terms on machines
         // with different locales.
         string cleaned = trimSymbols(s.ToLowerInvariant());
         string stemmed = trimSymbols(stemmer(cleaned));

         // Terms shorter than three characters are discarded.
         return stemmed.Length < 3 ? string.Empty : stemmed;
     });
 }
Пример #8
0
        /// <summary>
        /// Builds a ranker for <paramref name="index"/>: stores a getTF_IDF result for every
        /// entry returned by index.GetStems(), replaces each value in lengths by its square
        /// root, and finally stores the index and the stemmer.
        /// </summary>
        /// <param name="index">Index whose stems and site count are read.</param>
        /// <param name="stemmer">Stemmer stored on the ranker.</param>
        public Ranker(Index index, TermStemmer stemmer)
        {
            int totalDocuments = index.SiteCount;

            foreach (var entry in index.GetStems())
            {
                tf_idf.Add(entry.Key, getTF_IDF(entry.Value, totalDocuments));
            }

            // Work on a snapshot of the keys because the entries of lengths are
            // overwritten inside the loop.
            // NOTE(review): lengths is presumably filled as a side effect of getTF_IDF - confirm.
            Document[] documents = lengths.Keys.ToArray();
            for (int i = 0; i < documents.Length; i++)
            {
                Document doc = documents[i];
                lengths[doc] = Math.Sqrt(lengths[doc]);
            }

            // Assigned last on purpose: the order relative to the calls above is kept unchanged.
            this.index = index;
            this.stemmer = stemmer;
            //this.TF_WT = getTF_WT();
            //this.IDF_WT = getIDF_WT();
            //this.TF_IDF_WT = getTF_IDF_WT();
            //this.NORM_WT = getNORM_WT();
        }
Пример #9
0
        /// <summary>
        /// Creates a ranker over <paramref name="index"/>. For every entry of index.GetStems()
        /// a getTF_IDF result is added to tf_idf; afterwards every value in lengths is replaced
        /// by its square root, and the index and stemmer are stored.
        /// </summary>
        /// <param name="index">Index whose stems and site count are read.</param>
        /// <param name="stemmer">Stemmer stored on the ranker.</param>
        public Ranker(Index index, TermStemmer stemmer)
        {
            int siteTotal = index.SiteCount;

            foreach (var stemEntry in index.GetStems())
            {
                var weights = getTF_IDF(stemEntry.Value, siteTotal);
                tf_idf.Add(stemEntry.Key, weights);
            }

            // ToArray() copies the keys up front, so overwriting the values while
            // looping does not touch the collection being enumerated.
            foreach (Document doc in lengths.Keys.ToArray())
            {
                double squared = lengths[doc];
                lengths[doc] = Math.Sqrt(squared);
            }

            // Kept after the loops, exactly as before, in case the helpers read these members.
            this.index   = index;
            this.stemmer = stemmer;
            //this.TF_WT = getTF_WT();
            //this.IDF_WT = getIDF_WT();
            //this.TF_IDF_WT = getTF_IDF_WT();
            //this.NORM_WT = getNORM_WT();
        }
Пример #10
0
 /// <summary>
 /// Convenience overload of GetAllStems that splits the document content on the
 /// space character.
 /// </summary>
 /// <param name="stemmer">Stemmer applied to every token.</param>
 /// <param name="document">Document to process.</param>
 /// <returns>The (stem, count) tuples produced by the three-argument overload.</returns>
 public static IEnumerable <Tuple <string, int> > GetAllStems(this TermStemmer stemmer, string document)
 {
     Func <string, IEnumerable <string> > splitOnSpaces = text => text.Split(' ');

     return(GetAllStems(stemmer, document, splitOnSpaces));
 }