Пример #1
0
        /// <summary>
        /// Scores every entry of <c>documents</c> against <paramref name="query"/>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. The query is cleaned with <c>EraseChar</c>, split on spaces and stemmed; every stem
        ///    that is already a key of <c>keywords</c> has its value incremented by one.
        /// 2. Every keyword value is replaced by the value returned from <c>rocchio</c>.
        /// 3. For each document and each keyword the document contains, a weight
        ///    (term count / max term count in the document) * log10(N / df) is combined with the
        ///    keyword weight (value / max keyword value) * log10(N / df) and added to
        ///    <c>result</c>; the squared document weight is added to <c>length</c>.
        /// 4. <c>result</c> is divided by the product of the document norm and the query norm
        ///    when both are positive.
        /// NOTE(review): <c>result</c> and <c>length</c> are accumulated with += and are not reset
        /// here — presumably they are reset elsewhere between queries; confirm against callers.
        /// </remarks>
        /// <param name="query">Raw query text.</param>
        public void Calculate(string query)
        {
            // Count how often each known keyword (by stem) appears in the query.
            query = EraseChar(query);
            foreach (String rawTerm in query.Split(new char[] { ' ' }))
            {
                Stemmer s = new Stemmer();
                s.add(rawTerm.ToCharArray(), rawTerm.Length);
                s.stem();
                String stemmed = s.ToString();

                // Only stems that are already in the keyword set are counted.
                if (this.keywords.ContainsKey(stemmed))
                {
                    this.keywords[stemmed] += 1;
                }
            }

            // Snapshot the keys so the dictionary can be updated while iterating.
            foreach (String keyword in keywords.Keys.ToList())
            {
                keywords[keyword] = rocchio(keyword);
            }

            // Loop-invariant values, computed once instead of once per (keyword, document) pair.
            double maxKeywordWeight = keywords.Values.Any() ? (double) keywords.Values.Max() : 0;

            // log10(N / df) for every keyword that occurs in at least one document.
            // Keywords with df == 0 are left out, which also guards the division by zero.
            Dictionary<String, double> idfByKeyword = new Dictionary<String, double>();
            foreach (String key in keywords.Keys)
            {
                int documentFrequency = documents.Count(x => x.terms.ContainsKey(key));
                if (documentFrequency > 0)
                {
                    idfByKeyword[key] = Math.Log10(documents.Count / (double) documentFrequency);
                }
            }

            foreach (Document docu in documents)
            {
                // Largest term count of this document; 0 when the document has no terms.
                double maxTermCount = docu.terms.Values.Any() ? (double) docu.terms.Values.Max() : 0;

                // Guard against division by zero.
                if (maxTermCount > 0)
                {
                    foreach (KeyValuePair<String, double> keyword in keywords)
                    {
                        double idf;

                        // The document does not necessarily contain every keyword.
                        if (!docu.terms.ContainsKey(keyword.Key)
                            || !idfByKeyword.TryGetValue(keyword.Key, out idf))
                        {
                            continue;
                        }

                        // result is used as the accumulator of the document/query dot product.
                        if (maxKeywordWeight > 0)
                        {
                            docu.result += docu.terms[keyword.Key] / maxTermCount
                                * idf
                                * keyword.Value / maxKeywordWeight
                                * idf;
                        }

                        docu.length += Math.Pow(docu.terms[keyword.Key] / maxTermCount * idf, 2);
                    }
                }

                docu.length = Math.Sqrt(docu.length);
            }

            // Norm of the query vector over all keywords that occur in some document.
            double len = 0;
            if (maxKeywordWeight > 0)
            {
                foreach (KeyValuePair<String, double> keyword in keywords)
                {
                    double idf;
                    if (idfByKeyword.TryGetValue(keyword.Key, out idf))
                    {
                        len += Math.Pow(keyword.Value / maxKeywordWeight * idf, 2);
                    }
                }
            }
            len = Math.Sqrt(len);

            // Normalise the accumulated dot product by both norms when they are non-zero.
            foreach (Document docu in documents)
            {
                if (docu.length > 0 && len > 0)
                {
                    docu.result = docu.result / (double) (docu.length * len);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Reloads the keyword set and/or the document collection from text files.
        /// </summary>
        /// <remarks>
        /// Keywords file: every non-empty line is stemmed and added to <c>keywords</c> with an
        /// initial value of 0 unless the stem is already present.
        /// Documents file: the first non-empty line after a blank line (or at the start of the
        /// file) creates a new <c>Document</c> from that line. Every line, including that first
        /// one, is then cleaned with <c>EraseChar</c>, split on spaces and stemmed; stems that are
        /// keywords are added to the most recently created document via <c>AddTerm</c>.
        /// </remarks>
        /// <param name="documentsFileName">Path of the documents file; an empty string leaves <c>documents</c> untouched.</param>
        /// <param name="keywordsFileName">Path of the keywords file; an empty string leaves <c>keywords</c> untouched.</param>
        public void UpdateData(string documentsFileName, string keywordsFileName)
        {
            if (keywordsFileName.Length > 0)
            {
                this.keywords.Clear();
                // TODO: if validation of the file structure becomes necessary it has to be added;
                // for now the files are assumed to be well-formed.
                foreach (String line in File.ReadAllLines(keywordsFileName))
                {
                    if (line.Length == 0)
                    {
                        continue;
                    }

                    Stemmer keywordStemmer = new Stemmer();
                    keywordStemmer.add(line.ToCharArray(), line.Length);
                    keywordStemmer.stem();
                    String stemmedKeyword = keywordStemmer.ToString();

                    // Several lines may reduce to the same stem; keep only the first.
                    if (!this.keywords.ContainsKey(stemmedKeyword))
                    {
                        this.keywords.Add(stemmedKeyword, 0);
                    }
                }
            }

            if (documentsFileName.Length == 0)
            {
                return;
            }

            this.documents.Clear();

            // True while the next non-empty line should open a new document.
            bool startsNewDocument = true;
            foreach (String line in File.ReadAllLines(documentsFileName))
            {
                if (line.Length == 0)
                {
                    // A blank line separates documents.
                    startsNewDocument = true;
                }
                else if (startsNewDocument)
                {
                    this.documents.Add(new Document(line));
                    startsNewDocument = false;
                }

                // The line is tokenised regardless of which branch ran above.
                foreach (String token in EraseChar(line).Split(' '))
                {
                    Stemmer tokenStemmer = new Stemmer();
                    tokenStemmer.add(token.ToCharArray(), token.Length);
                    tokenStemmer.stem();
                    String stemmedToken = tokenStemmer.ToString();

                    // Only stems that are known keywords are recorded on the current document.
                    if (this.keywords.ContainsKey(stemmedToken))
                    {
                        this.documents.Last().AddTerm(stemmedToken);
                    }
                }
            }
        }