Exemplo n.º 1
0
        /// <summary>
        /// Click handler that crawls documents, stores them in the database, then builds
        /// a per-lemma index over every stored document and persists the words and their
        /// per-document reference counts.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// All work runs synchronously inside the handler, so the status text set at the
        /// start may not repaint until the handler returns.
        /// </remarks>
        private void loadIndex_Click(object sender, EventArgs e)
        {
            this.setStatus("загрузка данных из интернета...", Color.Orange);

            // Step 1: crawl and store. A result below 1 is treated as "not inserted" and
            // the document title is written to the console for diagnostics.
            foreach (Document crawled in new Crawler().Index())
            {
                var count = DbConn.getInstance().insertIntoDb(crawled);
                if (count < 1)
                {
                    Console.WriteLine(crawled.title);
                }
            }

            // Step 2: re-read the documents from the database and aggregate lemmas.
            // The dictionary alone tracks which lemmas were seen; a separate set of keys
            // is not needed.
            List <Document>           docs      = DbConn.getInstance().GetAllFromDoc();
            Dictionary <string, Word> lemmsObjs = new Dictionary <string, Word>();

            foreach (Document doc in docs)
            {
                DeepMorphy.Model.MorphInfo[] docLemms = Parser.getAllLemms(doc.text);

                foreach (var lemInfo in docLemms)
                {
                    // Fall back to the raw token text when no lemma is available.
                    string lem = lemInfo.BestTag.Lemma ?? lemInfo.Text;

                    Word word;
                    if (!lemmsObjs.TryGetValue(lem, out word))
                    {
                        // First occurrence of this lemma anywhere: the Word constructor
                        // receives the current document id (presumably recording the
                        // first reference - confirm against Word).
                        lemmsObjs.Add(lem, new Word(lem, doc.documentID));
                        continue;
                    }

                    int seenInDoc;
                    if (word.refs.TryGetValue(doc.documentID, out seenInDoc))
                    {
                        // Lemma already mentioned in this document: bump its counter.
                        word.refs[doc.documentID] = seenInDoc + 1;
                    }
                    else
                    {
                        // Known lemma, new document: one more document mentions it.
                        word.mentionings++;
                        word.refs.Add(doc.documentID, 1);
                    }
                }
            }

            // Step 3: persist each word, then its references using the id returned by
            // the word insert.
            foreach (var lOb in lemmsObjs)
            {
                lOb.Value.id = DbConn.getInstance().insertWord(lOb.Value);
                DbConn.getInstance().insertWordRefs(lOb.Value);
            }
            this.setStatus("загрузка завершена", Color.ForestGreen);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Form load handler: fills the data set from the documents table, binds the
        /// first resulting table to the grid and applies the initial grid options.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            const string cmd = "SELECT title, link, text, datetime, documentid FROM ss.documents";

            // The adapter is only needed for the Fill call; dispose it deterministically
            // instead of leaving it to the finalizer. The filled DataSet does not depend
            // on the adapter afterwards.
            using (NpgsqlDataAdapter da = DbConn.getInstance().getNewDataAdapter(cmd))
            {
                ds.Reset();
                da.Fill(ds);
            }

            dt = ds.Tables[0];
            dt.ReduceRows();

            dataGridView1.DataSource = dt.DefaultView;
            setOptionsDataGridView1();
            HideIrrelevantRows(20);
            this.setStatus("Запущено", Color.ForestGreen);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds the vector for the current search query: every distinct query lemma
        /// that the database knows is mapped from its word id to a weight of 1.0.
        /// </summary>
        /// <returns>
        /// Dictionary keyed by word id with the value 1.0 for each word returned by the
        /// database lookup; empty when none of the query lemmas is known.
        /// </returns>
        protected Dictionary <int, double> getSearchQueryVector()
        {
            Dictionary <int, double> result      = new Dictionary <int, double>();
            HashSet <string>         uniqueWords = new HashSet <string>();

            DeepMorphy.Model.MorphInfo[] searchInfo = Parser.getAllLemms(this.searchQuery);
            foreach (var lemmInfo in searchInfo)
            {
                // Use the same fallback as the indexing code: when no lemma is available
                // the raw token text is the key. Without it such tokens could never match
                // and a null would be passed to the lookup.
                string lemma = lemmInfo.BestTag.Lemma ?? lemmInfo.Text;
                if (lemma != null)
                {
                    uniqueWords.Add(lemma);
                }
            }

            List <Word> lemmas = DbConn.getInstance().getWordObjectsForLemms(uniqueWords.ToArray <string>());

            foreach (var lem in lemmas)
            {
                // Indexer assignment keeps this safe if the lookup returns an id twice.
                result[lem.id] = 1.0d;
            }

            return(result);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Computes the normalized weight of <paramref name="curLemm"/> in a document:
        /// its calcTDIDF value divided by the Euclidean norm of the calcTDIDF values of
        /// all words in <paramref name="docLemms"/>.
        /// </summary>
        /// <param name="docLemms">Words whose values form the document vector being normalized.</param>
        /// <param name="curLemm">Word whose weight is requested.</param>
        /// <param name="documentId">Id of the document, passed through to calcTDIDF.</param>
        /// <param name="result">
        /// The normalized weight on success; 0.0 when the document vector has zero length;
        /// -1.0 when the computation failed.
        /// </param>
        /// <returns><c>true</c> when a weight was computed, <c>false</c> when an exception occurred.</returns>
        static bool getWordWeightInDocument(List <Word> docLemms, Word curLemm, int documentId, out double result)
        {
            try
            {
                double sumOfSquares      = 0.0d;
                int    amountOfDocuments = DbConn.getInstance().getAmountOfDocuments();

                foreach (Word lem in docLemms)
                {
                    sumOfSquares += Math.Pow(calcTDIDF(lem, documentId, amountOfDocuments), 2);
                }
                double norm = Math.Sqrt(sumOfSquares);

                // Floating-point division by zero does not throw; it would yield NaN or
                // Infinity and be reported as success. A zero-length document vector
                // carries no weight for any word, so report 0 instead.
                if (norm == 0.0d)
                {
                    result = 0.0d;
                    return(true);
                }

                result = calcTDIDF(curLemm, documentId, amountOfDocuments) / norm;
                return(true);
            }
            catch
            {
                // Any failure (database access, null input, ...) is reported through the
                // return value, with -1.0 as the sentinel weight.
                result = -1.0d;
                return(false);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Search button handler: scores every stored document against the query typed
        /// in the text box (scalar product of the query with the document vector), sorts
        /// the grid rows by descending score and passes the number of documents with a
        /// non-zero score to HideIrrelevantRows.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void search_Click(object sender, EventArgs e)
        {
            // Reload the in-memory word collection first when it is reported as stale.
            if (!Word.isCollectionUpToDate())
            {
                this.setStatus("загрузка", Color.Orange);
                Word.setWordsCollection(DbConn.getInstance().getAllWords());
            }

            this.setStatus("выполняется поиск", Color.Orange);
            int                      count       = 0;
            Search                   searchQuery = new Search(textBox1.Text);
            List <Document>          docs        = DbConn.getInstance().GetAllFromDoc();
            Dictionary <int, double> relevation  = new Dictionary <int, double>();

            foreach (Document doc in docs)
            {
                double curRelevation = searchQuery.scalarProduct(Document.getDocumentVector(doc.documentID));

                // Indexer assignment tolerates a document id appearing twice in the list.
                relevation[doc.documentID] = curRelevation;
                if (curRelevation != 0)
                {
                    count++;
                }
            }

            // The grid table was filled earlier and may contain rows for documents that
            // are not in the list just loaded; such rows get a score of 0 (the default
            // left in the out variable) instead of throwing KeyNotFoundException mid-sort.
            Func <object, double> relevanceOf = cell =>
            {
                double score;
                relevation.TryGetValue(Int32.Parse(cell.ToString()), out score);
                return(score);
            };

            dataGridView1.DataSource = dt.ApplySort((r1, r2) =>
            {
                var val1 = relevanceOf(r1["documentid"]);
                var val2 = relevanceOf(r2["documentid"]);
                // Reversed operands: higher score sorts first.
                return(val2.CompareTo(val1));
            });

            setOptionsDataGridView1();
            HideIrrelevantRows(count);
            dataGridView1.Update();
            dataGridView1.Refresh();
            this.setStatus("поиск окончен", Color.ForestGreen);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Click handler that reloads the in-memory word collection from the database,
 /// updating the status text before and after the load.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void loadCollection_Click(object sender, EventArgs e)
 {
     this.setStatus("загрузка", Color.Orange);

     // Fetch every word from the database, then hand the result to Word.
     var database = DbConn.getInstance();
     var allWords = database.getAllWords();
     Word.setWordsCollection(allWords);

     this.setStatus("загружено", Color.ForestGreen);
 }