/// <summary>
/// Click handler that runs the crawler, stores the crawled documents, then
/// builds per-lemma statistics over every stored document and writes the
/// resulting words and their document references to the database.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void loadIndex_Click(object sender, EventArgs e)
{
    this.setStatus("загрузка данных из интернета...", Color.Orange);

    // Store each crawled document; when the insert result is below 1 the
    // document title is written to the console.
    foreach (Document crawled in new Crawler().Index())
    {
        var inserted = DbConn.getInstance().insertIntoDb(crawled);
        if (inserted < 1)
        {
            Console.WriteLine(crawled.title);
        }
    }

    // Re-read the documents from the database and index those.
    List <Document> storedDocs = DbConn.getInstance().GetAllFromDoc();
    Dictionary <string, Word> wordsByLemma = new Dictionary <string, Word>();

    foreach (Document doc in storedDocs)
    {
        foreach (var info in Parser.getAllLemms(doc.text))
        {
            // Use the raw token text when the best tag carries no lemma.
            string lemma = info.BestTag.Lemma ?? info.Text;

            Word known;
            if (!wordsByLemma.TryGetValue(lemma, out known))
            {
                // First occurrence of this lemma anywhere.
                wordsByLemma.Add(lemma, new Word(lemma, doc.documentID));
                continue;
            }

            int seenInDoc;
            if (known.refs.TryGetValue(doc.documentID, out seenInDoc))
            {
                // Word already mentioned in this doc: bump its per-document counter.
                known.refs[doc.documentID]++;
            }
            else
            {
                // First occurrence in this doc: one more mentioning document.
                known.mentionings++;
                known.refs.Add(doc.documentID, 1);
            }
        }
    }

    // Persist each word, remember the id returned by the insert, then persist its references.
    foreach (Word word in wordsByLemma.Values)
    {
        word.id = DbConn.getInstance().insertWord(word);
        DbConn.getInstance().insertWordRefs(word);
    }

    this.setStatus("загрузка завершена", Color.ForestGreen);
}
/// <summary>
/// Form load handler: fills the data set from the <c>ss.documents</c> table,
/// binds the first resulting table to the grid, applies the grid options and
/// row hiding, and reports the started status.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    string cmd = "SELECT title, link, text, datetime, documentid FROM ss.documents";

    // The adapter is a disposable component that is only needed for the fill;
    // release it deterministically instead of leaving it to the finalizer.
    using (NpgsqlDataAdapter da = DbConn.getInstance().getNewDataAdapter(cmd))
    {
        ds.Reset();
        da.Fill(ds);
    }

    dt = ds.Tables[0];
    dt.ReduceRows();
    dataGridView1.DataSource = dt.DefaultView;
    setOptionsDataGridView1();
    HideIrrelevantRows(20);
    this.setStatus("Запущено", Color.ForestGreen);
}
/// <summary>
/// Builds the vector for the current search query: every word object the
/// database returns for the query's distinct lemmas is mapped from its id to
/// the weight 1.0.
/// </summary>
/// <returns>Dictionary of word id to weight (always 1.0).</returns>
protected Dictionary <int, double> getSearchQueryVector()
{
    HashSet <string> uniqueWords = new HashSet <string>();
    foreach (var lemmInfo in Parser.getAllLemms(this.searchQuery))
    {
        // Use the same fallback as loadIndex_Click: when the best tag has no
        // lemma, the raw token text stands in for it, so the query key matches
        // what was stored at indexing time.
        string lem = lemmInfo.BestTag.Lemma ?? lemmInfo.Text;
        if (lem != null)
        {
            uniqueWords.Add(lem);
        }
    }

    List <Word> lemmas = DbConn.getInstance().getWordObjectsForLemms(uniqueWords.ToArray <string>());

    Dictionary <int, double> result = new Dictionary <int, double>();
    foreach (var lem in lemmas)
    {
        // Indexer assignment tolerates the same id being returned more than once.
        result[lem.id] = 1.0d;
    }
    return(result);
}
/// <summary>
/// Computes the weight of <paramref name="curLemm"/> in a document as its
/// calcTDIDF value divided by the Euclidean norm of the calcTDIDF values of
/// all words in <paramref name="docLemms"/>.
/// </summary>
/// <param name="docLemms">Words whose values form the normalisation vector.</param>
/// <param name="curLemm">Word whose weight is requested.</param>
/// <param name="documentId">Id of the document passed through to calcTDIDF.</param>
/// <param name="result">
/// Normalised weight on success; 0.0 when the norm is zero; -1.0 when an
/// exception occurred.
/// </param>
/// <returns><c>true</c> when a weight was computed, <c>false</c> when any exception was thrown.</returns>
static bool getWordWeightInDocument(List <Word> docLemms, Word curLemm, int documentId, out double result)
{
    try
    {
        int amountOfDocuments = DbConn.getInstance().getAmountOfDocuments();

        double sumOfSquares = 0.0d;
        foreach (Word lem in docLemms)
        {
            sumOfSquares += Math.Pow(calcTDIDF(lem, documentId, amountOfDocuments), 2);
        }
        double norm = Math.Sqrt(sumOfSquares);

        if (norm == 0.0d)
        {
            // Floating-point division by zero does not throw; it would hand
            // NaN/Infinity to the caller with a "success" flag. A zero-length
            // vector carries no weight for any word.
            result = 0.0d;
            return(true);
        }

        result = calcTDIDF(curLemm, documentId, amountOfDocuments) / norm;
        return(true);
    }
    catch
    {
        // Try-pattern: any failure (database access, null arguments, ...) is
        // reported through the return value rather than propagated.
        result = -1.0d;
        return(false);
    }
}
/// <summary>
/// Click handler for search: refreshes the word collection if it is reported
/// as stale, computes the scalar product of the query with every stored
/// document's vector, re-sorts the grid table by that value (comparator puts
/// larger values first) and passes the number of documents with a non-zero
/// value to HideIrrelevantRows.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void search_Click(object sender, EventArgs e)
{
    if (!Word.isCollectionUpToDate())
    {
        this.setStatus("загрузка", Color.Orange);
        Word.setWordsCollection(DbConn.getInstance().getAllWords());
    }

    this.setStatus("выполняется поиск", Color.Orange);

    Search searchQuery = new Search(textBox1.Text);
    List <Document> docs = DbConn.getInstance().GetAllFromDoc();
    Dictionary <int, double> relevation = new Dictionary <int, double>();
    int count = 0;

    foreach (Document doc in docs)
    {
        double curRelevation = searchQuery.scalarProduct(Document.getDocumentVector(doc.documentID));
        relevation.Add(doc.documentID, curRelevation);
        if (curRelevation != 0)
        {
            count++;
        }
    }

    // The grid table was filled earlier and may contain rows for documents
    // that are not in the list just read from the database; such rows get
    // relevance 0 instead of raising KeyNotFoundException inside the sort.
    Func <object, double> relevanceOf = cell =>
    {
        double found;
        return(relevation.TryGetValue(Int32.Parse(cell.ToString()), out found) ? found : 0.0d);
    };

    dataGridView1.DataSource = dt.ApplySort((r1, r2) =>
    {
        var val1 = relevanceOf(r1["documentid"]);
        var val2 = relevanceOf(r2["documentid"]);
        return(val2.CompareTo(val1));
    });

    setOptionsDataGridView1();
    HideIrrelevantRows(count);
    dataGridView1.Update();
    dataGridView1.Refresh();
    this.setStatus("поиск окончен", Color.ForestGreen);
}
/// <summary>
/// Click handler that reads all words from the database and hands them to
/// <c>Word.setWordsCollection</c>, updating the status label before and after.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void loadCollection_Click(object sender, EventArgs e)
{
    this.setStatus("загрузка", Color.Orange);

    var allWords = DbConn.getInstance().getAllWords();
    Word.setWordsCollection(allWords);

    this.setStatus("загружено", Color.ForestGreen);
}