/// <summary>
/// Click handler: asks <c>DocCollection</c> to calculate the display matrix, binds the
/// resulting term list and weight matrix to <c>dataGrid1</c>, and makes the grid visible.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void getMatrixButton_Click(object sender, RoutedEventArgs e)
{
    // Calculate first; termList and weightMatrix are read immediately afterwards.
    DocCollection.calcMatrixForDisplay();

    var columnTerms = DocCollection.termList;
    var weightRows = DocCollection.weightMatrix;
    buildandBindTable(dataGrid1, columnTerms, weightRows, "Filename\\Term");

    dataGrid1.Visibility = Visibility.Visible;
    ProgressLabel.Content = "Default data set loaded.";
}
/// <summary>
/// Click handler: cleans the document collection, collapses <c>dataGrid1</c>, and asks
/// <c>docWriter</c> to delete all files at the path typed into <c>filepathGen</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void clearColButton_Click(object sender, RoutedEventArgs e)
{
    DocCollection.cleanCollection();
    dataGrid1.Visibility = Visibility.Collapsed;

    var generatedPath = filepathGen.Text;
    docWriter.deleteAllFiles(generatedPath);

    ProgressLabel.Content = "Data collection cleared";
}
/// <summary>
/// Takes in a query and a document and returns their cosine similarity.
/// Each document term is weighted by <c>DocCollection.calcIDF(term)</c> times the value
/// stored for that term in <c>doc.dict()</c>; each query term uses its own <c>weight</c>.
/// </summary>
/// <param name="query">Query whose <c>qTerms</c> supply the term/weight pairs.</param>
/// <param name="doc">Document whose <c>dict()</c> supplies the per-term values.</param>
/// <returns>
/// The dot product of the two weight vectors divided by the product of their magnitudes.
/// Returns 0 when <paramref name="query"/> is null or has no terms, when
/// <paramref name="doc"/> is null, or when either vector has zero magnitude
/// (instead of the NaN that 0/0 would produce).
/// </returns>
public static double cosineWeight(Query query, Doc doc)
{
    if (query == null || query.qTerms == null || query.qTerms.Count == 0)
    {
        return 0;
    }

    // Treat a missing document like a missing query: nothing to compare against.
    if (doc == null)
    {
        return 0;
    }

    // Fetch the term table once instead of once per lookup.
    var docTerms = doc.dict();

    // Magnitude of the document's weight vector.
    double docSquares = 0;
    foreach (var entry in docTerms)
    {
        docSquares += Math.Pow(DocCollection.calcIDF(entry.Key) * entry.Value, 2);
    }
    double docMagnitude = Math.Sqrt(docSquares);

    // Dot product and the magnitude of the query's weight vector in a single pass.
    double dotProduct = 0;
    double querySquares = 0;
    foreach (var queryTerm in query.qTerms)
    {
        if (docTerms.ContainsKey(queryTerm.term))
        {
            dotProduct += queryTerm.weight *
                          ((double)docTerms[queryTerm.term] * DocCollection.calcIDF(queryTerm.term));
        }

        querySquares += Math.Pow(queryTerm.weight, 2);
    }
    double queryMagnitude = Math.Sqrt(querySquares);

    // Exact comparison is intentional: only a true zero makes the division undefined
    // (empty document, all-zero IDF weights, or all-zero query weights).
    double denominator = docMagnitude * queryMagnitude;
    if (denominator == 0)
    {
        return 0;
    }

    return dotProduct / denominator;
}
/// <summary>
/// Click handler: opens the document named by the <c>term</c> of the row currently
/// selected in <c>dataGrid</c>. Does nothing when no row is selected or when the
/// selected item is not a <c>Value</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void openFile_Click(object sender, RoutedEventArgs e)
{
    if (dataGrid.SelectedIndex < 0)
    {
        return;
    }

    // "as" yields null when the selected item is not a Value; bail out rather than
    // dereference null.
    Value row = dataGrid.SelectedItem as Value;
    if (row == null)
    {
        return;
    }

    DocCollection.openDoc(row.term);
}
/// <summary>
/// Click handler: if a document with the name typed into <c>testDocLoadtextBox</c> is
/// found, removes it from the collection and resets the related labels and
/// <c>dataGrid2</c>. Does nothing when no such document is found.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void removeDoc_Click(object sender, RoutedEventArgs e)
{
    var docName = testDocLoadtextBox.Text;
    if (DocCollection.findDoc(docName) == null)
    {
        return;
    }

    DocCollection.removeDoc(docName);

    // Reset the "loaded document" summary.
    docLoadLabel.Content = "None";
    docLoadTermsLabel.Content = "0";

    // Unbind the per-term table and refresh the grid's items.
    dataGrid2.ItemsSource = null;
    dataGrid2.Items.Refresh();
}
/// <summary>
/// Click handler: builds a query from the term and weight text boxes and shows its
/// cosine similarity (rounded to 6 decimals) against the document named in
/// <c>testDocLoadtextBox</c>. Shows a notice when that document is not found; leaves
/// the label untouched when <c>setQuery</c> returns false.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button_Click(object sender, RoutedEventArgs e)
{
    Query testQuery = new Query();
    var docName = testDocLoadtextBox.Text;

    if (DocCollection.findDoc(docName) == null)
    {
        testCVLabel.Content = "Document not\nloaded for testing.";
        return;
    }

    if (!testQuery.setQuery(testQTtextBox.Text, testQWtextBox.Text))
    {
        return;
    }

    var similarity = Similarity.cosineWeight(testQuery, DocCollection.findDoc(docName));
    testCVLabel.Content = Math.Round(similarity, 6);
}
/// <summary>
/// Click handler: looks up the document named in <c>testDocLoadtextBox</c> and, when
/// found, binds a three-row table (Freq, IDF, Weight) with one column per term to
/// <c>dataGrid2</c>, then shows the document's full name and term count.
/// Does nothing when the document is not found.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void loadDocTest_Click(object sender, RoutedEventArgs e)
{
    var docName = testDocLoadtextBox.Text;

    // Resolve the document once; all later steps use this same instance instead of
    // repeating the lookup.
    var loadedDoc = DocCollection.findDoc(docName);
    if (loadedDoc == null)
    {
        return;
    }

    var docTerms = loadedDoc.dict();

    // Each row starts with its label, followed by one cell per term.
    var headers = new List<string>();
    var freq = new List<string> { "Freq" };
    var idf = new List<string> { "IDF" };
    var weight = new List<string> { "Weight" };

    foreach (var term in docTerms)
    {
        // IDF is computed once per term and reused for both the IDF and Weight cells.
        var idfValue = DocCollection.calcIDF(term.Key);

        headers.Add(term.Key);
        freq.Add(term.Value.ToString());
        idf.Add(Math.Round(idfValue, 4).ToString());
        weight.Add(Math.Round(idfValue * term.Value, 4).ToString());
    }

    var freqCon = new List<List<string>> { freq, idf, weight };
    buildandBindTable(dataGrid2, headers, freqCon, "Term");

    docLoadLabel.Content = loadedDoc.getFullName();
    docLoadTermsLabel.Content = docTerms.Count;
}
/// <summary>
/// Reads the file line by line, lower-cases each whitespace-separated word, skips
/// words found in <c>stopWords</c>, and adds the stemmed form of the rest to a new
/// <c>Doc</c>, which is then added to <c>DocCollection</c>. Returns immediately when
/// the collection already contains <paramref name="fileName"/>.
/// </summary>
/// <param name="fileName">Path of the text file to read.</param>
public static void parseDoc(string fileName)
{
    if (DocCollection.containsDoc(fileName))
    {
        return;
    }

    Doc parsedDoc = new Doc(fileName);

    try
    {
        using (TextReader reader = File.OpenText(fileName))
        {
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                // An empty separator array makes Split break on whitespace.
                string[] tokens = line.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);

                foreach (string token in tokens)
                {
                    // NOTE(review): ToLower() uses the current culture — confirm this matches
                    // how stop words and query terms are normalised elsewhere.
                    string lowered = token.ToLower();
                    if (stopWords.Contains(lowered))
                    {
                        continue;
                    }

                    string stemmed = Stemmer.stem(lowered);
                    parsedDoc.addTerms(stemmed);
                }
            }

            var window = (MainWindow)System.Windows.Application.Current.MainWindow;
            window.ProgressLabel.Content = "Read " + parsedDoc.getName();
            DocCollection.addDoc(parsedDoc);
        }
    }
    catch (Exception ex)
    {
        // Any failure while reading or registering the document is reported to the user;
        // the document is not added in that case unless addDoc itself had already run.
        System.Windows.Forms.MessageBox.Show("!!!File not read error: " + ex.ToString());
    }
}
/// <summary>
/// Click handler: cleans the current document collection, then runs
/// <c>DocCollection.setupDocCollection()</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void loadDefCollectionButton_Click(object sender, RoutedEventArgs e)
{
    // Clean first, then set up.
    DocCollection.cleanCollection();
    DocCollection.setupDocCollection();
}