Пример #1
0
 /// <summary>
 /// Click handler: asks DocCollection to compute its display matrix, binds the
 /// result to dataGrid1, shows the grid and updates the progress label.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void getMatrixButton_Click(object sender, RoutedEventArgs e)
 {
     // Text passed to buildandBindTable as its last argument.
     const string cornerHeader = "Filename\\Term";

     // The matrix is computed before termList / weightMatrix are read for binding.
     DocCollection.calcMatrixForDisplay();
     buildandBindTable(
         dataGrid1,
         DocCollection.termList,
         DocCollection.weightMatrix,
         cornerHeader);

     dataGrid1.Visibility  = Visibility.Visible;
     ProgressLabel.Content = "Default data set loaded.";
 }
Пример #2
0
 /// <summary>
 /// Click handler: clears the document collection, hides dataGrid1, asks
 /// docWriter to delete all files for the path entered in filepathGen, and
 /// reports completion on the progress label.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void clearColButton_Click(object sender, RoutedEventArgs e)
 {
     DocCollection.cleanCollection();
     dataGrid1.Visibility = Visibility.Collapsed;

     // Read the path text only after the collection has been cleared, as before.
     var pathText = filepathGen.Text;
     docWriter.deleteAllFiles(pathText);

     ProgressLabel.Content = "Data collection cleared";
 }
Пример #3
0
        /// <summary>
        /// Takes in a query and a document and returns their cosine similarity.
        /// The document side is weighted by <c>DocCollection.calcIDF(term) * frequency</c>;
        /// the query side uses each query term's own <c>weight</c>.
        /// </summary>
        /// <param name="query">Query whose <c>qTerms</c> supply term/weight pairs.</param>
        /// <param name="doc">Document whose <c>dict()</c> supplies term/frequency entries.</param>
        /// <returns>
        /// The cosine value, or 0 when the query is null or empty, the document is
        /// null, or either weight vector has zero length (which would otherwise
        /// produce a division by zero).
        /// </returns>
        public static double cosineWeight(Query query, Doc doc)
        {
            if (query == null || query.qTerms == null || query.qTerms.Count == 0 || doc == null)
            {
                return(0);
            }

            // Fetch the term table once instead of calling doc.dict() for every lookup.
            var docTerms = doc.dict();

            double dotProduct       = 0;
            double docNormSquared   = 0;
            double queryNormSquared = 0;

            foreach (var entry in docTerms)
            {
                docNormSquared += Math.Pow((DocCollection.calcIDF(entry.Key) * entry.Value), 2); //document's weights squared
            }

            foreach (var queryTerm in query.qTerms)
            {
                if (docTerms.ContainsKey(queryTerm.term))
                {
                    dotProduct += queryTerm.weight *                                                       //query's weight
                                  ((double)docTerms[queryTerm.term] * DocCollection.calcIDF(queryTerm.term)); //TFIDF weight for term in collection & doc
                }
                queryNormSquared += Math.Pow(queryTerm.weight, 2);                                         //query's weights squared
            }

            double denominator = Math.Sqrt(docNormSquared) * Math.Sqrt(queryNormSquared);

            // An empty document or all-zero weights give a zero-length vector; dividing
            // by it would yield NaN or Infinity, so report "no similarity" instead.
            if (denominator == 0)
            {
                return(0);
            }

            return(dotProduct / denominator); //cosine value
        }
Пример #4
0
        /// <summary>
        /// Click handler: opens the document named by the <c>term</c> field of the
        /// row currently selected in dataGrid. Does nothing when no row is selected
        /// or the selected item is not a <c>Value</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void openFile_Click(object sender, RoutedEventArgs e)
        {
            if (dataGrid.SelectedIndex <= -1)
            {
                return;
            }

            // "as" yields null when the selected item is not a Value; guard against
            // that instead of dereferencing it and throwing NullReferenceException.
            var row = dataGrid.SelectedItem as Value;
            if (row == null)
            {
                return;
            }

            DocCollection.openDoc(row.term);
        }
Пример #5
0
        /// <summary>
        /// Click handler: if DocCollection has a document matching the text in
        /// testDocLoadtextBox, removes it and resets the labels and dataGrid2.
        /// Otherwise leaves everything untouched.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void removeDoc_Click(object sender, RoutedEventArgs e)
        {
            var docName = testDocLoadtextBox.Text;

            // Nothing to remove when the lookup finds no document.
            if (DocCollection.findDoc(docName) == null)
            {
                return;
            }

            DocCollection.removeDoc(docName);

            // Reset the UI that described the removed document.
            docLoadLabel.Content      = "None";
            docLoadTermsLabel.Content = "0";
            dataGrid2.ItemsSource     = null;
            dataGrid2.Items.Refresh();
        }
Пример #6
0
        /// <summary>
        /// Click handler: builds a query from testQTtextBox / testQWtextBox and shows
        /// its cosine similarity (rounded to 6 decimals) against the document named
        /// in testDocLoadtextBox. Shows a notice instead when that document is not
        /// found; leaves the label unchanged when setQuery returns false.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            var testQuery = new Query();
            var docName   = testDocLoadtextBox.Text;

            if (DocCollection.findDoc(docName) == null)
            {
                testCVLabel.Content = "Document not\nloaded for testing.";
                return;
            }

            if (!testQuery.setQuery(testQTtextBox.Text, testQWtextBox.Text))
            {
                return;
            }

            var similarity = Similarity.cosineWeight(testQuery, DocCollection.findDoc(docName));
            testCVLabel.Content = Math.Round(similarity, 6);
        }
Пример #7
0
        /// <summary>
        /// Click handler: looks up the document named in testDocLoadtextBox and, when
        /// found, fills dataGrid2 with one column per term and three rows (frequency,
        /// IDF, and IDF * frequency weight, the latter two rounded to 4 decimals),
        /// then updates the document name and term-count labels.
        /// Does nothing when the document is not found.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void loadDocTest_Click(object sender, RoutedEventArgs e)
        {
            // Look the document up once and reuse it, rather than repeating findDoc.
            var doc = DocCollection.findDoc(testDocLoadtextBox.Text);

            if (doc == null)
            {
                return;
            }

            var terms   = doc.dict();
            var headers = new List <string>();

            // Each row starts with its label, followed by one cell per term.
            var freq   = new List <string> { "Freq" };
            var idf    = new List <string> { "IDF" };
            var weight = new List <string> { "Weight" };

            foreach (var term in terms)
            {
                // calcIDF is needed for both the IDF and the weight cell; compute it once.
                var idfValue = DocCollection.calcIDF(term.Key);

                headers.Add(term.Key);
                freq.Add(term.Value.ToString());
                idf.Add(Math.Round(idfValue, 4).ToString());
                weight.Add(Math.Round((idfValue * term.Value), 4).ToString());
            }

            var freqCon = new List <List <string> > { freq, idf, weight };

            buildandBindTable(dataGrid2, headers, freqCon, "Term");
            docLoadLabel.Content      = doc.getFullName();
            docLoadTermsLabel.Content = terms.Count;
        }
Пример #8
0
        /// <summary>
        /// Reads the file line by line, lower-cases each white-space separated word,
        /// skips words found in <c>stopWords</c>, passes the rest through
        /// <c>Stemmer.stem</c> and adds them to a new <c>Doc</c>, which is then added
        /// to <c>DocCollection</c>. Returns immediately when the collection already
        /// contains the file.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <remarks>
        /// Any exception raised while reading or registering the document is caught
        /// and shown in a message box; in that case the document is not added.
        /// </remarks>
        public static void parseDoc(string fileName)
        {
            if (DocCollection.containsDoc(fileName))
            {
                return;
            }

            Doc currentDoc = new Doc(fileName);

            try
            {
                using (TextReader input = File.OpenText(fileName))
                {
                    string line;

                    while ((line = input.ReadLine()) != null)
                    {
                        // An empty separator array makes Split break on white-space characters.
                        string[] wordsLine = line.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);
                        foreach (string str in wordsLine)
                        {
                            // Lower-case once, culture-independently, so the stop-word check
                            // and the stemmer see the same text regardless of the UI culture.
                            string lowered = str.ToLowerInvariant();

                            if (!stopWords.Contains(lowered))
                            {
                                string stemmed = Stemmer.stem(lowered);
                                currentDoc.addTerms(stemmed);
                            }
                        }
                    }
                    ((MainWindow)System.Windows.Application.Current.MainWindow).ProgressLabel.Content = "Read " + currentDoc.getName();
                    DocCollection.addDoc(currentDoc);
                }
            }
            catch (Exception ex)
            {
                // Best-effort: report the failure to the user instead of propagating it.
                System.Windows.Forms.MessageBox.Show("!!!File not read error: " + ex.ToString());
            }
        }
Пример #9
0
 /// <summary>
 /// Click handler: clears the current document collection and then calls
 /// DocCollection.setupDocCollection().
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void loadDefCollectionButton_Click(object sender, RoutedEventArgs e)
 {
     // Clear first so setup starts from an empty collection.
     DocCollection.cleanCollection();
     // NOTE(review): presumably loads the default data set — confirm in DocCollection.
     DocCollection.setupDocCollection();
 }