예제 #1
0
        /// <summary>
        /// Builds a multi-line text report for a term: the lower-cased term, its
        /// frequency as stored in <c>termDict</c>, one pair of lines per document
        /// occurrence of every matching dictionary key, and a final occurrence count.
        /// </summary>
        /// <param name="keyword">
        /// Term to look up. It is lower-cased before use; when <c>isStem</c> is "yes"
        /// it is also passed through <c>Stem.step1</c> before being compared with the
        /// (equally stemmed) dictionary keys.
        /// </param>
        /// <returns>
        /// The report text. If the lower-cased keyword is not itself a key of
        /// <c>termDict</c>, the frequency line reports 0 instead of throwing, so that
        /// stem-related keys can still be listed.
        /// </returns>
        public string retrieveTermInfo(string keyword)
        {
            keyword = keyword.ToLower();

            // A builder avoids re-copying the whole report on every appended occurrence.
            System.Text.StringBuilder info = new System.Text.StringBuilder();
            info.Append("\nTERM:  " + keyword);

            // The exact (unstemmed) keyword may be absent from the dictionary even
            // though related keys match after stemming; indexing directly would throw
            // KeyNotFoundException in that case.
            Term exactTerm;
            if (termDict.TryGetValue(keyword, out exactTerm))
            {
                info.Append("\nTerm Frequency: " + exactTerm.frequency);
            }
            else
            {
                info.Append("\nTerm Frequency: 0");
            }

            Stem stem        = new Stem();
            bool useStemming = isStem.Equals("yes");
            int  count       = 0;

            if (useStemming)
            {
                keyword = stem.step1(keyword);
            }

            foreach (KeyValuePair <string, Term> entry in termDict)
            {
                // Compare like with like: stem the dictionary key only when the
                // keyword itself was stemmed.
                string word = useStemming ? stem.step1(entry.Key) : entry.Key;

                if (!word.Equals(keyword))
                {
                    continue;
                }

                foreach (DocumentNode doc in entry.Value.getDocuments())
                {
                    count++;
                    info.Append("\n[DocID: " + doc.documentID + " Line #: " + doc.lineNum + " Word Position: " + doc.wordNum + "]");
                    info.Append("\nDoc [DocID: " + doc.documentID + "] line: " + doc.line + "\n\n");
                }
            }

            info.Append("\nFreq:" + count);
            return info.ToString();
        }
예제 #2
0
        /// <summary>
        /// Resolves the documents relevant to a query line and runs the ranking
        /// pipeline on them.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Split <paramref name="line"/> on single spaces and strip every
        ///    non-word character from each token.
        /// 2. When <c>isStem</c> is "yes": stem the token with <c>Stem.step1</c>, then
        ///    for every key of <c>termDict</c> that equals the raw token or whose stem
        ///    equals the stemmed token, bump that key's count in <c>query</c> and store
        ///    <c>retrieveDocs(key)</c> in <c>relevantDocs</c>.
        ///    When <c>isStem</c> is "no": store <c>retrieveDocs(token)</c> if the token
        ///    is a key of <c>termDict</c>.
        ///    In both modes a term is skipped when <c>isStopWord</c> is "no" and
        ///    <c>checkStopWord</c> returns true for it.
        /// 3. Merge all collected documents into <c>relevantDocsList</c>, one entry
        ///    per <c>documentID</c>.
        /// 4. Call the weight, normalisation, display, similarity and table routines.
        /// </remarks>
        /// <param name="line">Space-separated query text.</param>
        public void outputDocumentTable2(string line)
        {
            var words = line.Split(' ');

            System.Console.WriteLine("Retrieving Doc....");
            Stem stem = new Stem();

            // Monotonically increasing counter; its only use is to give every
            // relevantDocs entry a distinct string key.
            int count = 0;

            foreach (string word in words)
            {
                string w = Regex.Replace(word, @"[^\w]", string.Empty);

                count++;

                if (isStem.Equals("yes"))
                {
                    w = stem.step1(w);

                    foreach (KeyValuePair <string, Term> entry in termDict)
                    {
                        // Stop words are dropped only when stop-word inclusion is
                        // switched off; every other combination processes the term.
                        // NOTE(review): assumes checkStopWord has no side effects, so
                        // not calling it when isStopWord is "yes" is harmless — confirm.
                        if (isStopWord.Equals("no") && checkStopWord(entry.Key))
                        {
                            continue;
                        }

                        if (!entry.Key.Equals(word) && !stem.step1(entry.Key).Equals(w))
                        {
                            continue;
                        }

                        // entry.Key comes from enumerating termDict, so no extra
                        // membership check against termDict is needed here.
                        if (query.ContainsKey(entry.Key))
                        {
                            query[entry.Key] = query[entry.Key] + 1;
                        }
                        else
                        {
                            query.Add(entry.Key, 1);
                        }

                        System.Console.WriteLine("**{0} => {1}", entry.Key, w);
                        System.Console.WriteLine("{0} => {1}", stem.step1(entry.Key), w);
                        relevantDocs.Add(count.ToString(), retrieveDocs(entry.Key));
                        count++;
                    }
                }
                else if (isStem.Equals("no") && termDict.ContainsKey(w))
                {
                    bool excludedStopWord = isStopWord.Equals("no") && checkStopWord(w);

                    if (!excludedStopWord)
                    {
                        relevantDocs.Add(count.ToString(), retrieveDocs(w));
                    }
                }
            }

            // Merge the per-term document sets into one list keyed by documentID;
            // the first node seen for a given documentID is the one that is kept.
            foreach (KeyValuePair <string, SortedDictionary <string, DocumentNode> > entry in relevantDocs)
            {
                foreach (KeyValuePair <string, DocumentNode> doc in entry.Value)
                {
                    if (!relevantDocsList.ContainsKey(doc.Value.documentID))
                    {
                        relevantDocsList.Add(doc.Value.documentID, doc.Value);
                    }
                }
            }

            // Calculate weights for each document in the relevant list.
            foreach (KeyValuePair <string, DocumentNode> entry in relevantDocsList)
            {
                calcWeights(entry.Key);
                calcQWeights(entry.Key);
            }

            // Calculate n-weights once the per-document weights are available.
            calcNWeights();
            calcQNWeights();

            foreach (KeyValuePair <string, DocumentNode> entry in relevantDocsList)
            {
                System.Console.WriteLine("----------------------{0}----------------------------", entry.Key);
                System.Console.WriteLine("PASSS 1");
                displayDocValues(entry.Key);
                System.Console.WriteLine("");
            }

            calcSim();
            displaySim();
            table();
        }
예제 #3
0
        /// <summary>
        /// Parses a query line into term counts and then hands the same line to
        /// <c>outputDocumentTable2</c>.
        /// </summary>
        /// <remarks>
        /// After calling <c>clearAll()</c>, the line is split on single spaces. Each
        /// token has its non-word characters removed and, when <c>isStem</c> is "yes",
        /// is stemmed with <c>Stem.step1</c>. Tokens that are keys of <c>termDict</c>
        /// are counted in <c>query</c>; the resulting counts are printed.
        /// </remarks>
        /// <param name="line">Space-separated query text.</param>
        public void parseQuery(string line)
        {
            var words = line.Split(' ');

            clearAll();
            Stem stem = new Stem();

            // Calculate term frequency of query.
            foreach (string word in words)
            {
                System.Console.WriteLine("PASSS 1");
                string w = Regex.Replace(word, @"[^\w]", string.Empty);

                if (isStem.Equals("yes"))
                {
                    w = stem.step1(w);
                }

                // NOTE(review): the previous version branched four ways on isStem,
                // isStopWord and checkStopWord(w), but every branch performed exactly
                // this same update, so stop-word settings never changed the outcome
                // here. Confirm whether stop words were meant to be excluded.
                if (!termDict.ContainsKey(w))
                {
                    continue;
                }

                if (query.ContainsKey(w))
                {
                    query[w] = query[w] + 1;
                }
                else
                {
                    query.Add(w, 1);
                }
            }

            System.Console.WriteLine("PASSS XXX");
            foreach (KeyValuePair <string, double> entry in query)
            {
                System.Console.WriteLine(">>>>>>>>{0} {1}", entry.Key, entry.Value);
            }

            outputDocumentTable2(line);
        }