Esempio n. 1
0
        /// <summary>
        /// Checks that a node added with a null parent to an empty tree becomes visible as the root,
        /// and that it can afterwards be found through <c>Contains</c> and <c>GetNode</c> under the
        /// lower-case key "all" even though it was created from "All".
        /// </summary>
        public void AddNodesTest()
        {
            const string expectedKey = "all";
            IBaseTree sut = new BaseTree();

            // A freshly constructed tree has no root and does not know the key yet.
            Assert.IsNull(sut.Root);
            Assert.IsFalse(sut.Contains(expectedKey));

            // Insert the first node; the keyword is deliberately mixed-case.
            Node firstNode = new Node("All");
            sut.AddNode(null, firstNode);

            // The node is now the root and is reachable by its lower-case key.
            Assert.IsNotNull(sut.Root);
            Assert.AreEqual(expectedKey, sut.Root.KeyWord);
            Assert.IsTrue(sut.Contains(expectedKey));
            Assert.AreEqual(expectedKey, sut.GetNode(expectedKey).KeyWord);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a keyword tree from the entries of <c>WordDocumentAppearances</c> whose IDF is
        /// greater than 0.2, processing them in ascending IDF order.
        /// </summary>
        /// <remarks>
        /// The tree is seeded with the word "the" (added with a null parent). For every candidate
        /// word, each node currently in the tree is examined:
        /// <list type="number">
        /// <item>The node "contains" the word unless some document of the word is missing from the
        /// node's documents while the word's <c>PossibleContent</c> frequency in that document is
        /// greater than 1.</item>
        /// <item>For containing nodes, the fit is the number of the node's documents that the word
        /// does not appear in and where the node's keyword has a frequency greater than 1.</item>
        /// </list>
        /// The word is attached under the containing node with the smallest fit value (the first
        /// one encountered wins ties). Words for which no node qualifies are skipped.
        /// Progress is written to the console.
        /// </remarks>
        /// <returns>The populated tree.</returns>
        public IBaseTree BuildTreeGoodMatches()
        {
            // Words at or below this IDF are left out of the tree; this is the tuning knob.
            const double minIdf = .2;

            // Sorted once here; the list is only read afterwards, so no re-sort is needed.
            var orderedByIDF = WordDocumentAppearances
                .Where(x => x.Value.IDF > minIdf)
                .OrderBy(x => x.Value.IDF)
                .ToList();

            IBaseTree tree = new BaseTree();
            tree.AddWord((string)null, "the");

            // Walk the sorted list by index instead of repeatedly removing the head element,
            // which cost a linear search plus a shift of the whole list for every word.
            for (int i = 0; i < orderedByIDF.Count; i++)
            {
                string word = orderedByIDF[i].Key;
                Console.WriteLine("Adding word '" + word + "' to tree");

                // Documents of the candidate word; invariant for the whole node scan.
                var wordDocs = WordDocumentAppearances[word].Docs;

                // Find the best-fitting node that contains all relevant occurrences of the word.
                int numDif = Int32.MaxValue;
                Node parent = null;
                foreach (Node n in tree)
                {
                    string branchWord = n.KeyWord;
                    var branchDocs = WordDocumentAppearances[branchWord].Docs;

                    // Step 1: containment check.
                    // TODO: Make this check to make sure appearances are significant
                    bool contained = true;
                    foreach (var element in wordDocs)
                    {
                        if (!branchDocs.Contains(element)
                            && PossibleContent[new KeyValuePair<string, Document>(word, new Document() { Name = element })].Frequency > 1)
                        {
                            contained = false;
                            break;
                        }
                    }

                    if (!contained)
                    {
                        continue;
                    }

                    // Step 2: count the node's documents that the word does not share.
                    int numDifTemp = 0;
                    foreach (string s2 in branchDocs)
                    {
                        if (!wordDocs.Contains(s2)
                            && PossibleContent[new KeyValuePair<string, Document>(branchWord, new Document() { Name = s2 })].Frequency > 1)
                        {
                            numDifTemp++;
                        }
                    }

                    // Strictly smaller only, so the earliest node wins on ties.
                    if (numDifTemp < numDif)
                    {
                        Console.WriteLine("New parent: " + branchWord);
                        numDif = numDifTemp;
                        parent = n;
                    }
                }

                if (parent != null)
                {
                    Console.WriteLine("Adding " + word + " to tree");
                    tree.AddNode(parent, new Node(word));
                }

                Console.WriteLine("Remaining words: " + (orderedByIDF.Count - i - 1));
            }

            return tree;
        }