/// <summary>
/// Checks that a freshly constructed tree has no root and does not contain
/// the key "all", and that after adding a node built from "All" with a null
/// parent the tree exposes that node as its root and can find it under the
/// lower-case key "all".
/// </summary>
public void AddNodesTest()
{
    const string expectedKey = "all";

    // Arrange: a brand-new tree must be empty.
    IBaseTree emptyTree = new BaseTree();
    Assert.IsNull(emptyTree.Root);
    Assert.IsFalse(emptyTree.Contains(expectedKey));

    // Act: add a single node with no parent, using a mixed-case keyword.
    emptyTree.AddNode(null, new Node("All"));

    // Assert: the node is now the root and is reachable by the lower-case key.
    Assert.IsNotNull(emptyTree.Root);
    Assert.AreEqual(expectedKey, emptyTree.Root.KeyWord);
    Assert.IsTrue(emptyTree.Contains(expectedKey));
    Assert.AreEqual(expectedKey, emptyTree.GetNode(expectedKey).KeyWord);
}
/// <summary>
/// Builds a keyword tree seeded with the word "the" and then tries to attach
/// every word of <c>WordDocumentAppearances</c> whose IDF is greater than 0.2,
/// processing the words in ascending IDF order.
/// </summary>
/// <remarks>
/// For each candidate word every node currently in the tree is examined:
/// <list type="number">
/// <item><description>
/// The node is eligible only if every document of the word is either also a
/// document of the node's keyword, or the word's frequency in that document
/// (looked up in <c>PossibleContent</c>) is not greater than 1.
/// </description></item>
/// <item><description>
/// Among eligible nodes, the one with the fewest documents in which the
/// node's keyword has frequency greater than 1 but the word does not appear
/// is chosen as parent. Ties keep the node that was enumerated first.
/// </description></item>
/// </list>
/// A word for which no eligible node exists is not added to the tree.
/// Progress is written to the console.
/// NOTE(review): every keyword in the tree, including the seed "the", is
/// looked up in <c>WordDocumentAppearances</c>; this presumably throws if a
/// keyword is missing from that collection - confirm against the caller.
/// </remarks>
/// <returns>The tree containing the seed word and every word that found a parent.</returns>
public IBaseTree BuildTreeGoodMatches()
{
    // Words at or below this IDF are skipped entirely.
    const double minimumIdf = .2;
    // An appearance only counts as significant when the frequency exceeds this value.
    const int insignificantFrequency = 1;

    // OrderBy is a stable sort, so sorting once is enough.
    var orderedByIDF = WordDocumentAppearances
        .Where(x => x.Value.IDF > minimumIdf)
        .OrderBy(x => x.Value.IDF)
        .ToList();

    IBaseTree tree = new BaseTree();
    tree.AddWord((string)null, "the");

    // Walk the list instead of repeatedly removing its first element; the
    // countdown keeps the "Remaining words" output unchanged.
    int remaining = orderedByIDF.Count;
    foreach (var entry in orderedByIDF)
    {
        string word = entry.Key;
        Console.WriteLine("Adding word '" + word + "' to tree");

        // Documents of the candidate word; invariant for the whole node scan.
        var wordDocs = WordDocumentAppearances[word].Docs;

        // Find the best-fitting node among those that cover all significant
        // appearances of the word.
        int numDif = Int32.MaxValue;
        Node parent = null;
        foreach (Node n in tree)
        {
            var branchDocs = WordDocumentAppearances[n.KeyWord].Docs;

            // The node is disqualified by any document where the word appears
            // significantly but the node's keyword does not appear at all.
            bool contained = true;
            foreach (var element in wordDocs)
            {
                if (!branchDocs.Contains(element)
                    && PossibleContent[new KeyValuePair<string, Document>(word, new Document() { Name = element })].Frequency > insignificantFrequency)
                {
                    contained = false;
                    break;
                }
            }

            if (!contained)
            {
                continue;
            }

            // Count the documents where the node's keyword appears
            // significantly but the word is absent: fewer means a tighter fit.
            int numDifTemp = 0;
            foreach (string s2 in branchDocs)
            {
                if (!wordDocs.Contains(s2)
                    && PossibleContent[new KeyValuePair<string, Document>(n.KeyWord, new Document() { Name = s2 })].Frequency > insignificantFrequency)
                {
                    numDifTemp++;
                }
            }

            // Strict comparison: on a tie the earlier node stays the parent.
            if (numDifTemp < numDif)
            {
                Console.WriteLine("New parent: " + n.KeyWord);
                numDif = numDifTemp;
                parent = n;
            }
        }

        // The tree is only modified after its enumeration has finished.
        if (parent != null)
        {
            Console.WriteLine("Adding " + word + " to tree");
            tree.AddNode(parent, new Node(word));
        }

        remaining--;
        Console.WriteLine("Remaining words: " + remaining);
    }

    return tree;
}