/// <summary>
/// Clears previous state via <c>ClearFiles</c>, adds every file matching a search
/// pattern to <paramref name="tree"/>, prints a visualization and summary of the
/// tree to the console, and serializes the node at (0, 0) to disk.
/// </summary>
/// <param name="input">
/// Argument array. Element 1 is passed to <see cref="Directory.GetFiles(string, string)"/>
/// as the directory path and element 2 as the search pattern. The whole array is
/// also forwarded to <c>ClearFiles</c>. Element 0 is not read by this method.
/// </param>
/// <param name="tree">The tree that receives the files and is then serialized.</param>
/// <remarks>
/// Output is written to <c>tree.bin</c>, <c>word.bin</c> and <c>pos.bin</c>; the
/// paths are relative, so they resolve against the current working directory.
/// </remarks>
public void AddFiles(string[] input, VectorTree tree)
{
    ClearFiles(input, tree);

    var directory = input[1];
    var searchPattern = input[2];
    var matchingFiles = Directory.GetFiles(directory, searchPattern);

    foreach (var path in matchingFiles)
    {
        // Echo each file together with the value AddFile returned for it.
        var added = AddFile(path, tree);
        Console.Write("{0} {1} ", path, added);
    }

    var rendering = tree.Visualize(0, 0);
    Console.WriteLine(rendering);

    var dimensions = tree.Size(0, 0);
    Console.WriteLine();
    Console.WriteLine(
        "depth {0} width {1} count: {2}, merges: {3}",
        dimensions.depth,
        dimensions.width,
        tree.Count,
        tree.MergeCount);

    // File.Create truncates any existing file of the same name.
    using (var treeStream = File.Create("tree.bin"))
    using (var wordStream = File.Create("word.bin"))
    using (var posStream = File.Create("pos.bin"))
    {
        var root = tree.GetNode(0, 0);
        root.Serialize(treeStream, wordStream, posStream);
    }

    // Disabled check that loads the serialized tree back and prints it; kept for reference.
    //using (var treeStream = File.OpenRead("tree.bin"))
    //using (var wordStream = File.OpenRead("word.bin"))
    //{
    //    var deserialized = VectorTree.Load(treeStream, wordStream);
    //    Console.WriteLine(deserialized.Visualize());
    //    var deserializedSize = deserialized.Size();
    //    Console.WriteLine("depth {0} width {1}", deserializedSize.depth, deserializedSize.width);
    //}
}
/// <summary>
/// Adds every non-blank word of a document to the node at (0, 0) of
/// <paramref name="tree"/> and verifies that each word can be found again.
/// </summary>
/// <param name="documentId">Document identifier; it is passed through <c>Hash</c> and the result is stored with each word.</param>
/// <param name="input">The document's words. Null, empty and whitespace-only entries are skipped.</param>
/// <param name="tree">The tree that receives the words.</param>
/// <returns>The difference between <c>tree.Count</c> after and before the words were added.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when, right after adding a word, the term vector of the node returned by
/// <c>tree.Find</c> has a <c>CosAngle</c> against the word's own term vector that is
/// below <c>VectorNode.IdenticalAngle</c>.
/// </exception>
private int AddDocument(string documentId, string[] input, VectorTree tree)
{
    var docId = Hash(documentId);
    var countBefore = tree.Count;

    foreach (var word in input)
    {
        if (string.IsNullOrWhiteSpace(word))
        {
            continue;
        }

        // The node is fetched on every iteration rather than hoisted, because
        // this code cannot tell whether Add changes what GetNode(0, 0) returns.
        tree.GetNode(0, 0).Add(word, docId);

        // Sanity check: the word that was just added must be retrievable.
        // NOTE(review): IdenticalAngle is presumably the similarity threshold for
        // treating two term vectors as the same word - confirm against VectorNode.
        var wordVector = new VectorNode(word).TermVector;
        var found = tree.Find(0, 0, word);
        if (wordVector.CosAngle(found.TermVector) < VectorNode.IdenticalAngle)
        {
            throw new InvalidOperationException(string.Format(
                "Word \"{0}\" was added to the tree, but Find returned a node whose term vector has a CosAngle below VectorNode.IdenticalAngle.",
                word));
        }

        // Disabled debug output of the word's components; kept for reference.
        //Console.Write("{0} [", word);
        //foreach (var c in word.Components())
        //{
        //    Console.Write("{0}:{1}, ", c.Key, c.Value);
        //}
        //Console.WriteLine("]");
    }

    return tree.Count - countBefore;
}