/// <summary>
/// Builds a new tree that contains <paramref name="copyFactor"/> copies of
/// <paramref name="tree"/>, all attached beneath one freshly created root node.
/// </summary>
/// <remarks>
/// For every copy, each node's topic ID and each inter-tree edge key is shifted by a
/// running offset that grows by <c>largestTopicID + 100</c> per copy, so the first copy
/// keeps the original IDs. The new root is given the topic ID 999999.
/// For a constraint tree the root receives a single inter-tree edge to itself; otherwise
/// it receives the source root's topic words with every weight multiplied by
/// <paramref name="copyFactor"/> (skipped when the source root has no topic words).
/// </remarks>
/// <param name="tree">Tree to replicate.</param>
/// <param name="copyFactor">Number of copies to create; values below 2 are rejected.</param>
/// <param name="largestTopicID">
/// Largest topic ID used to space the copies apart. Pass -1 to have it computed from the
/// nodes of <paramref name="tree"/>; the computed value is written back to the caller.
/// </param>
/// <returns>The spanning tree of the new graph, rooted at the new root node.</returns>
/// <exception cref="NotImplementedException">
/// Thrown when <paramref name="copyFactor"/> is less than 2.
/// </exception>
public static ITree GetCopiedTree(ITree tree, int copyFactor, ref int largestTopicID)
{
    if (copyFactor < 2)
    {
        throw new NotImplementedException();
    }

    // Topic ID assigned to the synthetic root that joins all copies.
    const int rootTopicID = 999999;

    var scheme = TreeNodeScheme.Get(tree.Graph.NodeTable);
    var graph2 = new Graph();
    var scheme2 = new TreeNodeScheme(graph2, isContraintTree: scheme.GetIsContraintTree());

    // The shared root is created first; its attributes are filled in after the copies exist.
    var root2 = graph2.AddNode();

    var nodeBFSList = TreeUtils.GetBreathFirstTraversalList(tree.Root, node => tree.GetChildren(node));
    if (largestTopicID == -1)
    {
        largestTopicID = nodeBFSList.Max(node => scheme.GetTopicID(node));
    }

    var offset = 0;
    for (int iFactor = 0; iFactor < copyFactor; iFactor++)
    {
        foreach (var node in nodeBFSList)
        {
            // Clone the node and its attributes, shifting IDs by the current offset.
            var node2 = graph2.AddNode();
            scheme2.SetTopicID(node2, scheme.GetTopicID(node) + offset);
            scheme2.SetDocIndex(node2, scheme.GetDocIndex(node));

            var words = scheme.GetTopicWords(node);
            if (words != null)
            {
                scheme2.SetTopicWords(node2, new Dictionary<string, double>(words));
            }

            var interTreeLinks = scheme.GetInterTreeEdges(node);
            if (interTreeLinks != null)
            {
                var interTreeLinks2 = new Dictionary<int, Tuple<double, double>>();
                foreach (var kvp in interTreeLinks)
                {
                    interTreeLinks2.Add(kvp.Key + offset, Tuple.Create(kvp.Value.Item1, kvp.Value.Item2));
                }
                scheme2.SetInterTreeEdges(node2, interTreeLinks2);
            }

            // Attach to the copied parent; a node without a parent hangs off the new root.
            // The parent lookup relies on the parent having been copied earlier in this pass.
            var parent = tree.GetParent(node);
            INode parent2;
            if (parent == null)
            {
                parent2 = root2;
            }
            else
            {
                parent2 = scheme2.GetNodeByTopicID(scheme.GetTopicID(parent) + offset);
            }
            graph2.AddEdge(parent2, node2);
        }

        offset += largestTopicID + 100;
    }

    // Fill in the attributes of the synthetic root.
    scheme2.SetTopicID(root2, rootTopicID);
    if (scheme2.GetIsContraintTree())
    {
        var rootLinks = new Dictionary<int, Tuple<double, double>>();
        rootLinks.Add(rootTopicID, Tuple.Create(1.0, 1.0));
        scheme2.SetInterTreeEdges(root2, rootLinks);
    }
    else
    {
        // Topic words may be absent (the per-node loop above guards the same case),
        // so only scale and assign them when the source root actually has some.
        var rootWords = scheme.GetTopicWords(tree.Root);
        if (rootWords != null)
        {
            var scaledWords = new Dictionary<string, double>();
            foreach (var kvp in rootWords)
            {
                scaledWords.Add(kvp.Key, kvp.Value * copyFactor);
            }
            scheme2.SetTopicWords(root2, scaledWords);
        }
    }

    return graph2.GetSpanningTree(root2);
}
/// <summary>
/// Returns the tree described by the file at <c>_filename</c>, parsing the file on the
/// first call and returning the cached <c>_tree</c> on later calls.
/// </summary>
/// <remarks>
/// Lines containing <c>-&gt;</c> (not at position 0) are treated as edges; every other line
/// is treated as a node whose label is the text between the first pair of double quotes.
/// The node ID is parsed from the label, starting at its second character and ending
/// before the first <c>-</c> found at index 2 or later. Parsing stops at the first line
/// that starts with <c>}</c>.
/// After the graph is built, <c>_scheme</c> is assigned, <c>CalculateDocuments()</c> is
/// called, and <c>RemoveDocuments()</c> is called when <c>_isRemoveDocuments</c> is set.
/// </remarks>
/// <returns>The cached or newly built tree.</returns>
public ITree GetTree()
{
    if (_tree != null)
    {
        return _tree;
    }

    // Parsing begins at the fourth line of the file.
    // NOTE(review): presumably the first three lines are the graph header - confirm.
    const int firstContentLine = 3;

    var graph = new Graph(true);
    var scheme = new TreeNodeScheme(graph, _topicIDColumn, _topicWordsColumn, _isContraintTree);
    var isLoadTopicWords = _isLoadTopicWords;

    var nodeID2Row = new Dictionary<int, int>();
    var gvID2nodeID = new Dictionary<string, int>();
    var gvEdges = new List<Tuple<string, string>>();
    // (row, nodeID, label) of every node line, kept so attributes can be assigned in a
    // second pass without reading or parsing the file again.
    var parsedNodes = new List<Tuple<int, int, string>>();

    // Pass 1: create one table row per node line and collect the edge lines.
    var allLines = File.ReadAllLines(_filename);
    for (int i = firstContentLine; i < allLines.Length; i++)
    {
        var line = allLines[i];
        if (line.StartsWith("}", StringComparison.Ordinal))
        {
            break;
        }

        var arrowIndex = line.IndexOf("->", StringComparison.Ordinal);
        if (arrowIndex > 0)
        {
            // Edge line: text before the arrow is the parent ID, text after it the child ID.
            var gvParent = line.Substring(0, arrowIndex);
            var gvChild = line.Substring(arrowIndex + 2);
            gvEdges.Add(new Tuple<string, string>(gvParent, gvChild));
            continue;
        }

        // Node line: the ID used by edge lines is everything before '['.
        var bracketIndex = line.IndexOf('[');
        var gvID = line.Substring(0, bracketIndex);
        var quoteIndex1 = line.IndexOf('"');
        var quoteIndex2 = line.IndexOf('"', quoteIndex1 + 1);
        var content = line.Substring(quoteIndex1 + 1, quoteIndex2 - quoteIndex1 - 1);

        var nodeID = int.Parse(content.Substring(1, content.IndexOf('-', 2) - 1));
        var row = graph.NodeTable.AddRow();
        gvID2nodeID.Add(gvID, nodeID);
        nodeID2Row.Add(nodeID, row);
        parsedNodes.Add(new Tuple<int, int, string>(row, nodeID, content));
    }

    // Pass 2: assign attributes to every node, in file order, once all rows exist.
    foreach (var parsed in parsedNodes)
    {
        var node = graph.GetNode(parsed.Item1);
        var content = parsed.Item3;

        if (_isContraintTree)
        {
            var interEdges = GetInterTreeEdges(content);
            scheme.SetInterTreeEdges(node, interEdges);
        }
        else
        {
            var words = isLoadTopicWords ? GetWordDict(content) : null;
            scheme.SetTopicWords(node, words);
        }

        scheme.SetTopicID(node, parsed.Item2);
        scheme.SetDocIndex(node, GetDocIndex(content));
    }

    // Tree edges: an edge whose child ID was never declared as a node is skipped.
    foreach (var gvEdge in gvEdges)
    {
        var pRow = nodeID2Row[gvID2nodeID[gvEdge.Item1]];
        int childNodeID = -1;
        if (gvID2nodeID.TryGetValue(gvEdge.Item2, out childNodeID))
        {
            graph.AddEdge(pRow, nodeID2Row[childNodeID]);
        }
    }

    _tree = graph.GetSpanningTree(0);
    _scheme = scheme;
    CalculateDocuments();
    if (_isRemoveDocuments)
    {
        RemoveDocuments();
    }

    return _tree;
}