Пример #1
0
        /// <summary>
        /// Builds a new tree holding <paramref name="copyFactor"/> copies of <paramref name="tree"/>,
        /// all attached beneath one freshly created root node.
        /// </summary>
        /// <remarks>
        /// Topic IDs and inter-tree edge keys of the i-th copy (0-based) are shifted by
        /// <c>i * (largestTopicID + 100)</c>. The new root receives topic ID 999999.
        /// NOTE(review): nothing here stops a shifted ID from reaching 999999 - confirm the ID range with callers.
        /// </remarks>
        /// <param name="tree">Tree to replicate; the node scheme is read from its graph's node table.</param>
        /// <param name="copyFactor">Number of copies to create; must be at least 2.</param>
        /// <param name="largestTopicID">
        /// When -1 on entry, it is overwritten with the largest topic ID found among the nodes of
        /// <paramref name="tree"/>; any other value is used unchanged to compute the per-copy shift.
        /// </param>
        /// <returns>The spanning tree of the new graph, rooted at the new root node.</returns>
        /// <exception cref="NotImplementedException"><paramref name="copyFactor"/> is less than 2.</exception>
        public static ITree GetCopiedTree(ITree tree, int copyFactor, ref int largestTopicID)
        {
            // Gap left between the ID ranges of consecutive copies.
            const int copyIDGap = 100;
            // Topic ID given to the synthetic root that joins all copies.
            const int newRootTopicID = 999999;

            if (copyFactor < 2)
            {
                throw new NotImplementedException();
            }

            var scheme  = TreeNodeScheme.Get(tree.Graph.NodeTable);
            var graph2  = new Graph();
            var scheme2 = new TreeNodeScheme(graph2, isContraintTree: scheme.GetIsContraintTree());

            // New root; every copy of the original root becomes its child.
            var root2 = graph2.AddNode();

            // Breadth-first order guarantees a parent is copied before any of its children,
            // so the parent lookup by topic ID below always finds an existing node.
            var nodeBFSList = TreeUtils.GetBreathFirstTraversalList(tree.Root, node => tree.GetChildren(node));

            if (largestTopicID == -1)
            {
                largestTopicID = nodeBFSList.Max(node => scheme.GetTopicID(node));
            }

            var offset = 0;
            for (int iFactor = 0; iFactor < copyFactor; iFactor++)
            {
                foreach (var node in nodeBFSList)
                {
                    // Copy the node with its topic ID shifted into this copy's range.
                    var node2 = graph2.AddNode();
                    scheme2.SetTopicID(node2, scheme.GetTopicID(node) + offset);
                    scheme2.SetDocIndex(node2, scheme.GetDocIndex(node));

                    var topicWords = scheme.GetTopicWords(node);
                    if (topicWords != null)
                    {
                        scheme2.SetTopicWords(node2, new Dictionary <string, double>(topicWords));
                    }

                    var interTreeLinks = scheme.GetInterTreeEdges(node);
                    if (interTreeLinks != null)
                    {
                        // Link keys are shifted by the same offset as the topic IDs.
                        var shiftedLinks = new Dictionary <int, Tuple <double, double> >();
                        foreach (var kvp in interTreeLinks)
                        {
                            shiftedLinks.Add(kvp.Key + offset, Tuple.Create(kvp.Value.Item1, kvp.Value.Item2));
                        }
                        scheme2.SetInterTreeEdges(node2, shiftedLinks);
                    }

                    // Attach the copy to the copy of its parent, or to the new root when it has no parent.
                    var   parent = tree.GetParent(node);
                    INode parent2;
                    if (parent == null)
                    {
                        parent2 = root2;
                    }
                    else
                    {
                        parent2 = scheme2.GetNodeByTopicID(scheme.GetTopicID(parent) + offset);
                    }
                    graph2.AddEdge(parent2, node2);
                }

                offset += largestTopicID + copyIDGap;
            }

            // Fill in the new root.
            scheme2.SetTopicID(root2, newRootTopicID);
            if (scheme2.GetIsContraintTree())
            {
                var rootLinks = new Dictionary <int, Tuple <double, double> >();
                rootLinks.Add(newRootTopicID, Tuple.Create(1.0, 1.0));
                scheme2.SetInterTreeEdges(root2, rootLinks);
            }
            else
            {
                // The source root may carry no topic words (the per-node copy above already
                // tolerates null); in that case the new root is left without words instead of
                // failing with a NullReferenceException.
                var rootWords = scheme.GetTopicWords(tree.Root);
                if (rootWords != null)
                {
                    // The new root's word weights are the original root's weights times the number of copies.
                    var scaledWords = new Dictionary <string, double>();
                    foreach (var kvp in rootWords)
                    {
                        scaledWords.Add(kvp.Key, kvp.Value * copyFactor);
                    }
                    scheme2.SetTopicWords(root2, scaledWords);
                }
            }

            return(graph2.GetSpanningTree(root2));
        }
Пример #2
0
        /// <summary>
        /// Returns the tree parsed from the file at <c>_filename</c>, building and caching it in
        /// <c>_tree</c> on the first call.
        /// </summary>
        /// <remarks>
        /// Lines are processed starting at index 3, and parsing stops at the first line that begins
        /// with "}". A line containing "->" after position 0 is an edge; every other line is treated
        /// as a node declaration whose quoted label holds the node ID (the text between the label's
        /// first character and the first '-' found from index 2 on).
        /// The file is read and parsed once; node attributes are applied in a second pass over the
        /// collected labels, after all rows have been added to the node table.
        /// </remarks>
        /// <returns>The cached tree.</returns>
        public ITree GetTree()
        {
            if (_tree == null)
            {
                var graph            = new Graph(true);
                var scheme           = new TreeNodeScheme(graph, _topicIDColumn, _topicWordsColumn, _isContraintTree);
                var isLoadTopicWords = _isLoadTopicWords;

                var nodeID2Row  = new Dictionary <int, int>();
                var gvEdges     = new List <Tuple <string, string> >();
                var gvID2nodeID = new Dictionary <string, int>();
                // (node ID, label content) per node line, in file order, so the file is neither
                // re-read nor re-parsed when node attributes are filled in below.
                var nodeLabels  = new List <Tuple <int, string> >();

                // Pass 1: collect edges and create one node-table row per node line.
                var allLines = File.ReadAllLines(_filename);
                for (int i = 3; i < allLines.Length; i++)
                {
                    var line = allLines[i];
                    if (line.StartsWith("}", StringComparison.Ordinal))
                    {
                        break;
                    }

                    var arrowIndex = line.IndexOf("->", StringComparison.Ordinal);
                    if (arrowIndex > 0)
                    {
                        // Edge line: "<parent>-><child>"; both sides are kept exactly as written.
                        var gvParent = line.Substring(0, arrowIndex);
                        var gvChild  = line.Substring(arrowIndex + 2);
                        gvEdges.Add(new Tuple <string, string>(gvParent, gvChild));
                    }
                    else
                    {
                        // Node line: the graph-file ID is everything before '[', the label is the
                        // text between the first pair of double quotes.
                        var bracketIndex = line.IndexOf('[');
                        var gvID         = line.Substring(0, bracketIndex);

                        var quoteIndex1 = line.IndexOf('"');
                        var quoteIndex2 = line.IndexOf('"', quoteIndex1 + 1);
                        var content     = line.Substring(quoteIndex1 + 1, quoteIndex2 - quoteIndex1 - 1);

                        var nodeID = int.Parse(content.Substring(1, content.IndexOf("-", 2, StringComparison.Ordinal) - 1));
                        var row    = graph.NodeTable.AddRow();
                        gvID2nodeID.Add(gvID, nodeID);
                        nodeID2Row.Add(nodeID, row);
                        nodeLabels.Add(Tuple.Create(nodeID, content));
                    }
                }

                // Pass 2: fill in the attributes of every node from its label.
                foreach (var nodeLabel in nodeLabels)
                {
                    var topicID = nodeLabel.Item1;
                    var content = nodeLabel.Item2;
                    var node    = graph.GetNode(nodeID2Row[topicID]);

                    if (_isContraintTree)
                    {
                        var interEdges = GetInterTreeEdges(content);
                        scheme.SetInterTreeEdges(node, interEdges);
                    }
                    else
                    {
                        // Topic words are stored as null when loading them is switched off.
                        var words = isLoadTopicWords ? GetWordDict(content) : null;
                        scheme.SetTopicWords(node, words);
                    }
                    scheme.SetTopicID(node, topicID);
                    scheme.SetDocIndex(node, GetDocIndex(content));
                }

                // Tree edges: an edge whose child ID was never declared as a node is skipped,
                // while an undeclared parent ID raises KeyNotFoundException.
                foreach (var gvEdge in gvEdges)
                {
                    var parentRow   = nodeID2Row[gvID2nodeID[gvEdge.Item1]];
                    int childNodeID = -1;
                    if (gvID2nodeID.TryGetValue(gvEdge.Item2, out childNodeID))
                    {
                        graph.AddEdge(parentRow, nodeID2Row[childNodeID]);
                    }
                }

                _tree   = graph.GetSpanningTree(0);
                _scheme = scheme;

                CalculateDocuments();

                if (_isRemoveDocuments)
                {
                    RemoveDocuments();
                }
            }
            return(_tree);
        }