/// <summary>
/// Simplifies the graph in two stages. First, every weak edge reported by the graph tool is
/// handed to <c>ReSolveCycle</c> and the edges it returns are accumulated as colour edges.
/// Second, when <paramref name="shouldSmooth"/> is set, tandem repeats are processed, short
/// simple paths are smoothed (split) and palindromes are processed.
/// </summary>
/// <param name="cycleLenghtThreshold">The maximum cycle length that can be re-routed.</param>
/// <param name="smoothingThreshold">Simple paths with fewer nodes than this value are candidates for splitting.</param>
/// <param name="shouldSmooth">Whether the smoothing stage should run.</param>
/// <param name="splitNodes">Receives the node ids returned by the smoothing step; empty when nothing was smoothed.</param>
/// <returns>The set of colour edges collected while resolving cycles.</returns>
public HashSet<KeyValuePair<int, int>> Simplify(int cycleLenghtThreshold, int smoothingThreshold, bool shouldSmooth, out HashSet<int> splitNodes)
{
    splitNodes = new HashSet<int>();
    HashSet<KeyValuePair<int, int>> colorEdges = new HashSet<KeyValuePair<int, int>>();

    // Dictionary keyed by edge; the value is how many times that edge occurs.
    IDictionary<Pair<int>, int> multiplicityByEdge = GenerateMultiplicityByEdge(_workingSequence.GetMembersValue());
    Console.Out.WriteLine("Number of edges: " + multiplicityByEdge.Count);

    // Weak edges are the starting points for cycle detection.
    IList<Pair<int>> weakEdges = _graphTool.GetWeakEdges(multiplicityByEdge);
    while (weakEdges.Count != 0)
    {
        Pair<int> currentWeakEdge = weakEdges[0];
        weakEdges.RemoveAt(0);

        // Removes the cycle around this edge and returns the colour edges (may be null).
        HashSet<KeyValuePair<int, int>> suspectedWeakEdges = _graphTool.ReSolveCycle(currentWeakEdge, cycleLenghtThreshold, ref _graph);
        if (suspectedWeakEdges != null)
        {
            // HashSet ignores duplicates, so no explicit Contains check is needed.
            colorEdges.UnionWith(suspectedWeakEdges);
        }
    }

    Console.Out.WriteLine("Nodes:" + _graph.Keys.Count);

    if (!shouldSmooth)
    {
        return colorEdges;
    }

    // Process tandem repeats (A-A) before smoothing.
    _graphTool.ProcessTandem(ref _workingSequence, ref _graph);

    // The working sequence may have changed, so the lookup structures are rebuilt here.
    // Edge -> number of occurrences of that edge.
    IDictionary<Pair<int>, int> newMultiplicityByEdge = GenerateMultiplicityByEdge(_workingSequence.GetMembersValue());
    // Node -> list of all neighbours of that node.
    IDictionary<int, IList<int>> graphLinkStructure = GenerateGraphLinkStructure(_workingSequence.GetMembersValue());
    // Node -> number of occurrences of that node.
    IDictionary<int, int> multiplicityByNodeID = GetMultiplicityByNodeID();

    IList<IList<int>> simplePaths = _graphTool.GetSimplePath(graphLinkStructure, newMultiplicityByEdge, multiplicityByNodeID);
    foreach (IList<int> path in simplePaths)
    {
        // Only paths shorter than the threshold whose first node has more than one
        // entry in the graph are smoothed.
        if (path.Count < smoothingThreshold && _graph[path[0]].Count > 1)
        {
            // Smooth returns the path only when it could be split, otherwise null;
            // a null result must be skipped instead of enumerated.
            IList<int> smooth = _graphTool.Smooth(path, ref _graph);
            if (smooth != null)
            {
                splitNodes.UnionWith(smooth);
            }
        }
    }

    _graphTool.ProcessPalindrome(ref _workingSequence, ref _graph);

    return colorEdges;
}
/// <summary>
/// Gets the node values currently held by the working (modified) sequence.
/// </summary>
/// <returns>The list produced by the working sequence's <c>GetMembersValue</c> call.</returns>
public IList<int> GetModifiedSequence()
{
    IList<int> modifiedNodes = _workingSequence.GetMembersValue();
    return modifiedNodes;
}