/// <summary>
/// Combines two assemblies derived from going different directions from the same start node.
/// The node list of <paramref name="leftSide"/> is reversed and its sequence is reverse
/// complemented, then the nodes and sequence of <paramref name="rightSide"/> are appended,
/// skipping the shared start node and the first k-mer of the right sequence so that the
/// shared start is not duplicated.
/// </summary>
/// <param name="leftSide">Assembly whose first node is the shared start node; contributes the leading part of the result.</param>
/// <param name="rightSide">Assembly whose first node is the shared start node; contributes the trailing part of the result.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
/// <exception cref="ArgumentException">The two assemblies do not begin at the same node.</exception>
public PossibleAssembly(PossibleAssembly leftSide, PossibleAssembly rightSide)
{
    if (leftSide == null)
    {
        throw new ArgumentNullException("leftSide");
    }
    if (rightSide == null)
    {
        throw new ArgumentNullException("rightSide");
    }

    //First to verify that both start in same location.
    if (leftSide.constituentNodes[0] != rightSide.constituentNodes[0])
    {
        throw new ArgumentException("Cannot combine assemblies that start in different locations!");
    }

    //Nodes: the left chain reversed, followed by the right chain without the shared first node.
    constituentNodes = leftSide.constituentNodes.ToList();
    constituentNodes.Reverse();
    constituentNodes.AddRange(rightSide.constituentNodes.Skip(1).ToArray());

    //Sequence: reverse complement of the left contig...
    var leftForward = new Sequence(DnaAlphabet.Instance, leftSide.contigSequence.ToArray());
    var leftReverseComplement = new Sequence(leftForward.GetReverseComplementedSequence());
    string leftText = leftReverseComplement.ConvertToString(0, leftReverseComplement.Count);

    //...followed by the right contig with its first KmerLength symbols dropped, since the
    //start k-mer is already present at the end of the left part.
    var rightForward = new Sequence(DnaAlphabet.Instance, rightSide.contigSequence.ToArray());
    var kmerLength = LargeDeletionFinder.graph.KmerLength;
    string combinedText = leftText + rightForward.ConvertToString(kmerLength, (rightForward.Count - kmerLength));

    contigSequence = new Sequence(DnaAlphabet.Instance, combinedText).ToList();
}
/// <summary>
/// Enumerates the assemblies that can be built outward from <paramref name="start"/>.
/// Chains are first extended through every right neighbor, then through every left neighbor.
/// When both directions produce chains, every left/right pairing is merged into one assembly;
/// when only one direction produces chains, those chains are returned unchanged; when neither
/// does, nothing is returned.
/// </summary>
/// <param name="start">Node from which extension begins in both directions.</param>
/// <returns>A lazily evaluated sequence of candidate assemblies.</returns>
public IEnumerable<PossibleAssembly> ExtendFromStartNode(DeBruijnNode start)
{
    //TODO: I believe this handles figure 8s and palindromes just fine, should verify though.

    //Rightward chains are built first; the left neighbors are only queried afterwards.
    var rightChains = new List<PossibleAssembly>();
    var neighborsOnRight = start.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited(false);
    foreach (var step in neighborsOnRight)
    {
        var seed = new PossibleAssembly(start, true);
        rightChains.AddRange(ExtendChain(seed, step.Key, true, step.Value));
    }

    var leftChains = new List<PossibleAssembly>();
    var neighborsOnLeft = start.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited(false);
    foreach (var step in neighborsOnLeft)
    {
        var seed = new PossibleAssembly(start, false);
        leftChains.AddRange(ExtendChain(seed, step.Key, false, step.Value));
    }

    //Only one side (or neither) produced chains: hand those back as-is.
    if (leftChains.Count == 0)
    {
        foreach (var rightChain in rightChains)
        {
            yield return rightChain;
        }
        yield break;
    }

    if (rightChains.Count == 0)
    {
        foreach (var leftChain in leftChains)
        {
            yield return leftChain;
        }
        yield break;
    }

    //Both sides produced chains: combine every left chain with every right chain.
    foreach (var rightChain in rightChains)
    {
        foreach (var leftChain in leftChains)
        {
            yield return new PossibleAssembly(leftChain, rightChain);
        }
    }
}
/// <summary>
/// Creates a new <see cref="PossibleAssembly"/> from this instance's node list and contig sequence.
/// NOTE(review): whether the lists are copied or shared is decided by the two-argument
/// constructor, which is not visible here - confirm it copies, since callers clone a path
/// before extending it in different directions.
/// </summary>
/// <returns>The newly constructed assembly.</returns>
public PossibleAssembly Clone()
{
    return new PossibleAssembly(this.constituentNodes, this.contigSequence);
}
/// <summary>
/// Adds <paramref name="nextNeighbor"/> to <paramref name="currentPath"/> and keeps walking the graph
/// for as long as exactly one continuation exists. At a fork the path is cloned and each option is
/// extended recursively; at a dead end the path itself is returned.
/// </summary>
/// <param name="currentPath">Path being extended; it is modified in place.</param>
/// <param name="nextNeighbor">Node to append first.</param>
/// <param name="goingRight">Direction in which the previous node was left.</param>
/// <param name="sameOrientation">Orientation flag reported for the edge leading to <paramref name="nextNeighbor"/>.</param>
/// <returns>A lazily evaluated sequence of completed paths.</returns>
private IEnumerable<PossibleAssembly> ExtendChain(PossibleAssembly currentPath, DeBruijnNode nextNeighbor, bool goingRight, bool sameOrientation)
{
    byte symbol = MetaNode.GetNextSymbol(nextNeighbor, KmerLength, !goingRight);
    currentPath.Add(nextNeighbor, symbol);
    nextNeighbor.IsVisited = true;

    bool headingRight = !goingRight ^ sameOrientation;
    List<KeyValuePair<DeBruijnNode, bool>> candidates = ExtensionsInDirection(nextNeighbor, headingRight);

    while (candidates.Count == 1)
    {
        KeyValuePair<DeBruijnNode, bool> step = candidates[0];
        DeBruijnNode node = step.Key;
        sameOrientation = step.Value;
        headingRight = (!headingRight) ^ sameOrientation;
        symbol = MetaNode.GetNextSymbol(node, KmerLength, !headingRight);

        bool alreadyOnPath = node.IsVisited && currentPath.constituentNodes.Contains(node);
        if (!alreadyOnPath)
        {
            //Ordinary step: append the node and look at where it leads.
            currentPath.Add(node, symbol);
            node.IsVisited = true; //flag not actually used though
            candidates = ExtensionsInDirection(node, headingRight);
            continue;
        }

        //We are in a circle or a loop at the end. Basic criterion: a node can't be left
        //the same way twice, so drop the exits this path has already used.
        var exitsAlreadyUsed = currentPath.GetPreviousWaysNodeWasLeft(node);
        List<KeyValuePair<DeBruijnNode, bool>> remaining = ExtensionsInDirection(node, headingRight);
        remaining = remaining.Where(x => !exitsAlreadyUsed.Contains(x.Key)).ToList();

        if (remaining.Count == 1)
        {
            //only one way to go
            candidates = remaining;
            currentPath.Add(node, symbol);
            node.IsVisited = true; //flag not actually used though
            continue;
        }

        if (remaining.Count == 0)
        {
            //Nowhere new to go: the path is finished. It is circular when it came back to its first node.
            if (currentPath.constituentNodes[0] == node)
            {
                currentPath.CircularLoop = true;
            }
            yield return currentPath;
            yield break;
        }

        //Several unused exits: extend a clone of the path through each one, then stop.
        //NOTE(review): unlike the single-exit case above, the revisited node is not appended
        //to the path before branching - confirm this is intended.
        foreach (var option in remaining)
        {
            foreach (var finished in ExtendChain(currentPath.Clone(), option.Key, headingRight, option.Value))
            {
                yield return finished;
            }
        }
        yield break;
    }

    //The loop ended with either no continuation or more than one.
    if (candidates.Count == 0)
    {
        yield return currentPath;
        yield break;
    }

    //If we have more than one node remaining, have to kick it off.
    foreach (var option in candidates)
    {
        foreach (var finished in ExtendChain(currentPath.Clone(), option.Key, headingRight, option.Value))
        {
            yield return finished;
        }
    }
}

/// <summary>
/// Returns the right or left extension nodes of <paramref name="node"/>, using the
/// edge-marking getters with their default argument.
/// </summary>
private static List<KeyValuePair<DeBruijnNode, bool>> ExtensionsInDirection(DeBruijnNode node, bool toTheRight)
{
    if (toTheRight)
    {
        return node.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited();
    }
    return node.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
}
/// <summary>
/// Creates an analysis for <paramref name="assemblyToCheck"/>: takes the next report number
/// from <c>DeletionReportCounter</c> (advancing the counter), stores the assembly, and
/// immediately runs <c>LookForDeletion</c>.
/// </summary>
/// <param name="assemblyToCheck">Assembly to be examined.</param>
public DeletionAnalysis(PossibleAssembly assemblyToCheck)
{
    //Read the current counter value, then advance it, before anything else happens.
    var reportNumber = DeletionReportCounter++;
    DeletionNumber = reportNumber;

    Assembly = assemblyToCheck;
    LookForDeletion();
}
/// <summary>
/// Builds <c>_greedyPathAssembly</c> by starting at the meta node with the largest
/// (average k-mer coverage x constituent node count) among those whose lowest reference position
/// is non-zero, then repeatedly following the best outgoing path until a node already on the path
/// is reached or no outgoing node exists. Afterwards the assembly is finalized and judged by length.
/// Updates <c>FormsCompleteLoop</c>, <c>SuccessfulAssembly</c>, <c>MinimumGreedySplit</c>,
/// <c>AssemblyLength</c>, <c>PathSplits</c> and <c>assemblyNodes</c>.
/// </summary>
private void attemptToCreateAssembly()
{
    //TODO: This node should always be a good start node, but may be an erroneous one, check for this.
    var current = gg.MetaNodes
        .Where(node => node.Lowest_Reference_Position != 0)
        .MaxBy(node => (node.AvgKmerCoverage * node.ConstituentNodes.Count));

    _greedyPathAssembly = new PossibleAssembly();

    if (current.CircularLoop)
    {
        //The start node is already a loop on its own: it is the whole assembly.
        FormsCompleteLoop = true;
        assemblyNodes.Add(current);
        _greedyPathAssembly.AddMetaNode(current);
        MinimumGreedySplit = 1.0;
    }
    else
    {
        MitoPaintedAssembler.RaiseStatusEvent("\tAttempting to find greedy path, frequencies of majority split below");

        //Move along while greedily taking the best outgoing path at every node.
        while (true)
        {
            assemblyNodes.Add(current);
            _greedyPathAssembly.AddMetaNode(current);

            var outgoing = current.GetOutgoingNodes().ToList();
            if (outgoing.Count == 0)
            {
                //Dead end: no loop was formed.
                FormsCompleteLoop = false;
                SuccessfulAssembly = false;
                break;
            }

            SplitData split = new SplitData(outgoing);
            PathSplits.Add(split);

            if (outgoing.Count > 1)
            {
                //Track the weakest majority seen at any fork and report this fork.
                if (split.MaxFrequency < MinimumGreedySplit)
                {
                    MinimumGreedySplit = split.MaxFrequency;
                }

                string forkReport = "\tPossible Paths: " + outgoing.Count
                    + " Frequency: " + split.MaxFrequency.ToString("P1")
                    + " Range: " + current.Lowest_Reference_Position.ToString()
                    + "-" + current.Highest_Reference_Position.ToString();
                MitoPaintedAssembler.RaiseStatusEvent(forkReport);
            }

            current = split.BestPath.NeighborNode;

            //NOTE(review): this fires on reaching ANY node already on the path, not only the
            //start node - confirm that is the intended definition of a complete loop.
            if (assemblyNodes.Contains(current))
            {
                FormsCompleteLoop = true;
                break;
            }
        }
    }

    int totalNodeLength = assemblyNodes.Sum(node => node.LengthOfNode);

    //now, did we form an assembly?
    //AssemblyLength is read here before it is overwritten below.
    bool worthFinalizing = FormsCompleteLoop || Math.Abs(totalNodeLength - AssemblyLength) < 100;
    if (!worthFinalizing)
    {
        return;
    }

    SuccessfulAssembly = true;
    _greedyPathAssembly.FinalizeAndOrientToReference();
    AssemblyLength = (int)_greedyPathAssembly.Sequence.Count;

    //TODO: More sophisticated criteria than larger than 8 kb to validate assembly
    bool longEnough = AssemblyLength > StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION;
    SuccessfulAssembly = longEnough;

    string outcome = longEnough
        ? "\tSuccessful assembly of length: " + AssemblyLength.ToString()
        : "\tAssembly failed. Only recovered sequence of length: " + AssemblyLength.ToString();
    MitoPaintedAssembler.RaiseStatusEvent(outcome);
}