Exemple #1
0
        /// <summary>
        /// Combines two assemblies derived from going different directions from the same start node.
        /// </summary>
        /// <remarks>
        /// The left side's node list is copied and reversed, and its sequence is reverse-complemented. The
        /// right side is then appended without its first node and without its first <c>KmerLength</c> symbols
        /// (read from <c>LargeDeletionFinder.graph</c>) - presumably because the shared start node already
        /// contributes that k-mer through the left side; confirm against how the sequences are built.
        /// Neither argument's collections are modified; both are copied before use.
        /// </remarks>
        /// <param name="leftSide">Assembly whose first node is the shared start node; it ends up reversed at the front of the result.</param>
        /// <param name="rightSide">Assembly whose first node is the same start node; it is appended after the reversed left side.</param>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        /// <exception cref="ArgumentException">The two assemblies do not start at the same node.</exception>
        public PossibleAssembly(PossibleAssembly leftSide, PossibleAssembly rightSide)
        {
            if (leftSide == null)
            {
                throw new ArgumentNullException("leftSide");
            }
            if (rightSide == null)
            {
                throw new ArgumentNullException("rightSide");
            }

            //First verify that both start in the same location.
            if (leftSide.constituentNodes[0] != rightSide.constituentNodes[0])
            {
                throw new ArgumentException("Cannot combine assemblies that start in different locations!");
            }

            //Nodes: reversed copy of the left side, then the right side minus the shared first node.
            constituentNodes = leftSide.constituentNodes.ToList();
            constituentNodes.Reverse();
            constituentNodes.AddRange(rightSide.constituentNodes.Skip(1));

            //Sequence: reverse complement of the left side...
            var leftForward  = new Sequence(DnaAlphabet.Instance, leftSide.contigSequence.ToArray());
            var leftReversed = new Sequence(leftForward.GetReverseComplementedSequence());
            string leftText  = leftReversed.ConvertToString(0, leftReversed.Count);

            //...followed by the right side with its leading KmerLength symbols dropped.
            var kmerLength    = LargeDeletionFinder.graph.KmerLength;
            var rightSequence = new Sequence(DnaAlphabet.Instance, rightSide.contigSequence.ToArray());
            string rightText  = rightSequence.ConvertToString(kmerLength, rightSequence.Count - kmerLength);

            contigSequence = new Sequence(DnaAlphabet.Instance, leftText + rightText).ToList();
        }
        /// <summary>
        /// Enumerates the assemblies obtained by extending from <paramref name="start"/> in both directions.
        /// </summary>
        /// <remarks>
        /// All right-going chains are collected first, then all left-going chains. When both directions
        /// produce chains, every (left, right) pairing is merged into one assembly; otherwise the chains
        /// of the only productive direction are returned as they are. Nothing is yielded when neither
        /// direction produces a chain. This is an iterator, so no work happens until it is enumerated.
        /// </remarks>
        /// <param name="start">Node from which extension begins.</param>
        /// <returns>A lazily evaluated sequence of candidate assemblies.</returns>
        public IEnumerable <PossibleAssembly> ExtendFromStartNode(DeBruijnNode start)
        {
            //TODO: I believe this handles figure 8s and palindromes just fine, should verify though.

            //Rightward chains are fully collected before the left neighbors are even requested.
            var rightChains = new List <PossibleAssembly>();
            foreach (var edge in start.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited(false))
            {
                var seed = new PossibleAssembly(start, true);
                rightChains.AddRange(ExtendChain(seed, edge.Key, true, edge.Value));
            }

            //Then the leftward chains.
            var leftChains = new List <PossibleAssembly>();
            foreach (var edge in start.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited(false))
            {
                var seed = new PossibleAssembly(start, false);
                leftChains.AddRange(ExtendChain(seed, edge.Key, false, edge.Value));
            }

            //Nothing on the left: hand back the right chains untouched (possibly none at all).
            if (leftChains.Count == 0)
            {
                foreach (var chain in rightChains)
                {
                    yield return chain;
                }
                yield break;
            }

            //Nothing on the right: hand back the left chains untouched.
            if (rightChains.Count == 0)
            {
                foreach (var chain in leftChains)
                {
                    yield return chain;
                }
                yield break;
            }

            //Both directions produced chains: merge every left/right pairing.
            foreach (var rightChain in rightChains)
            {
                foreach (var leftChain in leftChains)
                {
                    yield return new PossibleAssembly(leftChain, rightChain);
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Creates a new <see cref="PossibleAssembly"/> from this instance's node list and contig sequence.
        /// </summary>
        /// <remarks>
        /// NOTE(review): no sorting is performed here. Whether the two collections are copied or shared
        /// with the new instance depends on the two-argument constructor, which is not visible in this
        /// section - confirm before relying on the result being an independent copy.
        /// </remarks>
        /// <returns>The newly constructed assembly.</returns>
        public PossibleAssembly Clone()
        {
            return new PossibleAssembly(constituentNodes, contigSequence);
        }
        /// <summary>
        /// Appends <paramref name="nextNeighbor"/> to <paramref name="currentPath"/> and keeps extending the path
        /// while there is exactly one way forward, recursing on a copy of the path for every alternative
        /// when the path branches.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing executes until the result is enumerated. <paramref name="currentPath"/>
        /// is mutated in place; branches are explored on <c>currentPath.Clone()</c>.
        /// </remarks>
        /// <param name="currentPath">Path built so far; nodes and symbols are added to it.</param>
        /// <param name="nextNeighbor">Node to append first.</param>
        /// <param name="goingRight">Direction of travel used to reach <paramref name="nextNeighbor"/>.</param>
        /// <param name="sameOrientation">Orientation flag of the edge leading to <paramref name="nextNeighbor"/>, as reported by the extension-node lookup.</param>
        /// <returns>Every finished path reachable from this point.</returns>
        private IEnumerable <PossibleAssembly> ExtendChain(PossibleAssembly currentPath, DeBruijnNode nextNeighbor, bool goingRight, bool sameOrientation)
        {
            // NOTE(review): this first symbol lookup passes !goingRight (direction before the edge orientation is
            // applied), whereas the loop below passes !nextRight (after it is applied). The two only differ when
            // sameOrientation is false - confirm the asymmetry is intended.
            byte nextSymbol = MetaNode.GetNextSymbol(nextNeighbor, KmerLength, !goingRight);

            currentPath.Add(nextNeighbor, nextSymbol);
            nextNeighbor.IsVisited = true;
            // '!' binds tighter than '^', so this is (!goingRight) ^ sameOrientation, i.e. goingRight == sameOrientation:
            // keep the direction when the orientation matches, flip it otherwise.
            bool nextRight = !goingRight ^ sameOrientation;
            List <KeyValuePair <DeBruijnNode, bool> > nextNodes = nextRight ? nextNeighbor.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() :
                                                                  nextNeighbor.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
            DeBruijnNode next;

            //DeBruijnNode last = currentPath.constituentNodes[currentPath.constituentNodes.Count-1];
            //DeBruijnNode first=currentPath.constituentNodes[0];
            // Walk forward for as long as there is exactly one candidate successor.
            while (nextNodes.Count == 1)
            {
                var nextSet = nextNodes.First();
                next            = nextSet.Key;
                sameOrientation = nextSet.Value;
                // Same direction rule as above, applied to the running direction.
                nextRight       = (!nextRight) ^ sameOrientation;
                nextSymbol      = MetaNode.GetNextSymbol(next, KmerLength, !nextRight);
                //Now check if we are in a circle or a loop at the end. These are very annoying situations; basic criterion: we can't leave
                //the same node the same way twice.
                if (next.IsVisited && currentPath.constituentNodes.Contains(next))
                {
                    //Okay, if we are equal to the first node or the last node, we can't leave or return the same way we came, otherwise we are done.
                    // Presumably the successors already taken from 'next' on this path; helper is not visible here - verify.
                    var excludedNextNodes = currentPath.GetPreviousWaysNodeWasLeft(next);
                    //How many neighbors do we have in this group?
                    var temp = nextRight ? next.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() : next.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
                    temp = temp.Where(x => !excludedNextNodes.Contains(x.Key)).ToList();
                    //Only one way to go: add the node again and keep looping with the filtered successors.
                    if (temp.Count == 1)
                    {
                        nextNodes = temp;
                        //currentPath.contigSequence.Add(nextSymbol);
                        currentPath.Add(next, nextSymbol);
                        next.IsVisited = true; //flag not actually used though
                    }
                    else if (temp.Count == 0)  //done
                    {
                        // No unused exit remains. If we came back to the path's first node, flag the path as circular.
                        // Note that 'next' itself is not appended to the path in this case.
                        if (currentPath.constituentNodes[0] == next)
                        {
                            currentPath.CircularLoop = true;
                        }
                        yield return(currentPath);

                        //nextNodes.Clear();//we are done
                        yield break;
                    }
                    else //Extend path using all feasible options, then continue.
                    {
                        // Each remaining exit is explored on its own clone of the path.
                        // NOTE(review): unlike the other branches, 'next' is not added to currentPath before recursing - confirm.
                        foreach (var neighbor in temp)
                        {
                            foreach (var v in ExtendChain(currentPath.Clone(), neighbor.Key, nextRight, neighbor.Value))
                            {
                                yield return(v);
                            }
                        }
                        //nextNodes.Clear();//done
                        yield break;
                    }
                }
                else
                {
                    // Node not yet on this path: append it and look up its successors in the current direction.
                    //currentPath.contigSequence.Add(nextSymbol);
                    currentPath.Add(next, nextSymbol);
                    next.IsVisited = true;//flag not actually used though
                    nextNodes      = nextRight ? next.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() : next.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
                }
            }
            //If we have more than one node remaining, have to kick it off: recurse once per successor on a clone of the path.
            if (nextNodes.Count > 1)
            {
                foreach (var neighbor in nextNodes)
                {
                    foreach (var v in ExtendChain(currentPath.Clone(), neighbor.Key, nextRight, neighbor.Value))
                    {
                        yield return(v);
                    }
                }
            }
            // Dead end: the path as built so far is a finished result.
            if (nextNodes.Count == 0)
            {
                yield return(currentPath);
            }
        }
 /// <summary>
 /// Creates an analysis for <paramref name="assemblyToCheck"/>, gives it the next report number,
 /// and immediately runs <c>LookForDeletion</c>.
 /// </summary>
 /// <param name="assemblyToCheck">Assembly stored in <c>Assembly</c> before the deletion search runs.</param>
 public DeletionAnalysis(PossibleAssembly assemblyToCheck)
 {
     // Take the current counter value for this report and advance the counter for the next one.
     var reportNumber = DeletionReportCounter++;

     DeletionNumber = reportNumber;
     Assembly = assemblyToCheck;

     LookForDeletion();
 }
        /// <summary>
        /// Builds <c>_greedyPathAssembly</c> by walking the meta-node graph from a high-coverage start node and
        /// records whether the walk produced an acceptable assembly.
        /// </summary>
        /// <remarks>
        /// The start node is the meta node with a non-zero <c>Lowest_Reference_Position</c> that maximises
        /// <c>AvgKmerCoverage * ConstituentNodes.Count</c>. Unless that node is itself a circular loop, the walk
        /// repeatedly moves to <c>SplitData.BestPath.NeighborNode</c> until it reaches a node already collected
        /// (loop closed) or a node with no outgoing nodes (dead end). The result is accepted when a loop was
        /// formed or the summed node length is within 100 of <c>AssemblyLength</c>; it is then finalized and
        /// must exceed <c>StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION</c> to count as successful.
        /// Updates <c>assemblyNodes</c>, <c>PathSplits</c>, <c>MinimumGreedySplit</c>, <c>FormsCompleteLoop</c>,
        /// <c>SuccessfulAssembly</c> and <c>AssemblyLength</c>.
        /// </remarks>
        private void attemptToCreateAssembly()
        {
            //TODO: This node should always be a good start node, but may be an erroneous one, check for this.
            var curNode = gg.MetaNodes
                            .Where(x => x.Lowest_Reference_Position != 0)
                            .MaxBy(x => (x.AvgKmerCoverage * x.ConstituentNodes.Count));

            _greedyPathAssembly = new PossibleAssembly();

            if (curNode.CircularLoop)
            {
                //The start node already closes on itself, so it is the whole assembly.
                FormsCompleteLoop = true;
                assemblyNodes.Add(curNode);
                _greedyPathAssembly.AddMetaNode(curNode);
                MinimumGreedySplit = 1.0;
            }
            else
            {
                MitoPaintedAssembler.RaiseStatusEvent("\tAttempting to find greedy path, frequencies of majority split below");

                //Try to loop back to the start node, always stepping to the best path reported by SplitData.
                while (true)
                {
                    assemblyNodes.Add(curNode);
                    _greedyPathAssembly.AddMetaNode(curNode);

                    var candidates = curNode.GetOutgoingNodes().ToList();
                    if (candidates.Count == 0)
                    {
                        //Dead end: no loop was formed.
                        FormsCompleteLoop  = false;
                        SuccessfulAssembly = false;
                        break;
                    }

                    SplitData split = new SplitData(candidates);
                    PathSplits.Add(split);

                    if (candidates.Count > 1)
                    {
                        //Track the weakest majority seen at any fork and report the fork.
                        if (split.MaxFrequency < MinimumGreedySplit)
                        {
                            MinimumGreedySplit = split.MaxFrequency;
                        }
                        MitoPaintedAssembler.RaiseStatusEvent("\tPossible Paths: " + candidates.Count
                                                              + "  Frequency: " + split.MaxFrequency.ToString("P1")
                                                              + "  Range: " + curNode.Lowest_Reference_Position.ToString() + "-" + curNode.Highest_Reference_Position.ToString());
                    }

                    curNode = split.BestPath.NeighborNode;
                    if (assemblyNodes.Contains(curNode))
                    {
                        //Reached a node we already collected: the path closed on itself.
                        FormsCompleteLoop = true;
                        break;
                    }
                }
            }

            int totalNodeLength = assemblyNodes.Sum(x => x.LengthOfNode);

            //Now, did we form an assembly?
            bool acceptable = FormsCompleteLoop || Math.Abs(totalNodeLength - AssemblyLength) < 100;
            if (!acceptable)
            {
                return;
            }

            SuccessfulAssembly = true;
            _greedyPathAssembly.FinalizeAndOrientToReference();
            AssemblyLength = (int)_greedyPathAssembly.Sequence.Count;

            //TODO: More sophisticated criteria than larger than 8 kb to validate assembly
            if (AssemblyLength > StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION)
            {
                SuccessfulAssembly = true;
                MitoPaintedAssembler.RaiseStatusEvent("\tSuccessful assembly of length: " + AssemblyLength.ToString());
            }
            else
            {
                SuccessfulAssembly = false;
                MitoPaintedAssembler.RaiseStatusEvent("\tAssembly failed.  Only recovered sequence of length: " + AssemblyLength.ToString());
            }
        }