コード例 #1
0
        /// <summary>
        /// Orients the assembly path relative to the reference and, when the reference
        /// positions change monotonically along it, records the aligned reference range,
        /// the gaps between consecutive alignments (reported as deletions), and the
        /// k-mer fraction supporting the path at bifurcating nodes.
        /// </summary>
        /// <remarks>
        /// Writes <c>ReferenceValuesChangeMonotonically</c> (only ever set to true here),
        /// and, inside the monotonic branch, <c>StartReference</c>, <c>EndReference</c>,
        /// <c>SizeOfDeletionsSeen</c>, <c>HasDeletion</c>, <c>DeletedRegions</c>,
        /// <c>SimpleBifurcation</c> and <c>AvgFractionBySplit</c>.
        /// </remarks>
        private void LookForDeletion()
        {
            var v = Assembly.Where(x => x.IsInReference).ToList();

            // Number of adjacent in-reference node pairs whose reference position increases.
            // Zip stops at the shorter sequence, so pairing v.Skip(1) with v covers every adjacent pair.
            int difs = Enumerable.Zip(v.Skip(1), v,
                                      (next, prev) => next.ReferenceGenomePosition > prev.ReferenceGenomePosition ? 1 : 0).Sum();

            // If monotonically changing, should only not change once (when it goes around the circle).
            if (difs < 2 || difs > (v.Count - 3))
            {
                ReferenceValuesChangeMonotonically = true;
            }

            // Direction of travel: a big sum means positions mostly increase, a small sum means they decrease.
            bool movingUp = difs > (v.Count / 2);
            if (!movingUp)
            {
                Assembly.ReversePath();
            }

            // Only report for sensible assemblies
            if (!ReferenceValuesChangeMonotonically)
            {
                return;
            }

            Assembly.FinalizeAndOrientToReference();

            // Get Alignments
            var alns = HaploGrepSharp.ReferenceGenome.GetDeltaAlignments(Assembly.Sequence).SelectMany(x => x).ToList();
            if (alns.Count > 0)
            {
                StartReference = (int)alns[0].FirstSequenceStart;
                EndReference   = (int)alns[alns.Count - 1].FirstSequenceEnd;
            }

            SizeOfDeletionsSeen = String.Empty;
            if (alns.Count > 1)
            {
                // More than one alignment: each gap between consecutive alignments is reported as a deletion.
                HasDeletion = true;
                StringBuilder sb            = new StringBuilder();
                List <int>    DeletionSizes = new List <int>();
                for (int i = 0; i < (alns.Count - 1); i++)
                {
                    var s = ReferenceGenome.ConvertTorCRSPosition((int)alns[i].FirstSequenceEnd);
                    var e = ReferenceGenome.ConvertTorCRSPosition((int)alns[i + 1].FirstSequenceStart);
                    // Each region is emitted as "start-end;" (a trailing ';' is kept after the last one).
                    sb.Append(s.ToString()).Append("-").Append(e.ToString()).Append(";");
                    DeletionSizes.Add(e - s + 1);
                }
                DeletedRegions      = sb.ToString();
                SizeOfDeletionsSeen = String.Join(";", DeletionSizes.Select(x => x.ToString()));
            }

            // Now see if we can get the fractional evidence for this split.
            // Only the simple case is reported: exactly two two-way bifurcations along the path.
            int    totBifurcations = 0;
            double avg             = 0.0;
            bool   tooComplex      = false;

            // The last node is not visited, so Assembly[i + 1] is always a valid index.
            for (int i = 0; i < (Assembly.Count - 1) && !tooComplex; i++)
            {
                var cnode  = Assembly[i];
                var lefts  = cnode.GetLeftExtensionNodes().ToList();
                var rights = cnode.GetRightExtensionNodes().ToList();
                List <List <DeBruijnNode> > neighbors = new List <List <DeBruijnNode> >()
                {
                    lefts, rights
                };
                foreach (var neighbor in neighbors)
                {
                    if (neighbor.Count == 2)
                    {
                        var tot = neighbor.Sum(z => (double)z.KmerCount);
                        // Pick the branch that the path actually follows. The first node has no
                        // predecessor on the path, so position i - 1 is only consulted when i > 0.
                        // First() still throws if neither branch node is adjacent on the path.
                        var cur = (double)neighbor.Where(z => (i > 0 && z == Assembly[i - 1]) || z == Assembly[i + 1]).First().KmerCount;
                        avg += cur / tot;
                        totBifurcations++;
                    }
                    else if (neighbor.Count > 2)
                    {
                        // A node with more than two branches rules out the simple case, so the
                        // count is forced away from 2 and the whole scan stops.
                        totBifurcations = 100; //arbitrarily set to too high a value
                        tooComplex      = true;
                        break;
                    }
                }
            }

            if (totBifurcations == 2)
            {
                SimpleBifurcation = true;
                avg *= .5;// .5 * (a + b) = Average
            }
            AvgFractionBySplit = SimpleBifurcation ? avg : Double.NaN;
        }
コード例 #2
0
        /// <summary>
        /// Builds a greedy path through the meta-node graph and records whether it yields
        /// a usable assembly.
        /// </summary>
        /// <remarks>
        /// The walk starts at the meta node with the largest
        /// <c>AvgKmerCoverage * ConstituentNodes.Count</c> among those whose
        /// <c>Lowest_Reference_Position</c> is non-zero. At every step the node chosen by
        /// <c>SplitData.BestPath</c> is followed until a node already on the path is reached
        /// (a loop) or a node has no outgoing nodes (a dead end).
        /// </remarks>
        private void attemptToCreateAssembly()
        {
            //TODO: This node should always be a good start node, but may be an erroneous one, check for this.
            var current = gg.MetaNodes
                            .Where(node => node.Lowest_Reference_Position != 0)
                            .MaxBy(node => (node.AvgKmerCoverage * node.ConstituentNodes.Count));

            _greedyPathAssembly = new PossibleAssembly();
            if (current.CircularLoop)
            {
                // The start node is already a closed loop: it is the whole path.
                FormsCompleteLoop = true;
                assemblyNodes.Add(current);
                _greedyPathAssembly.AddMetaNode(current);
                MinimumGreedySplit = 1.0;
            }
            else
            {
                MitoPaintedAssembler.RaiseStatusEvent("\tAttempting to find greedy path, frequencies of majority split below");
                // Try to loop back to a node that is already on the path, always stepping to
                // the best candidate reported by SplitData.
                for (;;)
                {
                    assemblyNodes.Add(current);
                    _greedyPathAssembly.AddMetaNode(current);

                    var candidates = current.GetOutgoingNodes().ToList();
                    if (candidates.Count <= 0)
                    {
                        // Dead end: nowhere left to go, so no loop was closed.
                        FormsCompleteLoop  = false;
                        SuccessfulAssembly = false;
                        break;
                    }

                    SplitData split = new SplitData(candidates);
                    PathSplits.Add(split);
                    if (candidates.Count > 1)
                    {
                        // Track the smallest majority frequency seen at any real split.
                        if (split.MaxFrequency < MinimumGreedySplit)
                        {
                            MinimumGreedySplit = split.MaxFrequency;
                        }
                        string report = "\tPossible Paths: " + candidates.Count
                                        + "  Frequency: " + split.MaxFrequency.ToString("P1")
                                        + "  Range: " + current.Lowest_Reference_Position.ToString() + "-" + current.Highest_Reference_Position.ToString();
                        MitoPaintedAssembler.RaiseStatusEvent(report);
                    }

                    current = split.BestPath.NeighborNode;
                    if (assemblyNodes.Contains(current))
                    {
                        // Reached a node that is already on the path: the loop is closed.
                        FormsCompleteLoop = true;
                        break;
                    }
                }
            }

            int totalLength = assemblyNodes.Sum(node => node.LengthOfNode);

            // Now, did we form an assembly? Either the path closed into a loop, or its summed
            // node length is within 100 of the current AssemblyLength.
            bool plausible = FormsCompleteLoop || Math.Abs(totalLength - AssemblyLength) < 100;
            if (!plausible)
            {
                return;
            }

            SuccessfulAssembly = true;
            _greedyPathAssembly.FinalizeAndOrientToReference();
            AssemblyLength = (int)_greedyPathAssembly.Sequence.Count;

            //TODO: More sophisticated criteria than a plain length threshold to validate assembly
            bool longEnough = AssemblyLength > StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION;
            SuccessfulAssembly = longEnough;
            string outcome = longEnough
                ? "\tSuccessful assembly of length: "
                : "\tAssembly failed.  Only recovered sequence of length: ";
            MitoPaintedAssembler.RaiseStatusEvent(outcome + AssemblyLength.ToString());
        }