예제 #1
0
        /// <summary>
        /// Runs layout refinement on the delta alignment file (FilePath[0]) using the
        /// query reads from the FastA file (FilePath[1]), writing first the unsorted
        /// output and then the sorted output. When Verbose is set, file sizes and
        /// elapsed times for each stage are reported through Output.
        /// </summary>
        public void RefineLayout()
        {
            Stopwatch stopwatch   = new Stopwatch();
            TimeSpan  computeTime = new TimeSpan();

            // Stage 1: open the query FastA file and cache its sequences for random access.
            stopwatch.Restart();
            long queryFileSize = new FileInfo(this.FilePath[1]).Length;
            FastASequencePositionParser queryParser;

            using (var queryStream = File.OpenRead(FilePath[1]))
            {
                queryParser = new FastASequencePositionParser(queryStream, true);
                queryParser.CacheSequencesForRandomAccess();
            }

            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(this.FilePath[1]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time   : {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size              : {0}", queryFileSize);
            }

            // Stage 2: open the delta alignment file and build the collection over it.
            long deltaFileSize = new FileInfo(this.FilePath[0]).Length;
            stopwatch.Restart();
            using (var deltaStream = File.OpenRead(FilePath[0]))
            {
                using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(deltaStream, queryParser))
                {
                    stopwatch.Stop();

                    if (this.Verbose)
                    {
                        Output.WriteLine(OutputLevel.Verbose);
                        Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(this.FilePath[0]));
                        Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time      : {0}", stopwatch.Elapsed);
                        Output.WriteLine(OutputLevel.Verbose, "   File Size                 : {0}", deltaFileSize);
                    }

                    // Stage 3: refine the layout and write the (unsorted) result.
                    // This interval is what gets reported as "Compute time".
                    stopwatch.Restart();
                    IEnumerable <DeltaAlignment> refinedDeltas = LayoutRefiner.RefineLayout(deltas);
                    DeltaAlignmentSorter         deltaSorter   = new DeltaAlignmentSorter();
                    WriteDelta(refinedDeltas, deltaSorter, UnsortedLayoutRefinmentOutputFilename);
                    stopwatch.Stop();
                    computeTime = computeTime.Add(stopwatch.Elapsed);

                    // Stage 4: write the sorted output; this interval is reported as "Write time".
                    stopwatch.Restart();
                    WriteSortedDelta(deltaSorter, UnsortedLayoutRefinmentOutputFilename, queryParser, this.OutputFile);
                    stopwatch.Stop();
                }
            }

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", computeTime);
                Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", stopwatch.Elapsed);
            }
        }
예제 #2
0
 /// <summary>
 /// Initializes a new instance of the VirtualDeltaAlignmentCollection class.
 /// </summary>
 /// <param name="orderedDeltas">
 /// Source collection. Its Count is copied, and its DeltaAlignmentParser is used
 /// to obtain the enumerator the deltas are read from.
 /// </param>
 /// <param name="windowSize">
 /// Window size. The cache size is set to 1000 times this value.
 /// </param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
 public VirtualDeltaAlignmentCollection(DeltaAlignmentCollection orderedDeltas, int windowSize)
 {
     // Fail with a descriptive exception before any field is assigned,
     // instead of a NullReferenceException half-way through construction.
     if (orderedDeltas == null)
     {
         throw new ArgumentNullException("orderedDeltas");
     }

     this.windowSize    = windowSize;
     this.Count         = orderedDeltas.Count;
     this.catchedDeltas = new List <DeltaAlignment>();
     this.catchSize     = windowSize * 1000;
     this.sourceDeltas  = orderedDeltas.DeltaAlignmentParser.Parse().GetEnumerator();
 }
예제 #3
0
        /// <summary>
        /// Runs repeat resolution on the delta alignment file (FilePath[0]) together with
        /// the query reads file (FilePath[1]), writes the unsorted result and then the
        /// final delta output. When Verbose is set, file sizes and elapsed times are
        /// written to the standard error stream.
        /// </summary>
        public void ResolveAmbiguity()
        {
            Stopwatch stopwatch   = new Stopwatch();
            TimeSpan  computeTime = new TimeSpan();

            // Stage 1: only the size of the query file is looked up here.
            stopwatch.Restart();
            long queryFileSize = new FileInfo(this.FilePath[1]).Length;
            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed Query FastA file: {0}", Path.GetFullPath(this.FilePath[1]));
                Console.Error.WriteLine("            Read/Processing time: {0}", stopwatch.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", queryFileSize);
            }

            long deltaFileSize = new FileInfo(this.FilePath[0]).Length;

            // Stage 2: open both files and build the delta alignment collection over them.
            stopwatch.Restart();
            DeltaAlignmentSorter deltaSorter = null;

            using (var deltaStream = File.OpenRead(this.FilePath[0]))
            {
                using (var queryStream = File.OpenRead(this.FilePath[1]))
                {
                    using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(deltaStream, queryStream))
                    {
                        stopwatch.Stop();

                        if (this.Verbose)
                        {
                            Console.Error.WriteLine();
                            Console.Error.WriteLine("  Processed DeltaAlignment file: {0}", Path.GetFullPath(this.FilePath[0]));
                            Console.Error.WriteLine("            Read/Processing time: {0}", stopwatch.Elapsed);
                            Console.Error.WriteLine("            File Size           : {0}", deltaFileSize);
                        }

                        // Stage 3: resolve ambiguity and write the unsorted deltas.
                        // The stopwatch keeps running until the streams have been disposed.
                        stopwatch.Restart();
                        IEnumerable <DeltaAlignment> resolvedDeltas = RepeatResolver.ResolveAmbiguity(deltas);
                        deltaSorter = new DeltaAlignmentSorter();
                        WriteUnsortedDelta(resolvedDeltas, deltaSorter);
                    }
                }
            }

            stopwatch.Stop();
            computeTime = computeTime.Add(stopwatch.Elapsed);

            // Stage 4: write the final delta output; timed separately as "Write() time".
            stopwatch.Restart();
            this.WriteDelta(deltaSorter);
            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine("  Compute time: {0}", computeTime);
                Console.Error.WriteLine("  Write() time: {0}", stopwatch.Elapsed);
            }
        }
예제 #4
0
        /// <summary>
        /// Generates consensus sequences from the delta alignment file (FilePath[0]) and
        /// the query reads file (FilePath[1]) and writes them out. When Verbose is set,
        /// file sizes and elapsed times for each stage are reported through Output.
        /// </summary>
        public void GenerateConsensus()
        {
            Stopwatch stopwatch   = new Stopwatch();
            TimeSpan  computeTime = new TimeSpan();

            // Stage 1: only the size of the query file is looked up here.
            stopwatch.Restart();
            long queryFileSize = new FileInfo(this.FilePath[1]).Length;
            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(this.FilePath[1]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time  : {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size             : {0}", queryFileSize);
            }

            long deltaFileSize = new FileInfo(this.FilePath[0]).Length;

            // Stage 2: build the delta alignment collection from the two file paths.
            stopwatch.Restart();

            using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(this.FilePath[0], this.FilePath[1]))
            {
                stopwatch.Stop();

                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(this.FilePath[0]));
                    Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time      : {0}", stopwatch.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "   File Size                 : {0}", deltaFileSize);
                }

                // Stage 3: obtain the consensus sequences; reported as "Compute time".
                stopwatch.Restart();
                IEnumerable <ISequence> contigs = ConsensusGeneration.GenerateConsensus(deltas);
                stopwatch.Stop();
                computeTime = computeTime.Add(stopwatch.Elapsed);

                // Stage 4: write the sequences; reported as "Write() time".
                stopwatch.Restart();
                this.WriteSequences(contigs);
                stopwatch.Stop();
            }

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", computeTime);
                Output.WriteLine(OutputLevel.Verbose, "Write() time: {0}", stopwatch.Elapsed);
            }
        }
예제 #5
0
        /// <summary>
        /// Generates consensus sequences (contigs) from an alignment layout.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">
        /// Delta alignments between the reference and the reads. The algorithm compares
        /// each delta's FirstSequenceStart with the running reference offset, so the
        /// collection is presumably ordered by FirstSequenceStart (confirm with callers).
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of contigs; an empty sequence when the collection
        /// contains no deltas.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when
        /// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable <ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Validate here rather than inside the iterator: an iterator body does not
            // start executing until the result is enumerated, which would defer the check.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Iterator that builds the contigs for <see cref="GenerateConsensus"/>.
        /// The argument has already been validated by the caller.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null delta alignment collection.</param>
        /// <returns>The contigs, produced one at a time.</returns>
        private static IEnumerable <ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // This dictionary will not grow beyond a few hundred entries in the worst case,
            // as it only stores the deltas (and their sequences) covering the current position.
            Dictionary <DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary <DeltaAlignment, ISequence>();

            long           currentAlignmentStartOffset = 0;
            long           currentIndex = 0;
            long           inDeltaIndex = 0;
            DeltaAlignment lastDelta;

            List <byte>           currentContig  = new List <byte>();
            List <DeltaAlignment> deltasToRemove = new List <DeltaAlignment>();

            // No deltas: nothing to produce.
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long index = 0;

            lastDelta = alignmentBetweenReferenceAndReads[index];
            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // Loop through all deltas at the current index and find the consensus symbol.
                do
                {
                    // Pull in every delta that starts exactly at the current reference position.
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Advance to the next delta, or null once the collection is exhausted.
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }

                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int    symbolCounter         = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        // Position inside this delta's sequence that corresponds to the current contig position.
                        inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // The delta's last symbol has been consumed; drop it after this pass
                        // (removal is deferred because the dictionary is being enumerated).
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    if (deltasToRemove.Count > 0)
                    {
                        for (int i = 0; i < deltasToRemove.Count; i++)
                        {
                            deltasInCurrentContig.Remove(deltasToRemove[i]);
                        }

                        deltasToRemove.Clear();
                    }

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // If every delta has ended but the next one starts right at the following
                    // position, keep extending the same contig instead of starting a new one.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Advance to the next delta, or null once the collection is exhausted.
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }
                }while (deltasInCurrentContig.Count > 0);

                yield return(new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false));
            }while (lastDelta != null);
        }
예제 #6
0
        /// <summary>
        /// Refines alignment layout by taking in consideration indels (insertions and deletions) and rearrangements between two genomes.
        /// Requires mate-pair information to resolve ambiguity.
        /// </summary>
        /// <param name="orderedDeltas">
        /// Ordered deltas. The FirstSequenceStart and FirstSequenceEnd values of the deltas
        /// read from this collection are adjusted in place while the layout is refined.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of deltas, yielded as they are unloaded from the
        /// internal cache; an empty sequence when the collection contains no deltas.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when <paramref name="orderedDeltas"/> is null.
        /// </exception>
        public static IEnumerable <DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
        {
            // Validate here rather than inside the iterator: an iterator body does not
            // start executing until the result is enumerated, which would defer the check.
            if (orderedDeltas == null)
            {
                throw new ArgumentNullException("orderedDeltas");
            }

            return RefineLayoutIterator(orderedDeltas);
        }

        /// <summary>
        /// Iterator that performs the layout refinement for <see cref="RefineLayout"/>.
        /// The argument has already been validated by the caller.
        /// </summary>
        /// <param name="orderedDeltas">Non-null collection of ordered deltas.</param>
        /// <returns>The refined deltas, produced one at a time.</returns>
        private static IEnumerable <DeltaAlignment> RefineLayoutIterator(DeltaAlignmentCollection orderedDeltas)
        {
            if (orderedDeltas.Count == 0)
            {
                yield break;
            }

            // As we don't know the maximum possible number of inserts and deletes,
            // assuming 1,000,000 deltas are sufficient for operation.
            int windowSize = 1000;

            VirtualDeltaAlignmentCollection deltaCatche = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

            List <DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
            List <DeltaAlignment> leftSideDeltas  = null;
            List <DeltaAlignment> rightSideDeltas = null;
            List <DeltaAlignment> unloadedDeltas  = null;

            try
            {
                deltasOverlappingAtCurrentIndex = new List <DeltaAlignment>();
                leftSideDeltas  = new List <DeltaAlignment>();
                rightSideDeltas = new List <DeltaAlignment>();

                // Accumulated shift applied to every subsequent delta as gaps are closed or deltas extended.
                long           currentProcessedOffset = 0;
                DeltaAlignment alignment = deltaCatche[0];
                deltasOverlappingAtCurrentIndex.Add(alignment);
                DeltaAlignment deltaWithLargestEndIndex = alignment;

                for (int currentIndex = 0; currentIndex < deltaCatche.Count - 1; currentIndex++)
                {
                    DeltaAlignment nextDelta = deltaCatche[currentIndex + 1];

                    // Emit whatever the cache is able to release at this point.
                    unloadedDeltas = null;
                    if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return(unloadedDeltas[i]);
                        }

                        unloadedDeltas.Clear();
                    }

                    if (currentProcessedOffset != 0)
                    {
                        nextDelta.FirstSequenceStart += currentProcessedOffset;
                        nextDelta.FirstSequenceEnd   += currentProcessedOffset;
                    }

                    // Check if next delta is just adjacent
                    if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                        // Try to extend the deltas from both sides and make them meet
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the adjacent right side
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                        {
                            DeltaAlignment delta = deltaCatche[index];
                            unloadedDeltas = null;
                            if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return(unloadedDeltas[i]);
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                        if (offset != 0)
                        {
                            nextDelta.FirstSequenceStart += offset;
                            nextDelta.FirstSequenceEnd   += offset;
                        }

                        currentProcessedOffset += offset;
                    }
                    else
                    if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // Check if next delta overlaps with current overlap group
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);

                        // Check if nextDelta is reaching farther than the current farthest delta
                        if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltaWithLargestEndIndex = nextDelta;
                        }

                        // Keep the overlap group bounded: once it exceeds the window size,
                        // drop every delta that ends before the farthest-reaching one.
                        if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                        {
                            for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                            {
                                if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                                {
                                    deltasOverlappingAtCurrentIndex.RemoveAt(i);
                                }
                            }
                        }
                    }
                    else
                    {
                        // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                        // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the right end of the gap
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                        {
                            DeltaAlignment delta = deltaCatche[index];
                            unloadedDeltas = null;
                            if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return(unloadedDeltas[i]);
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        // Vote: +1 for each left-side delta whose query sequence also appears
                        // on the right side of the gap, -1 for each one that does not.
                        int score = 0;
                        for (int i = 0; i < leftSideDeltas.Count; i++)
                        {
                            var l = leftSideDeltas[i];
                            int j = 0;

                            for (; j < rightSideDeltas.Count; j++)
                            {
                                var r = rightSideDeltas[j];

                                // A reference check is not possible here, so the IDs are compared
                                // instead, as IDs are unique for a given read.
                                if (l.QuerySequence.ID == r.QuerySequence.ID)
                                {
                                    score++;
                                    break;
                                }
                            }

                            // Inner loop ran to completion: no matching read on the right side.
                            if (j == rightSideDeltas.Count)
                            {
                                score--;
                            }
                        }

                        // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                        if (score > 0)
                        {
                            long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                            currentProcessedOffset -= gaplength;

                            // Pull deltas on right side to close the gap
                            for (int i = 0; i < rightSideDeltas.Count; i++)
                            {
                                DeltaAlignment delta = rightSideDeltas[i];
                                delta.FirstSequenceStart -= gaplength;
                                delta.FirstSequenceEnd   -= gaplength;
                            }
                        }

                        // Start a new group from the right side of the gap
                        deltaWithLargestEndIndex = nextDelta;
                        deltasOverlappingAtCurrentIndex.Clear();
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);
                    }
                }

                // Flush everything still held by the cache.
                unloadedDeltas = deltaCatche.GetCachedDeltas();

                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return(unloadedDeltas[i]);
                }

                unloadedDeltas.Clear();
            }
            finally
            {
                // Runs when enumeration completes or the enumerator is disposed early;
                // releases the working lists.
                if (deltasOverlappingAtCurrentIndex != null)
                {
                    deltasOverlappingAtCurrentIndex.Clear();
                    deltasOverlappingAtCurrentIndex = null;
                }

                if (leftSideDeltas != null)
                {
                    leftSideDeltas.Clear();
                    leftSideDeltas = null;
                }

                if (rightSideDeltas != null)
                {
                    rightSideDeltas.Clear();
                    rightSideDeltas = null;
                }

                if (deltaCatche != null)
                {
                    deltaCatche = null;
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Resolves reads that have more than one delta alignment against the reference.
        /// This step requires mate pair information to resolve the ambiguity about placements of repeated sequences.
        /// </summary>
        /// <remarks>
        /// The deltas of a read are passed through unchanged when the read has a single
        /// delta, when any of its deltas does not end at the last position of the query
        /// sequence, when its ID is not a valid paired-read ID, or when no deltas are
        /// found for its mate. Otherwise the result of ResolveRepeatUsingMatePair is
        /// yielded (nothing is yielded for the read when that result is null).
        /// </remarks>
        /// <param name="alignmentBetweenReferenceAndReads">Alignment between reference genome and reads.</param>
        /// <returns>A lazily evaluated sequence of DeltaAlignments after resolving repeating reads.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when
        /// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable <DeltaAlignment> ResolveAmbiguity(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Validate here rather than inside the iterator: an iterator body does not
            // start executing until the result is enumerated, which would defer the check.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return ResolveAmbiguityIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Iterator that performs the repeat resolution for <see cref="ResolveAmbiguity"/>.
        /// The argument has already been validated by the caller.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null delta alignment collection.</param>
        /// <returns>The resolved deltas, produced one at a time.</returns>
        private static IEnumerable <DeltaAlignment> ResolveAmbiguityIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Process the deltas read by read until all reads are handled.
            foreach (var curReadDeltas in alignmentBetweenReferenceAndReads.GetDeltaAlignmentsByReads())
            {
                if (curReadDeltas == null)
                {
                    continue;
                }

                int deltasInCurrentRead = curReadDeltas.Count;

                // If curReadDeltas has only one delta, then there are no repeats so add it to result.
                // Or if any delta is a partial alignment, don't try to resolve; add all deltas to result.
                if (deltasInCurrentRead == 1 || curReadDeltas.Any(a => a.SecondSequenceEnd != a.QuerySequence.Count - 1))
                {
                    foreach (var delta in curReadDeltas)
                    {
                        yield return(delta);
                    }
                }
                else if (deltasInCurrentRead == 0)
                {
                    continue;
                }
                else
                {
                    // Resolve repeats
                    string sequenceId = curReadDeltas[0].QuerySequence.ID;
                    string originalSequenceId;
                    bool   forwardRead;
                    string pairedReadType;
                    string libraryName;

                    bool pairedRead = Helper.ValidatePairedSequenceId(sequenceId, out originalSequenceId, out forwardRead, out pairedReadType, out libraryName);

                    // If the read does not have a proper paired ID, pass its deltas through unchanged.
                    if (!pairedRead)
                    {
                        foreach (var delta in curReadDeltas)
                        {
                            yield return(delta);
                        }

                        continue;
                    }

                    string pairedReadId = Helper.GetPairedReadId(originalSequenceId, Helper.GetMatePairedReadType(pairedReadType), libraryName);

                    // Find mate pair
                    List <DeltaAlignment> mateDeltas = alignmentBetweenReferenceAndReads.GetDeltaAlignmentFor(pairedReadId);

                    // If the mate pair is not found, pass the current read's deltas through unchanged.
                    if (mateDeltas.Count == 0)
                    {
                        foreach (var delta in curReadDeltas)
                        {
                            yield return(delta);
                        }

                        continue;
                    }

                    // Resolve using distance method
                    List <DeltaAlignment> resolvedDeltas = ResolveRepeatUsingMatePair(curReadDeltas, mateDeltas, libraryName);
                    if (resolvedDeltas != null)
                    {
                        foreach (var delta in resolvedDeltas)
                        {
                            yield return(delta);
                        }
                    }
                }
            }
        }
예제 #8
0
 /// <summary>
 /// Produces consensus sequences for the given alignments by delegating to
 /// ConsensusGeneration.GenerateConsensus.
 /// </summary>
 /// <param name="deltaAlignmentCollection">Alignment between reference genome and reads.</param>
 /// <returns>The contigs returned by ConsensusGeneration.GenerateConsensus.</returns>
 private IEnumerable <ISequence> ConsensusGenerator(DeltaAlignmentCollection deltaAlignmentCollection)
 {
     IEnumerable <ISequence> contigs = ConsensusGeneration.GenerateConsensus(deltaAlignmentCollection);

     return contigs;
 }
예제 #9
0
 /// <summary>
 /// Resolves ambiguously placed reads by delegating to RepeatResolver.ResolveAmbiguity.
 /// This step requires mate pair information to resolve the ambiguity about placements of repeated sequences.
 /// </summary>
 /// <param name="deltaAlignmentCollection">Alignment between reference genome and reads.</param>
 /// <returns>The DeltaAlignments returned by RepeatResolver.ResolveAmbiguity.</returns>
 private static IEnumerable <DeltaAlignment> RepeatResolution(DeltaAlignmentCollection deltaAlignmentCollection)
 {
     IEnumerable <DeltaAlignment> resolvedDeltas = RepeatResolver.ResolveAmbiguity(deltaAlignmentCollection);

     return resolvedDeltas;
 }
예제 #10
0
 /// <summary>
 /// Refines the layout of the alignment between reads and reference genome by
 /// delegating to LayoutRefiner.RefineLayout.
 /// </summary>
 /// <param name="deltaAlignmentCollection">Ordered Repeat Resolved Deltas.</param>
 /// <returns>The DeltaAlignments returned by LayoutRefiner.RefineLayout.</returns>
 private static IEnumerable <DeltaAlignment> LayoutRefinment(DeltaAlignmentCollection deltaAlignmentCollection)
 {
     IEnumerable <DeltaAlignment> refinedDeltas = LayoutRefiner.RefineLayout(deltaAlignmentCollection);

     return refinedDeltas;
 }
예제 #11
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// Runs the comparative assembly steps in order: read alignment, repeat resolution
        /// (followed by sorting of the resolved deltas), layout refinement, consensus generation
        /// and, when <c>ScaffoldingEnabled</c> is set, scaffold generation. The deltas produced by
        /// each step are written to temporary files, which are deleted before this method exits.
        /// </remarks>
        /// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
        /// <param name="queryParser">The parser to load the sequences to assemble.</param>
        /// <returns>
        /// The generated scaffolds when scaffolding is enabled; otherwise the list of contigs
        /// produced by consensus generation.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
        /// </exception>
        public IEnumerable <ISequence> Assemble(IEnumerable <ISequence> referenceSequence, FastASequencePositionParser queryParser)
        {
            // Validate the arguments first so that an invalid call neither replaces the
            // progress timer field nor subscribes a handler before throwing.
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            if (queryParser == null)
            {
                throw new ArgumentNullException("queryParser");
            }

            this._progressTimer          = new Timer(ProgressTimerInterval);
            this._progressTimer.Elapsed += this.ProgressTimerElapsed;

            // Paths of the intermediate files; each stays null until its file has been created,
            // which lets the cleanup in the finally block skip files that never existed.
            string readAlignmentOutputFilename            = null;
            string unsortedRepeatResolutionOutputFilename = null;
            string repeatResolutionOutputFilename         = null;
            string unsortedLayoutRefinmentOutputFilename  = null;
            string layoutRefinmentOutputFileName          = null;

            try
            {
                // Converting to list to avoid multiple parse of the reference file if its a yield return
                var refSequences = referenceSequence.ToList();

                // CacheSequencesForRandomAccess will ignore the call if called more than once.
                queryParser.CacheSequencesForRandomAccess();
                IEnumerable <ISequence> reads = queryParser.Parse();

                // Comparative Assembly Steps
                // 1) Read Alignment (Calling NUCmer for aligning reads to reference sequence)
                // Only reads at least LengthOfMum long are passed to the aligner.
                this.StatusEventStart(Properties.Resources.ReadAlignmentStarted);
                IEnumerable <DeltaAlignment> alignmentBetweenReferenceAndReads = this.ReadAlignment(refSequences,
                                                                                                    reads.Where(a => a.Count >= this.LengthOfMum));

                readAlignmentOutputFilename = Path.GetTempFileName();
                WriteDelta(alignmentBetweenReferenceAndReads, readAlignmentOutputFilename);
                this.StatusEventEnd(Properties.Resources.ReadAlignmentEnded);

                // 2) Repeat Resolution
                this.StatusEventStart(Properties.Resources.RepeatResolutionStarted);
                DeltaAlignmentSorter sorter;

                unsortedRepeatResolutionOutputFilename = Path.GetTempFileName();
                using (DeltaAlignmentCollection deltaAlignmentFromReadAlignment = new DeltaAlignmentCollection(readAlignmentOutputFilename, queryParser))
                {
                    IEnumerable <DeltaAlignment> repeatResolvedDeltas = RepeatResolution(deltaAlignmentFromReadAlignment);
                    sorter = new DeltaAlignmentSorter(refSequences[0].Count);
                    WriteUnsortedDelta(repeatResolvedDeltas, sorter, unsortedRepeatResolutionOutputFilename);
                }

                this.StatusEventEnd(Properties.Resources.RepeatResolutionEnded);
                this.StatusEventStart(Properties.Resources.SortingResolvedDeltasStarted);

                repeatResolutionOutputFilename = Path.GetTempFileName();
                WriteSortedDelta(sorter, unsortedRepeatResolutionOutputFilename, queryParser, repeatResolutionOutputFilename);
                this.StatusEventEnd(Properties.Resources.SortingResolvedDeltasEnded);

                // 3) Layout Refinement
                this.StatusEventStart(Properties.Resources.LayoutRefinementStarted);

                layoutRefinmentOutputFileName = Path.GetTempFileName();
                using (DeltaAlignmentCollection unsortedDeltaCollectionForLayoutRefinment = new DeltaAlignmentCollection(repeatResolutionOutputFilename, queryParser))
                {
                    unsortedLayoutRefinmentOutputFilename = Path.GetTempFileName();
                    IEnumerable <DeltaAlignment> layoutRefinedDeltas = LayoutRefinment(unsortedDeltaCollectionForLayoutRefinment);
                    sorter = new DeltaAlignmentSorter(refSequences[0].Count);
                    WriteUnsortedDelta(layoutRefinedDeltas, sorter, unsortedLayoutRefinmentOutputFilename);
                    WriteSortedDelta(sorter, unsortedLayoutRefinmentOutputFilename, queryParser, layoutRefinmentOutputFileName);
                }

                this.StatusEventEnd(Properties.Resources.LayoutRefinementEnded);

                // 4) Consensus Generation
                // The contigs are materialized with ToList() while the delta collection is still open.
                this.StatusEventStart(Properties.Resources.ConsensusGenerationStarted);
                IList <ISequence> contigs;
                using (var delta = new DeltaAlignmentCollection(layoutRefinmentOutputFileName, queryParser))
                {
                    contigs = this.ConsensusGenerator(delta).ToList();
                }
                this.StatusEventEnd(Properties.Resources.ConsensusGenerationEnded);

                if (this.ScaffoldingEnabled)
                {
                    // 5) Scaffold Generation
                    this.StatusEventStart(Properties.Resources.ScaffoldGenerationStarted);
                    IEnumerable <ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
                    this.StatusEventEnd(Properties.Resources.ScaffoldGenerationEnded);
                    return(scaffolds);
                }
                else
                {
                    return(contigs);
                }
            }
            finally
            {
                this._progressTimer.Stop();

                // Cleanup temp files.
                if (!string.IsNullOrEmpty(readAlignmentOutputFilename))
                {
                    File.Delete(readAlignmentOutputFilename);
                }
                if (!string.IsNullOrEmpty(unsortedRepeatResolutionOutputFilename))
                {
                    File.Delete(unsortedRepeatResolutionOutputFilename);
                }
                if (!string.IsNullOrEmpty(repeatResolutionOutputFilename))
                {
                    File.Delete(repeatResolutionOutputFilename);
                }
                if (!string.IsNullOrEmpty(unsortedLayoutRefinmentOutputFilename))
                {
                    File.Delete(unsortedLayoutRefinmentOutputFilename);
                }
                if (!string.IsNullOrEmpty(layoutRefinmentOutputFileName))
                {
                    File.Delete(layoutRefinmentOutputFileName);
                }
            }
        }
예제 #12
0
        /// <summary>
        /// Builds the largest possible contigs from the query reads, using the reference as backbone.
        /// </summary>
        /// <remarks>
        /// The pipeline runs read alignment, repeat resolution (then sorting), layout refinement and
        /// consensus generation, optionally followed by scaffold generation. Intermediate deltas are
        /// kept in temporary streams that are disposed before the method exits.
        /// </remarks>
        /// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
        /// <param name="queryParser">The parser to load the sequences to assemble.</param>
        /// <returns>
        /// The scaffolds when <c>ScaffoldingEnabled</c> is set; otherwise the contigs from consensus generation.
        /// </returns>
        public IEnumerable <ISequence> Assemble(IEnumerable <ISequence> referenceSequence, FastASequencePositionParser queryParser)
        {
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            if (queryParser == null)
            {
                throw new ArgumentNullException("queryParser");
            }

            // Each stream stays null until it is created so the finally block can skip it.
            Stream alignedStream          = null;
            Stream unsortedResolvedStream = null;
            Stream resolvedStream         = null;
            Stream unsortedRefinedStream  = null;
            Stream refinedStream          = null;

            try
            {
                // Materialize once so a lazily-yielded reference is not parsed repeatedly.
                List<ISequence> backbone = referenceSequence.ToList();

                // Safe to call even if caching already happened; repeated calls are ignored.
                queryParser.CacheSequencesForRandomAccess();
                IEnumerable<ISequence> queryReads = queryParser.Parse();

                // Step 1: read alignment (NUCmer) of sufficiently long reads against the reference.
                this.StatusEventStart(Properties.Resource.ReadAlignmentStarted);
                IEnumerable<ISequence> alignableReads = queryReads.Where(a => a.Count >= this.LengthOfMum);
                IEnumerable<DeltaAlignment> readDeltas = this.ReadAlignment(backbone, alignableReads);

                alignedStream = PlatformManager.Services.CreateTempStream();
                WriteDelta(readDeltas, alignedStream);
                this.StatusEventEnd(Properties.Resource.ReadAlignmentEnded);

                // Step 2: repeat resolution.
                this.StatusEventStart(Properties.Resource.RepeatResolutionStarted);
                DeltaAlignmentSorter repeatSorter;

                unsortedResolvedStream = PlatformManager.Services.CreateTempStream();
                using (DeltaAlignmentCollection alignedDeltas = new DeltaAlignmentCollection(alignedStream, queryParser))
                {
                    IEnumerable<DeltaAlignment> resolvedDeltas = RepeatResolution(alignedDeltas);
                    repeatSorter = new DeltaAlignmentSorter(backbone[0].Count);
                    WriteUnsortedDelta(resolvedDeltas, repeatSorter, unsortedResolvedStream);
                }

                this.StatusEventEnd(Properties.Resource.RepeatResolutionEnded);
                this.StatusEventStart(Properties.Resource.SortingResolvedDeltasStarted);

                resolvedStream = PlatformManager.Services.CreateTempStream();
                WriteSortedDelta(repeatSorter, unsortedResolvedStream, queryParser, resolvedStream);
                this.StatusEventEnd(Properties.Resource.SortingResolvedDeltasEnded);

                // Step 3: layout refinement, written unsorted first and then sorted.
                this.StatusEventStart(Properties.Resource.LayoutRefinementStarted);

                refinedStream = PlatformManager.Services.CreateTempStream();
                using (DeltaAlignmentCollection sortedResolvedDeltas = new DeltaAlignmentCollection(resolvedStream, queryParser))
                {
                    unsortedRefinedStream = PlatformManager.Services.CreateTempStream();
                    IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefinment(sortedResolvedDeltas);
                    DeltaAlignmentSorter layoutSorter = new DeltaAlignmentSorter(backbone[0].Count);
                    WriteUnsortedDelta(refinedDeltas, layoutSorter, unsortedRefinedStream);
                    WriteSortedDelta(layoutSorter, unsortedRefinedStream, queryParser, refinedStream);
                }

                this.StatusEventEnd(Properties.Resource.LayoutRefinementEnded);

                // Step 4: consensus generation; materialized while the delta collection is open.
                this.StatusEventStart(Properties.Resource.ConsensusGenerationStarted);
                IList<ISequence> contigs;
                using (DeltaAlignmentCollection sortedRefinedDeltas = new DeltaAlignmentCollection(refinedStream, queryParser))
                {
                    contigs = this.ConsensusGenerator(sortedRefinedDeltas).ToList();
                }
                this.StatusEventEnd(Properties.Resource.ConsensusGenerationEnded);

                if (!this.ScaffoldingEnabled)
                {
                    return contigs;
                }

                // Step 5: scaffold generation.
                this.StatusEventStart(Properties.Resource.ScaffoldGenerationStarted);
                IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, queryReads);
                this.StatusEventEnd(Properties.Resource.ScaffoldGenerationEnded);
                return scaffolds;
            }
            finally
            {
                // Release the temporary streams in the order they were declared.
                Stream[] tempStreams =
                {
                    alignedStream,
                    unsortedResolvedStream,
                    resolvedStream,
                    unsortedRefinedStream,
                    refinedStream
                };

                foreach (Stream tempStream in tempStreams)
                {
                    if (tempStream != null)
                    {
                        tempStream.Dispose();
                    }
                }
            }
        }