/// <summary>
        /// Builds a delta alignment from the supplied sequence models and returns its deltas.
        /// </summary>
        /// <param name="sequenceData">Models whose file name and content are handed to the provider.</param>
        /// <returns>The <c>Deltas</c> collection of the newly created alignment.</returns>
        public IList <long> GetDeltas(SequenceModel[] sequenceData)
        {
            // Resolve every model through the provider, keeping only the first sequence it yields.
            var resolved = sequenceData
                .Select(model => _provider.Provide(model?.FileName, model?.Content).First())
                .ToList();

            // The first and the last resolved sequences form the aligned pair.
            var head = resolved.First();
            var tail = resolved.Last();

            // The alignment is kept in the _aligner field as well as being queried here.
            _aligner = new DeltaAlignment(head, tail);
            return _aligner.Deltas;
        }
Esempio n. 2
0
        /// <summary>
        /// Writes the delta alignments to <paramref name="outputfilename"/> in the order given by the sorter,
        /// re-assigning each alignment's Id to the position at which it is written in the output file.
        /// </summary>
        /// <param name="sorter">Sorter instance supplying the alignment ids in sorted order.</param>
        /// <param name="unsortedDeltaFilename">Unsorted Delta Filename.</param>
        /// <param name="queryParser">Query/read sequences parser.</param>
        /// <param name="outputfilename">Output file name.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sorter"/> is null.</exception>
        public static void WriteSortedDelta(DeltaAlignmentSorter sorter, string unsortedDeltaFilename, FastASequencePositionParser queryParser, string outputfilename)
        {
            if (sorter == null)
            {
                throw new ArgumentNullException(nameof(sorter));
            }

            using (DeltaAlignmentParser unsortedDeltaParser = new DeltaAlignmentParser(unsortedDeltaFilename, queryParser))
            using (StreamWriter writer = new StreamWriter(outputfilename))
            {
                long deltaPositionInFile = 0;
                foreach (long id in sorter.GetSortedIds())
                {
                    DeltaAlignment deltaAlignment = unsortedDeltaParser.GetDeltaAlignmentAt(id);
                    deltaAlignment.Id = deltaPositionInFile;
                    string deltaString = Helper.GetString(deltaAlignment);

                    // The Id doubles as an offset into the written file, so advance by the number of
                    // encoded bytes rather than UTF-16 chars; the two differ for non-ASCII text.
                    deltaPositionInFile += writer.Encoding.GetByteCount(deltaString);
                    writer.Write(deltaString);
                }

                writer.Flush();
            }
        }
        /// <summary>
        /// Builds a delta alignment from the supplied sequence models and reports whether
        /// its query direction is reversed.
        /// </summary>
        /// <param name="sequenceData">Models whose file name and content are handed to the provider.</param>
        /// <returns>The <c>IsReverseQueryDirection</c> value of the newly created alignment.</returns>
        public bool IsReverseQueryDirection(SequenceModel[] sequenceData)
        {
            // Resolve every model through the provider, keeping only the first sequence it yields.
            var resolved = sequenceData
                .Select(model => _provider.Provide(model?.FileName, model?.Content).First())
                .ToList();

            // The first and the last resolved sequences form the aligned pair.
            var head = resolved.First();
            var tail = resolved.Last();

            // The alignment is kept in the _aligner field as well as being queried here.
            _aligner = new DeltaAlignment(head, tail);
            return _aligner.IsReverseQueryDirection;
        }
Esempio n. 4
0
        /// <summary>
        /// Checks DeltaAlignment.ToString() against the expected strings stored in the test XML,
        /// first for two in-memory DNA sequences and then for a reference read from a FASTA file.
        /// </summary>
        public void ValidateDeltaAlignmentToString()
        {
            // Scenario 1: reference and query are both built in memory.
            ISequence reference = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
            ISequence query     = new Sequence(Alphabets.DNA, "GGGGG");

            var inMemoryDelta = new DeltaAlignment(reference, query);
            inMemoryDelta.FirstSequenceEnd  = 21;
            inMemoryDelta.SecondSequenceEnd = 20;

            string renderedInMemory = inMemoryDelta.ToString();
            string expectedInMemory = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpectedNode);
            Assert.AreEqual(expectedInMemory, renderedInMemory);

            // Scenario 2: the reference is the first sequence parsed from the configured FASTA file.
            string fastaPath = this.utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.FilePathNode);

            List <ISequence> parsedSequences;
            using (var fastaStream = File.OpenRead(fastaPath))
            {
                var fastaParser = new FastAParser();
                fastaParser.Alphabet = Alphabets.Protein;
                parsedSequences      = fastaParser.Parse(fastaStream).ToList();
            }

            var fileDelta = new DeltaAlignment(parsedSequences[0], query);
            fileDelta.FirstSequenceEnd    = 21;
            fileDelta.SecondSequenceStart = 20;
            fileDelta.QueryDirection      = Cluster.ReverseDirection;

            string renderedFromFile = fileDelta.ToString();
            string expectedFromFile = this.utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpected2Node);
            Assert.AreEqual(expectedFromFile, renderedFromFile);
        }
Esempio n. 5
0
        /// <summary>
        /// Writes the delta alignments in sorted order, re-assigning each alignment's Id to the
        /// position at which it is written. Output goes to <c>OutputFile</c> when that is set
        /// (by temporarily redirecting the console), otherwise to the console itself.
        /// </summary>
        /// <param name="sorter">Sorter supplying the alignment ids in sorted order.</param>
        private void WriteDelta(
            DeltaAlignmentSorter sorter)
        {
            FastASequencePositionParser sequenceParser      = null;
            DeltaAlignmentParser        unsortedDeltaParser = null;

            TextWriter   textWriterConsoleOutSave = Console.Out;
            StreamWriter streamWriterConsoleOut   = null;

            try
            {
                // The query sequences are read from the second configured input file.
                sequenceParser      = new FastASequencePositionParser(this.FilePath[1], true);
                unsortedDeltaParser = new DeltaAlignmentParser(UnsortedDeltaFile, sequenceParser);
                if (!string.IsNullOrEmpty(this.OutputFile))
                {
                    streamWriterConsoleOut = new StreamWriter(this.OutputFile);
                    Console.SetOut(streamWriterConsoleOut);
                }

                // Captured after the optional redirect so it matches the writer actually in use.
                var outputEncoding = Console.Out.Encoding;

                long deltaPositionInFile = 0;

                foreach (long id in sorter.GetSortedIds())
                {
                    DeltaAlignment deltaAlignment = unsortedDeltaParser.GetDeltaAlignmentAt(id);

                    deltaAlignment.Id = deltaPositionInFile;
                    string deltaString = Helper.GetString(deltaAlignment);

                    // The Id doubles as an offset into the written output, so advance by encoded
                    // bytes rather than UTF-16 chars; the two differ for non-ASCII text.
                    deltaPositionInFile += outputEncoding.GetByteCount(deltaString);
                    Console.Write(deltaString);
                }

                Console.Out.Flush();
            }
            finally
            {
                // Restore the console first so it is never left pointing at a disposed writer,
                // even if one of the Dispose calls below throws.
                Console.SetOut(textWriterConsoleOutSave);

                if (streamWriterConsoleOut != null)
                {
                    streamWriterConsoleOut.Dispose();
                }

                if (sequenceParser != null)
                {
                    sequenceParser.Dispose();
                }

                if (unsortedDeltaParser != null)
                {
                    unsortedDeltaParser.Dispose();
                }
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Verifies the text produced by DeltaAlignment.ToString() for a forward alignment
        /// whose reference end is 21 and query end is 20.
        /// </summary>
        public void TestDeltaAlignmentToString()
        {
            ISequence      refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
            ISequence      qrySeq = new Sequence(Alphabets.DNA, "GGGGG");
            DeltaAlignment delta  = new DeltaAlignment(refSeq, qrySeq);

            delta.FirstSequenceEnd  = 21;
            delta.SecondSequenceEnd = 20;
            string actualString   = delta.ToString();
            string expectedString = "Ref ID= Query Id= Ref start=0 Ref End=21 Query start=0 Query End=20, Direction=FORWARD";

            // Expected value goes first so that a failure message labels the two strings correctly.
            Assert.AreEqual(expectedString, actualString);
        }
Esempio n. 7
0
            /// <summary>
            /// Removes the first <c>windowSize</c> deltas from the cached list and returns them.
            /// </summary>
            /// <returns>The deltas taken from the front of the cache, in their cached order.</returns>
            private List <DeltaAlignment> Unload()
            {
                var evicted = new List <DeltaAlignment>();

                // Copy the leading window out of the cache one entry at a time.
                int position = 0;
                while (position < windowSize)
                {
                    evicted.Add(this.catchedDeltas[position]);
                    position++;
                }

                // Drop the copied window from the cache and move the cache's start index past it.
                this.catchedDeltas.RemoveRange(0, windowSize);
                startIndexInCatchedList += windowSize;

                return evicted;
            }
Esempio n. 8
0
        /// <summary>
        /// Checks DeltaAlignment.ToString() against the expected strings stored in the test XML,
        /// first for two in-memory DNA sequences and then for a reference read from a FASTA file.
        /// </summary>
        public void ValidateDeltaAlignmentToString()
        {
            ISequence      refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
            ISequence      qrySeq = new Sequence(Alphabets.DNA, "GGGGG");
            DeltaAlignment delta  = new DeltaAlignment(refSeq, qrySeq);

            delta.FirstSequenceEnd  = 21;
            delta.SecondSequenceEnd = 20;
            string actualString   = delta.ToString();
            string expectedString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                    Constants.DeltaAlignmentExpectedNode);

            // Expected value goes first so that a failure message labels the two strings correctly.
            Assert.AreEqual(expectedString, actualString);

            // Read the reference sequence for the second scenario from the FASTA file named in the Xml.
            List <ISequence> seqsList;
            string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                              Constants.FilePathNode);

            using (StreamReader reader = new StreamReader(filePath))
            {
                using (FastAParser parser = new FastAParser())
                {
                    parser.Alphabet = Alphabets.Protein;

                    // Materialize the sequences while the reader and parser are still open.
                    seqsList = parser.Parse(reader).ToList();
                }
            }

            DeltaAlignment delta1 = new DeltaAlignment(seqsList[0], qrySeq);

            delta1.FirstSequenceEnd  = 21;
            delta1.SecondSequenceEnd = 20;
            string actualString1   = delta1.ToString();
            string expectedString1 = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                     Constants.DeltaAlignmentExpected2Node);

            Assert.AreEqual(expectedString1, actualString1);
        }
Esempio n. 9
0
        /// <summary>
        /// Resolve repeats between two sets of deltas coming from paired reads.
        /// Picks the first (read delta, mate delta) combination whose distance on the reference
        /// lies within one standard deviation of the library's mean insert length.
        /// </summary>
        /// <param name="curReadDeltas">Deltas from a read</param>
        /// <param name="mateDeltas">Deltas from mate pair</param>
        /// <param name="libraryName">Name of the clone library whose insert statistics are used.</param>
        /// <returns>
        /// A two-element list holding the selected read delta and mate delta, or null when any mate
        /// delta is not aligned up to the last query position or no combination matches.
        /// </returns>
        private static List <DeltaAlignment> ResolveRepeatUsingMatePair(List <DeltaAlignment> curReadDeltas, List <DeltaAlignment> mateDeltas, string libraryName)
        {
            // Every mate delta must end on the last query position; otherwise the repeat cannot be resolved.
            if (mateDeltas.Any(a => a.SecondSequenceEnd != a.QuerySequence.Count - 1))
            {
                return(null);
            }

            // Get clone library information
            CloneLibraryInformation libraryInfo = CloneLibrary.Instance.GetLibraryInformation(libraryName);
            float mean         = libraryInfo.MeanLengthOfInsert;
            float stdDeviation = libraryInfo.StandardDeviationOfInsert;

            // Find the first pair of deltas with a matching distance.
            foreach (DeltaAlignment pair1 in curReadDeltas)
            {
                foreach (DeltaAlignment pair2 in mateDeltas)
                {
                    long distance = Math.Abs(pair1.FirstSequenceStart - pair2.FirstSequenceEnd);

                    // Compare the absolute deviation from the mean: without Math.Abs every distance
                    // shorter than the mean (including zero) would be accepted as a match.
                    if (Math.Abs(distance - mean) <= stdDeviation)
                    {
                        return(new List <DeltaAlignment>(2) { pair1, pair2 });
                    }
                }
            }

            return(null);
        }
Esempio n. 10
0
        /// <summary>
        /// Writes the delta alignments in sorted order, re-assigning each alignment's Id to the
        /// position at which it is written. Output goes to <paramref name="outputfilename"/> when it
        /// is non-empty (by temporarily redirecting the console), otherwise to the console itself.
        /// </summary>
        /// <param name="sorter">Sorter instance supplying the alignment ids in sorted order.</param>
        /// <param name="unsortedDeltaFilename">Unsorted Delta Filename.</param>
        /// <param name="queryParser">Query/read sequences parser.</param>
        /// <param name="outputfilename">Output file name; null or empty writes to the console.</param>
        private static void WriteSortedDelta(DeltaAlignmentSorter sorter, string unsortedDeltaFilename, FastASequencePositionParser queryParser, string outputfilename)
        {
            using (DeltaAlignmentParser unsortedDeltaParser = new DeltaAlignmentParser(unsortedDeltaFilename, queryParser))
            {
                TextWriter   textWriterConsoleOutSave = Console.Out;
                StreamWriter streamWriterConsoleOut   = null;
                try
                {
                    if (!string.IsNullOrEmpty(outputfilename))
                    {
                        streamWriterConsoleOut = new StreamWriter(outputfilename);
                        Console.SetOut(streamWriterConsoleOut);
                    }

                    // Captured after the optional redirect so it matches the writer actually in use.
                    var outputEncoding = Console.Out.Encoding;

                    long deltaPositionInFile = 0;
                    foreach (long id in sorter.GetSortedIds())
                    {
                        DeltaAlignment deltaAlignment = unsortedDeltaParser.GetDeltaAlignmentAt(id);
                        deltaAlignment.Id = deltaPositionInFile;
                        string deltaString = Helper.GetString(deltaAlignment);

                        // The Id doubles as an offset into the written output, so advance by encoded
                        // bytes rather than UTF-16 chars; the two differ for non-ASCII text.
                        deltaPositionInFile += outputEncoding.GetByteCount(deltaString);
                        Console.Write(deltaString);
                    }

                    Console.Out.Flush();
                }
                finally
                {
                    // Restore the console first so it is never left pointing at a disposed writer,
                    // even if disposing the file writer throws.
                    Console.SetOut(textWriterConsoleOutSave);

                    if (streamWriterConsoleOut != null)
                    {
                        streamWriterConsoleOut.Dispose();
                    }
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Gets the DeltaAlignment at specified position of the file.
        /// The record is expected to consist of an "@" line carrying the position, a ">" line carrying
        /// the reference id, a query id line, a line of seven space-separated properties and then
        /// one delta per line up to a line starting with "*".
        /// </summary>
        /// <param name="position">Position at which delta alignment is required.</param>
        /// <returns>Delta alignment.</returns>
        /// <exception cref="FormatException">
        /// Thrown when the record does not start with "@" or the id on that line differs from <paramref name="position"/>.
        /// </exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the reference id line is missing or malformed, or the file ends before the properties line.
        /// </exception>
        public DeltaAlignment GetDeltaAlignmentAt(long position)
        {
            // The reader is created on first use and then reused for subsequent calls.
            if (this.deltaFileReader == null)
            {
                this.deltaFileReader = new StreamReader(new FileStream(this.DeltaFilename, FileMode.Open, FileAccess.Read));
            }

            // Seek the underlying stream and drop the reader's buffer so reading restarts at the new position.
            this.deltaFileReader.BaseStream.Position = position;
            this.deltaFileReader.DiscardBufferedData();

            string line = ReadNextLine(this.deltaFileReader);

            if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
            {
                throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position, this.DeltaFilename));
            }

            long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
            if (position != deltaPosition)
            {
                throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position, this.DeltaFilename));
            }

            line = ReadNextLine(this.deltaFileReader);
            if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    this.DeltaFilename);

                throw new FileFormatException(message);
            }

            string referenceId = line.Substring(1);

            // Read next line.
            line = ReadNextLine(this.deltaFileReader);

            // Second line - Query sequence id
            string queryId = line;

            // fetch the query sequence from the query file
            // NOTE(review): when the query id line is missing or empty, both sequences stay null
            // and are passed to the DeltaAlignment constructor as-is - confirm that is intended.
            ISequence querySequence = null;
            Sequence  refEmpty      = null;

            if (!string.IsNullOrEmpty(queryId))
            {
                // The text after the last '@' is the position of the query sequence in the query file.
                long sequencePosition = long.Parse(queryId.Substring(queryId.LastIndexOf("@", StringComparison.Ordinal) + 1), CultureInfo.InvariantCulture);
                querySequence = this.QueryParser.GetSequenceAt(sequencePosition);

                // Placeholder reference sequence that only carries the reference id.
                refEmpty      = new Sequence(querySequence.Alphabet, "A", false);
                refEmpty.ID   = referenceId;
            }

            DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence);

            deltaAlignment.Id = deltaPosition;
            line = ReadNextLine(this.deltaFileReader);
            if (line == null)
            {
                // The file ended before the properties line: report a format error instead of
                // failing with a NullReferenceException on the Split call below.
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    this.DeltaFilename);

                throw new FileFormatException(message);
            }

            // The file is machine-readable, so numbers are parsed with the invariant culture;
            // a current culture with a different negative sign would otherwise reject "-5".
            string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            if (deltaAlignmentProperties.Length == 7)
            {
                long temp;
                deltaAlignment.FirstSequenceStart  = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.FirstSequenceEnd    = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.SecondSequenceEnd   = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                int error;
                deltaAlignment.Errors           = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                deltaAlignment.NonAlphas        = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            }

            // Fifth line - either a 0 - marks the end of the delta alignment or they are deltas.
            // The first pass still holds the properties line; a line of seven tokens does not parse
            // as a single number and is therefore skipped.
            while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
            {
                long delta;
                if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
                {
                    deltaAlignment.Deltas.Add(delta);
                }

                // Read next line.
                line = this.deltaFileReader.ReadLine();

                // Continue reading if blank line found.
                while (line != null && line.Length == 0)
                {
                    line = this.deltaFileReader.ReadLine();
                }
            }

            return(deltaAlignment);
        }
Esempio n. 12
0
        /// <summary>
        /// Refines alignment layout by taking in consideration indels (insertions and deletions) and rearrangements between two genomes.
        /// Requires mate-pair information to resolve ambiguity.
        /// The deltas are adjusted in place: only their FirstSequenceStart / FirstSequenceEnd values are changed,
        /// nothing is returned.
        /// </summary>
        /// <param name="orderedDeltas">Order deltas.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
        public static void RefineLayout(IList <DeltaAlignment> orderedDeltas)
        {
            if (orderedDeltas == null)
            {
                throw new ArgumentNullException("orderedDeltas");
            }

            // Nothing to refine in an empty list.
            if (orderedDeltas.Count == 0)
            {
                return;
            }

            // Current group of mutually overlapping deltas; it is reset whenever a gap is found.
            List <DeltaAlignment> deltasOverlappingAtCurrentIndex = new List <DeltaAlignment>();

            // Running shift accumulated from earlier extensions / gap closures; it is added to every following delta.
            long currentProcessedOffset = 0;

            deltasOverlappingAtCurrentIndex.Add(orderedDeltas[0]);
            DeltaAlignment deltaWithLargestEndIndex = orderedDeltas[0];

            for (int currentIndex = 0; currentIndex < orderedDeltas.Count - 1; currentIndex++)
            {
                DeltaAlignment nextDelta = orderedDeltas[currentIndex + 1];

                // Bring the next delta into the already-shifted coordinate space before comparing it.
                nextDelta.FirstSequenceStart += currentProcessedOffset;
                nextDelta.FirstSequenceEnd   += currentProcessedOffset;
                // Check if next delta is just adjacent
                if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                    // Try to extend the deltas from both sides and make them meet
                    List <DeltaAlignment> leftSideDeltas = deltasOverlappingAtCurrentIndex.Where(a => a.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd).ToList();

                    // Find all deltas starting at the adjacent right side
                    List <DeltaAlignment> rightSideDeltas = new List <DeltaAlignment>(4);
                    rightSideDeltas.AddRange(orderedDeltas.Skip(currentIndex + 1).TakeWhile(a => a.FirstSequenceStart == nextDelta.FirstSequenceStart));

                    // The value returned by ExtendDeltas is applied to the next delta and carried forward
                    // to all later deltas through the running offset.
                    long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                    nextDelta.FirstSequenceStart += offset;
                    nextDelta.FirstSequenceEnd   += offset;
                    currentProcessedOffset       += offset;

                    // NOTE(review): this branch neither adds nextDelta to the overlap group nor updates
                    // deltaWithLargestEndIndex - confirm that is intended.
                }
                else
                if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    // Check if next delta overlaps with current overlap group
                    deltasOverlappingAtCurrentIndex.Add(nextDelta);

                    // Check if nextDelta is reaching farther than the current farthest delta
                    if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        deltaWithLargestEndIndex = nextDelta;
                    }
                }
                else
                {
                    // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                    // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                    List <DeltaAlignment> leftSideDeltas = deltasOverlappingAtCurrentIndex.Where(a => a.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd).ToList();

                    // Find all deltas starting at the right end of the gap
                    List <DeltaAlignment> rightSideDeltas = new List <DeltaAlignment>(4);
                    rightSideDeltas.AddRange(orderedDeltas.Skip(currentIndex + 1).TakeWhile(a => a.FirstSequenceStart == nextDelta.FirstSequenceStart));

                    // For each left delta: +1 as soon as a right delta holds the very same query sequence
                    // instance (the scan then stops), -1 for every right delta compared before that.
                    int score = 0;
                    foreach (var l in leftSideDeltas)
                    {
                        foreach (var r in rightSideDeltas)
                        {
                            if (object.ReferenceEquals(l.QuerySequence, r.QuerySequence))
                            {
                                score++;
                                break;
                            }
                            else
                            {
                                score--;
                            }
                        }
                    }

                    // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                    if (score > 0)
                    {
                        long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                        currentProcessedOffset -= gaplength;

                        // Pull deltas on right side to close the gap
                        // NOTE(review): right-side deltas other than nextDelta are shifted here and will also
                        // receive currentProcessedOffset (which now includes -gaplength) when the loop reaches
                        // them - confirm they are not moved twice.
                        foreach (DeltaAlignment delta in rightSideDeltas)
                        {
                            delta.FirstSequenceStart -= gaplength;
                            delta.FirstSequenceEnd   -= gaplength;
                        }
                    }

                    // Start a new group from the right side of the gap
                    deltaWithLargestEndIndex = nextDelta;
                    deltasOverlappingAtCurrentIndex.Clear();
                    deltasOverlappingAtCurrentIndex.Add(nextDelta);
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Starts parsing from the specified StreamReader.
        /// Each record is expected to consist of an "@" line carrying the delta id, a ">" line carrying
        /// the reference id, a query id line, a line of seven space-separated properties and then
        /// one delta per line up to a line starting with "*". Records are produced lazily, so the
        /// reader is only consumed (and errors only surface) while the result is enumerated.
        /// </summary>
        /// <param name="streamReader">Stream reader to parse.</param>
        /// <returns>IEnumerable of DeltaAlignments.</returns>
        /// <exception cref="Exception">Thrown when the stream is empty or a record header line is missing or malformed.</exception>
        /// <exception cref="FormatException">Thrown when a record has no query id or ends before its properties line.</exception>
        private IEnumerable <DeltaAlignment> ParseFrom(StreamReader streamReader)
        {
            this.parsingReaders.Add(streamReader);

            // The query sequence is cached so consecutive records with the same query id do not fetch it again.
            string    lastReadQuerySequenceId = string.Empty;
            ISequence sequence = null;

            if (streamReader.EndOfStream)
            {
                throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
            }

            string line = ReadNextLine(streamReader);

            do
            {
                if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
                {
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                // Machine-readable data: parse with the invariant culture, like the query id below.
                long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
                line = ReadNextLine(streamReader);
                if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                {
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                // First line - reference id
                string referenceId = line.Substring(1);

                // Read next line.
                line = ReadNextLine(streamReader);

                // Second line - Query sequence id
                string queryId = line;
                if (string.IsNullOrEmpty(queryId))
                {
                    // Without a query id no alignment can be built; report a format error instead of
                    // failing later with a NullReferenceException.
                    throw new FormatException(Properties.Resource.INVALID_INPUT_FILE);
                }

                // fetch the query sequence from the query file
                if (queryId != lastReadQuerySequenceId)
                {
                    // Get the id and remove any alphas - this can happen because the delta might
                    // have "Reverse" appended to it when it's a reversed sequence.
                    string id  = queryId.Substring(queryId.LastIndexOf('@') + 1);
                    int    idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
                    if (idx > 0)
                    {
                        id = id.Substring(0, idx);
                    }

                    long seqPosition = long.Parse(id, CultureInfo.InvariantCulture);
                    sequence = this.QueryParser.GetSequenceAt(seqPosition);
                    lastReadQuerySequenceId = queryId;
                }

                // Placeholder reference sequence that only carries the reference id.
                Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false)
                {
                    ID = referenceId
                };
                DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);

                deltaAlignment.Id = deltaPosition;
                // Fourth line - properties of delta alignment
                // Read next line.
                line = ReadNextLine(streamReader);
                if (line == null)
                {
                    // The stream ended before the properties line.
                    throw new FormatException(Properties.Resource.INVALID_INPUT_FILE);
                }

                // Numbers are parsed with the invariant culture; a current culture with a
                // different negative sign would otherwise reject values such as "-5".
                string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (deltaAlignmentProperties.Length == 7)
                {
                    long temp;
                    deltaAlignment.FirstSequenceStart  = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.FirstSequenceEnd    = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceEnd   = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

                    // Look for a reversed sequence
                    if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
                    {
                        temp = deltaAlignment.SecondSequenceEnd;
                        deltaAlignment.SecondSequenceEnd   = deltaAlignment.SecondSequenceStart;
                        deltaAlignment.SecondSequenceStart = temp;
                        deltaAlignment.QueryDirection      = Cluster.ReverseDirection;
                    }

                    int error;
                    deltaAlignment.Errors           = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                    deltaAlignment.NonAlphas        = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                }

                // Fifth line - either a 0 - marks the end of the delta alignment or they are deltas.
                // The first pass still holds the properties line; a line of seven tokens does not parse
                // as a single number and is therefore skipped.
                while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                {
                    long delta;
                    if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
                    {
                        deltaAlignment.Deltas.Add(delta);
                    }

                    // Read next line.
                    line = streamReader.ReadLine();

                    // Continue reading if blank line found.
                    while (line != null && line.Length == 0)
                    {
                        line = streamReader.ReadLine();
                    }
                }

                yield return(deltaAlignment);

                // Read the next line
                line = streamReader.ReadLine();
            }while (line != null);
        }
Esempio n. 14
0
        /// <summary>
        /// Gets the DeltaAlignment at specified position of the file.
        /// </summary>
        /// <param name="position">Position at which delta alignment is required.</param>
        /// <returns>Delta alignment.</returns>
        public DeltaAlignment GetDeltaAlignmentAt(long position)
        {
            using (var reader = this.deltaStream.OpenRead())
            {
                long   deltaPosition = -1;
                string line          = ReadNextLine(reader);
                if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
                }

                deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
                if (position != deltaPosition)
                {
                    throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position));
                }

                line = ReadNextLine(reader);
                if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                {
                    throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
                }

                string referenceId = line.Substring(1);

                // Read next line.
                line = ReadNextLine(reader);

                // Second line - Query sequence id
                string queryId = line;

                // fetch the query sequence from the query file
                ISequence querySequence = null;
                Sequence  refEmpty      = null;

                if (!string.IsNullOrEmpty(queryId))
                {
                    // Get the id and remove any alphas - this can happen because the delta might
                    // have "Reverse" appended to it when it's a reversed sequence.
                    string id  = queryId.Substring(queryId.LastIndexOf('@') + 1);
                    int    idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
                    if (idx > 0)
                    {
                        id = id.Substring(0, idx);
                    }

                    long sequencePosition = long.Parse(id, CultureInfo.InvariantCulture);
                    querySequence = this.QueryParser.GetSequenceAt(sequencePosition);
                    refEmpty      = new Sequence(querySequence.Alphabet, "A", false)
                    {
                        ID = referenceId
                    };
                }

                DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence)
                {
                    Id = deltaPosition
                };
                line = ReadNextLine(reader);
                string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (deltaAlignmentProperties != null && deltaAlignmentProperties.Length == 7)
                {
                    long temp;
                    deltaAlignment.FirstSequenceStart  = long.TryParse(deltaAlignmentProperties[0], out temp) ? temp : 0;
                    deltaAlignment.FirstSequenceEnd    = long.TryParse(deltaAlignmentProperties[1], out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], out temp) ? temp : 0;
                    deltaAlignment.SecondSequenceEnd   = long.TryParse(deltaAlignmentProperties[3], out temp) ? temp : 0;

                    // Look for a reversed sequence
                    if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
                    {
                        temp = deltaAlignment.SecondSequenceEnd;
                        deltaAlignment.SecondSequenceEnd   = deltaAlignment.SecondSequenceStart;
                        deltaAlignment.SecondSequenceStart = temp;
                        deltaAlignment.QueryDirection      = Cluster.ReverseDirection;
                    }

                    int error;
                    deltaAlignment.Errors           = int.TryParse(deltaAlignmentProperties[4], out error) ? error : 0;
                    deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], out error) ? error : 0;
                    deltaAlignment.NonAlphas        = int.TryParse(deltaAlignmentProperties[6], out error) ? error : 0;
                }

                // Fifth line - either a 0 - marks the end of the delta alignment or they are deltas
                while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                {
                    long temp;
                    if (long.TryParse(line, out temp))
                    {
                        deltaAlignment.Deltas.Add(temp);
                    }

                    // Read next line.
                    line = reader.ReadLine();

                    // Continue reading if blank line found.
                    while (line != null && string.IsNullOrEmpty(line))
                    {
                        line = reader.ReadLine();
                    }
                }
                return(deltaAlignment);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Parses the delta file named by <c>Filename</c> and returns the delta alignments
        /// it contains, grouped by the "--" separator lines found in the file.
        /// </summary>
        /// <remarks>
        /// Each record is read as: a reference id line starting with "&gt;", a query id line,
        /// the query sequence line, a line holding seven space separated alignment properties
        /// and then delta lines up to a line starting with "*". A group that is not followed
        /// by a "--" line before the end of the file is still returned.
        /// </remarks>
        /// <returns>Returns DeltaAlignment collection, one inner collection per group.</returns>
        /// <exception cref="FileFormatException">
        /// The file is empty, a record does not start with "&gt;", the file ends in the middle
        /// of a record, or no alphabet can be detected for a query sequence.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A query sequence line is at least <c>MaximumSequenceLength</c> characters long.
        /// </exception>
        public IList <IEnumerable <DeltaAlignment> > Parse()
        {
            IAlphabet alphabet = null;
            List <IEnumerable <DeltaAlignment> > result = new List <IEnumerable <DeltaAlignment> >();
            IList <DeltaAlignment> deltaAlignments      = new List <DeltaAlignment>();

            string invalidFileMessage = string.Format(
                CultureInfo.InvariantCulture,
                Resources.INVALID_INPUT_FILE,
                Resources.Parser_Name);

            using (StreamReader streamReader = new StreamReader(this.Filename))
            {
                if (streamReader.EndOfStream)
                {
                    throw new FileFormatException(invalidFileMessage);
                }

                // NOTE(review): ReadNextLine is expected to store the line it read in the
                // "line" member (it is not declared in this method) - confirm.
                ReadNextLine(streamReader);

                do
                {
                    if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                    {
                        throw new FileFormatException(invalidFileMessage);
                    }

                    // First line - reference id
                    string referenceId = line.Substring(1);

                    // Second line - query sequence id
                    ReadNextLine(streamReader);
                    string queryId = line;

                    // Third line - query sequence
                    ReadNextLine(streamReader);
                    if (line == null)
                    {
                        // The file ended in the middle of a record.
                        throw new FileFormatException(invalidFileMessage);
                    }

                    if ((long)line.Length >= MaximumSequenceLength)
                    {
                        throw new ArgumentOutOfRangeException(
                                  string.Format(CultureInfo.CurrentUICulture, Resources.SequenceDataGreaterthan2GB, queryId));
                    }

                    // The encoded bytes are used directly. Copying them through a fixed-step
                    // scratch buffer failed for lines longer than two buffer steps.
                    byte[] symbols = ASCIIEncoding.ASCII.GetBytes(line);

                    alphabet = Alphabets.AutoDetectAlphabet(symbols, 0, symbols.Length, alphabet);
                    if (alphabet == null)
                    {
                        throw new FileFormatException(string.Format(CultureInfo.CurrentCulture, Resources.InvalidSymbolInString, line));
                    }

                    // In memory sequence
                    Sequence sequence = new Sequence(alphabet, symbols, false);
                    sequence.ID = queryId;

                    // Placeholder reference sequence that only carries the reference id.
                    Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false);
                    refEmpty.ID = referenceId;

                    DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);

                    // Fourth line - properties of the delta alignment
                    ReadNextLine(streamReader);
                    if (line == null)
                    {
                        // The file ended in the middle of a record.
                        throw new FileFormatException(invalidFileMessage);
                    }

                    // Empty entries are dropped so repeated or trailing spaces do not
                    // make a valid seven-field line look malformed.
                    string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                    if (deltaAlignmentProperties.Length == 7)
                    {
                        long position;
                        deltaAlignment.FirstSequenceStart  = long.TryParse(deltaAlignmentProperties[0], out position) ? position : 0;
                        deltaAlignment.FirstSequenceEnd    = long.TryParse(deltaAlignmentProperties[1], out position) ? position : 0;
                        deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], out position) ? position : 0;
                        deltaAlignment.SecondSequenceEnd   = long.TryParse(deltaAlignmentProperties[3], out position) ? position : 0;

                        int count;
                        deltaAlignment.Errors           = int.TryParse(deltaAlignmentProperties[4], out count) ? count : 0;
                        deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], out count) ? count : 0;
                        deltaAlignment.NonAlphas        = int.TryParse(deltaAlignmentProperties[6], out count) ? count : 0;
                    }

                    // Fifth line onwards - deltas, up to the line starting with "*".
                    // The properties line itself is seen first and ignored because it
                    // does not parse as a single number.
                    while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                    {
                        long delta;
                        if (long.TryParse(line, out delta))
                        {
                            deltaAlignment.Deltas.Add(delta);
                        }

                        // Read the next line, skipping blank ones.
                        do
                        {
                            line = streamReader.ReadLine();
                        }
                        while (line != null && line.Length == 0);
                    }

                    deltaAlignments.Add(deltaAlignment);

                    // Read the line after the record; null means end of file.
                    line = streamReader.ReadLine();
                    if (line != null && line.StartsWith("--", StringComparison.OrdinalIgnoreCase))
                    {
                        // "--" closes the current group; start a fresh inner list.
                        result.Add(deltaAlignments);
                        deltaAlignments = new List <DeltaAlignment>();

                        // Skip until the next valid delta is found
                        do
                        {
                            line = streamReader.ReadLine();
                        }
                        while (line != null && line.StartsWith("--", StringComparison.OrdinalIgnoreCase));
                    }
                }
                while (line != null);
            }

            // A group that was not closed by "--" before the end of the file is kept.
            if (deltaAlignments.Count > 0)
            {
                result.Add(deltaAlignments);
            }

            return(result);
        }
Esempio n. 16
0
        /// <summary>
        /// Resolves reads that were placed ambiguously (aligned at more than one position).
        /// This step requires mate pair information to resolve the ambiguity about placements of repeated sequences.
        /// </summary>
        /// <remarks>
        /// A read group is copied to the result unchanged when it holds a single delta, when
        /// any of its deltas does not end on the last symbol of the query sequence, when the
        /// query id does not split on '.' and ':' into three parts with "F" or "R" in the
        /// middle, or when no group with the opposite direction and the same first and last
        /// id parts exists. Otherwise ResolveRepeatUsingMatePair decides; when it returns
        /// null the group contributes nothing. Groups without any delta are skipped.
        /// </remarks>
        /// <param name="alignmentBetweenReferenceAndReads">Alignment between reference genome and reads.</param>
        /// <returns>List of DeltaAlignments after resolving repeating reads.</returns>
        /// <exception cref="ArgumentNullException">alignmentBetweenReferenceAndReads is null.</exception>
        public static List <DeltaAlignment> ResolveAmbiguity(IList <IEnumerable <DeltaAlignment> > alignmentBetweenReferenceAndReads)
        {
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            List <DeltaAlignment> result = new List <DeltaAlignment>();

            // Work queue of groups still to process; a resolved mate is removed from it.
            List <IEnumerable <DeltaAlignment> > readDeltas = alignmentBetweenReferenceAndReads.ToList();

            while (readDeltas.Count > 0)
            {
                IEnumerable <DeltaAlignment> curReadDeltas = readDeltas[0];
                readDeltas.RemoveAt(0); // remove currently processing item from the list

                // Enumerate the group once; the checks below would otherwise walk it repeatedly.
                List <DeltaAlignment> curDeltaList = curReadDeltas.ToList();

                // An empty group has nothing to add and no first delta to read an id from.
                if (curDeltaList.Count == 0)
                {
                    continue;
                }

                // If the group has only one delta there are no repeats, so add it to result.
                // If any delta is a partial alignment, don't try to resolve; add all deltas.
                if (curDeltaList.Count == 1 ||
                    curDeltaList.Any(a => a.SecondSequenceEnd != a.QuerySequence.Count - 1))
                {
                    result.AddRange(curDeltaList);
                    continue;
                }

                // Resolve repeats
                string[] readMetadata = curDeltaList[0].QuerySequence.ID.Split('.', ':');

                // If the read does not have a proper ID, keep its deltas as they are.
                if (readMetadata.Length != 3 || (readMetadata[1] != "F" && readMetadata[1] != "R"))
                {
                    result.AddRange(curDeltaList);
                    continue;
                }

                // Find mate pair: same first and last id parts, opposite direction.
                // The search covers the full input, including groups already processed.
                string mateDirection = readMetadata[1] == "F" ? "R" : "F";
                IEnumerable <DeltaAlignment> mateDeltas = alignmentBetweenReferenceAndReads.FirstOrDefault(a =>
                {
                    DeltaAlignment firstMateDelta = a.FirstOrDefault();
                    if (firstMateDelta == null)
                    {
                        // Empty groups cannot be a mate.
                        return(false);
                    }

                    string[] matepairMetadata = firstMateDelta.QuerySequence.ID.Split('.', ':');
                    return(matepairMetadata.Length == 3 &&
                           matepairMetadata[0] == readMetadata[0] &&
                           matepairMetadata[2] == readMetadata[2] &&
                           matepairMetadata[1] == mateDirection);
                });

                // If mate pair not found, keep the deltas of the current read as they are.
                if (mateDeltas == null)
                {
                    result.AddRange(curDeltaList);
                    continue;
                }

                // Resolve using distance method
                List <DeltaAlignment> resolvedDeltas = ResolveRepeatUsingMatePair(curReadDeltas, mateDeltas);
                if (resolvedDeltas != null)
                {
                    readDeltas.Remove(mateDeltas);
                    result.AddRange(resolvedDeltas);
                }
            }

            return(result);
        }
Esempio n. 17
0
        /// <summary>
        /// Starts parsing from the specified StreamReader.
        /// </summary>
        /// <remarks>
        /// Each record is read as: a line "@" followed by the delta id, a reference id line
        /// starting with "&gt;", a query id line whose text after the last '@' is the position
        /// passed to <c>QueryParser.GetSequenceAt</c>, a line holding seven space separated
        /// alignment properties and then delta lines up to a line starting with "*".
        /// This is an iterator, so reading and any exception happen while the result is enumerated.
        /// </remarks>
        /// <param name="streamReader">Stream reader to parse.</param>
        /// <returns>IEnumerable of DeltaAlignments.</returns>
        /// <exception cref="FileFormatException">
        /// The stream is empty, a record does not start with "@" followed by a "&gt;" line,
        /// the query id line is missing or empty, or the stream ends before the properties line.
        /// </exception>
        private IEnumerable <DeltaAlignment> ParseFrom(StreamReader streamReader)
        {
            // The query sequence is re-fetched only when the query id changes between records.
            string    lastReadQuerySequenceId = string.Empty;
            ISequence sequence = null;

            string invalidFileMessage = string.Format(
                CultureInfo.InvariantCulture,
                Properties.Resource.INVALID_INPUT_FILE,
                this.DeltaFilename);

            if (streamReader.EndOfStream)
            {
                throw new FileFormatException(invalidFileMessage);
            }

            string line = ReadNextLine(streamReader);

            do
            {
                if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
                {
                    throw new FileFormatException(invalidFileMessage);
                }

                // The id is machine-written, so it is parsed culture-independently.
                long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);

                line = ReadNextLine(streamReader);
                if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
                {
                    throw new FileFormatException(invalidFileMessage);
                }

                // Reference id line
                string referenceId = line.Substring(1);

                // Query sequence id line
                line = ReadNextLine(streamReader);
                string queryId = line;

                // A record without a query id cannot produce an alignment; report it as a
                // format error instead of failing later on a null alignment.
                if (string.IsNullOrEmpty(queryId))
                {
                    throw new FileFormatException(invalidFileMessage);
                }

                // Fetch the query sequence from the query file
                if (queryId != lastReadQuerySequenceId)
                {
                    long seqPosition = long.Parse(queryId.Substring(queryId.LastIndexOf('@') + 1), CultureInfo.InvariantCulture);
                    sequence = this.QueryParser.GetSequenceAt(seqPosition);
                    lastReadQuerySequenceId = queryId;
                }

                // Placeholder reference sequence that only carries the reference id.
                Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false);
                refEmpty.ID = referenceId;

                DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);
                deltaAlignment.Id = deltaPosition;

                // Properties line of the delta alignment
                line = ReadNextLine(streamReader);
                if (line == null)
                {
                    // The stream ended in the middle of a record.
                    throw new FileFormatException(invalidFileMessage);
                }

                string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (deltaAlignmentProperties.Length == 7)
                {
                    long position;
                    deltaAlignment.FirstSequenceStart  = long.TryParse(deltaAlignmentProperties[0], out position) ? position : 0;
                    deltaAlignment.FirstSequenceEnd    = long.TryParse(deltaAlignmentProperties[1], out position) ? position : 0;
                    deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], out position) ? position : 0;
                    deltaAlignment.SecondSequenceEnd   = long.TryParse(deltaAlignmentProperties[3], out position) ? position : 0;

                    int count;
                    deltaAlignment.Errors           = int.TryParse(deltaAlignmentProperties[4], out count) ? count : 0;
                    deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], out count) ? count : 0;
                    deltaAlignment.NonAlphas        = int.TryParse(deltaAlignmentProperties[6], out count) ? count : 0;
                }

                // Delta lines, up to the line starting with "*". The properties line itself
                // is seen first and ignored because it does not parse as a single number.
                while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
                {
                    long delta;
                    if (long.TryParse(line, out delta))
                    {
                        deltaAlignment.Deltas.Add(delta);
                    }

                    // Read the next line, skipping blank ones.
                    do
                    {
                        line = streamReader.ReadLine();
                    }
                    while (line != null && line.Length == 0);
                }

                yield return(deltaAlignment);

                // Read the first line of the next record; null means end of stream.
                line = streamReader.ReadLine();
            }
            while (line != null);
        }
Esempio n. 18
0
        /// <summary>
        /// Refines alignment layout by taking in consideration indels (insertions and deletions) and rearrangements between two genomes.
        /// Requires mate-pair information to resolve ambiguity.
        /// </summary>
        /// <remarks>
        /// The deltas are walked in order through a windowed cache. FirstSequenceStart and
        /// FirstSequenceEnd of the deltas are shifted in place by a running offset. This is an
        /// iterator, so the argument check and all work happen while the result is enumerated.
        /// </remarks>
        /// <param name="orderedDeltas">Order deltas.</param>
        /// <returns>
        /// The deltas handed back by the cache: those released by TryUnload during the walk,
        /// followed by the ones returned by GetCachedDeltas at the end. Nothing is yielded for an empty collection.
        /// </returns>
        /// <exception cref="ArgumentNullException">orderedDeltas is null (raised on first enumeration).</exception>
        public static IEnumerable <DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
        {
            if (orderedDeltas == null)
            {
                throw new ArgumentNullException("orderedDeltas");
            }

            if (orderedDeltas.Count == 0)
            {
                yield break;
            }

            // As we don't know the maximum possible number of inserts and deletes,
            // a fixed window is assumed to be sufficient for the operation.
            // NOTE(review): this comment used to say 1,000,000 deltas, but the value is 1000 - confirm which is intended.
            int windowSize = 1000;

            VirtualDeltaAlignmentCollection deltaCatche = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

            List <DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
            List <DeltaAlignment> leftSideDeltas  = null;
            List <DeltaAlignment> rightSideDeltas = null;
            List <DeltaAlignment> unloadedDeltas  = null;

            try
            {
                deltasOverlappingAtCurrentIndex = new List <DeltaAlignment>();
                leftSideDeltas  = new List <DeltaAlignment>();
                rightSideDeltas = new List <DeltaAlignment>();

                // Running shift added to the reference coordinates of every following delta.
                long           currentProcessedOffset = 0;
                DeltaAlignment alignment = deltaCatche[0];
                deltasOverlappingAtCurrentIndex.Add(alignment);

                // Delta of the current overlap group that reaches farthest on the reference.
                DeltaAlignment deltaWithLargestEndIndex = alignment;

                for (int currentIndex = 0; currentIndex < deltaCatche.Count - 1; currentIndex++)
                {
                    DeltaAlignment nextDelta = deltaCatche[currentIndex + 1];
                    unloadedDeltas = null;

                    // Hand out whatever the cache releases at this point.
                    // NOTE(review): presumably TryUnload drops deltas before the given index from the cache - confirm.
                    if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return(unloadedDeltas[i]);
                        }

                        unloadedDeltas.Clear();
                    }

                    // Apply the shift accumulated from earlier extensions and closed gaps.
                    if (currentProcessedOffset != 0)
                    {
                        nextDelta.FirstSequenceStart += currentProcessedOffset;
                        nextDelta.FirstSequenceEnd   += currentProcessedOffset;
                    }

                    // Check if next delta is just adjacent
                    if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                        // Try to extend the deltas from both sides and make them meet

                        // Left side: deltas of the current group that reach at least as far as the farthest one.
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the adjacent right side
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                        {
                            DeltaAlignment delta = deltaCatche[index];
                            unloadedDeltas = null;

                            // Note: the unload position stays at currentIndex + 1, not at index.
                            if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return(unloadedDeltas[i]);
                                }

                                unloadedDeltas.Clear();
                            }

                            // Stop at the first delta that starts somewhere else.
                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                        if (offset != 0)
                        {
                            nextDelta.FirstSequenceStart += offset;
                            nextDelta.FirstSequenceEnd   += offset;
                        }

                        // Later deltas are shifted by the same amount.
                        currentProcessedOffset += offset;
                    }
                    else
                    if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // Check if next delta overlaps with current overlap group
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);

                        // Check if nextDelta is reaching farther than the current farthest delta
                        if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltaWithLargestEndIndex = nextDelta;
                        }

                        // Keep the group bounded: once it exceeds the window, drop the
                        // deltas that end before the farthest one.
                        if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                        {
                            // Iterate backwards so RemoveAt does not shift unvisited items.
                            for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                            {
                                if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                                {
                                    deltasOverlappingAtCurrentIndex.RemoveAt(i);
                                }
                            }
                        }
                    }
                    else
                    {
                        // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                        // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the right end of the gap
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                        {
                            DeltaAlignment delta = deltaCatche[index];
                            unloadedDeltas = null;

                            // Note: the unload position stays at currentIndex + 1, not at index.
                            if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return(unloadedDeltas[i]);
                                }

                                unloadedDeltas.Clear();
                            }

                            // Stop at the first delta that starts somewhere else.
                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        // Vote: +1 for every left delta whose query id also appears on the
                        // right side of the gap, -1 for every left delta without a match.
                        int score = 0;
                        for (int i = 0; i < leftSideDeltas.Count; i++)
                        {
                            var l = leftSideDeltas[i];
                            int j = 0;

                            for (; j < rightSideDeltas.Count; j++)
                            {
                                var r = rightSideDeltas[j];

                                // if (object.ReferenceEquals(l.QuerySequence, r.QuerySequence))
                                // A reference check is not possible here, so the ids are compared
                                // instead; ids are unique for a given read.
                                if (l.QuerySequence.ID == r.QuerySequence.ID)
                                {
                                    score++;
                                    break;
                                }
                            }

                            // The inner loop ran to the end, so no right delta matched.
                            if (j == rightSideDeltas.Count)
                            {
                                score--;
                            }
                        }

                        // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                        if (score > 0)
                        {
                            long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                            currentProcessedOffset -= gaplength;

                            // Pull deltas on right side to close the gap
                            for (int i = 0; i < rightSideDeltas.Count; i++)
                            {
                                DeltaAlignment delta = rightSideDeltas[i];
                                delta.FirstSequenceStart -= gaplength;
                                delta.FirstSequenceEnd   -= gaplength;
                                // deltaCatche.Update(delta.Id);
                            }
                        }

                        // Start a new group from the right side of the gap
                        deltaWithLargestEndIndex = nextDelta;
                        deltasOverlappingAtCurrentIndex.Clear();
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);
                    }
                }

                // Hand out everything the cache still holds after the walk.
                unloadedDeltas = deltaCatche.GetCachedDeltas();

                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return(unloadedDeltas[i]);
                }

                unloadedDeltas.Clear();
            }
            finally
            {
                // Release the working lists; this also runs when enumeration is abandoned
                // early and the iterator is disposed.
                if (deltasOverlappingAtCurrentIndex != null)
                {
                    deltasOverlappingAtCurrentIndex.Clear();
                    deltasOverlappingAtCurrentIndex = null;
                }

                if (leftSideDeltas != null)
                {
                    leftSideDeltas.Clear();
                    leftSideDeltas = null;
                }

                if (rightSideDeltas != null)
                {
                    rightSideDeltas.Clear();
                    rightSideDeltas = null;
                }

                if (deltaCatche != null)
                {
                    deltaCatche = null;
                }
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Tries to extend two groups of deltas towards each other using the parts of their
        /// query sequences that lie outside the aligned region.
        /// A majority symbol is picked per position from the query symbols following
        /// <c>SecondSequenceEnd</c> of the left side deltas, and from the query symbols preceding
        /// <c>SecondSequenceStart</c> of the right side deltas. If both extensions are non-empty
        /// and <c>FindMaxOverlap</c> reports an overlap, the deltas are updated in place to cover
        /// their whole query sequence on the extended side.
        /// </summary>
        /// <param name="leftSideDeltas">
        /// Deltas extended past their <c>SecondSequenceEnd</c>. On success their
        /// <c>FirstSequenceEnd</c> and <c>SecondSequenceEnd</c> are moved to the last query symbol.
        /// </param>
        /// <param name="rightSideDeltas">
        /// Deltas extended before their <c>SecondSequenceStart</c>. On success their
        /// <c>FirstSequenceStart</c> is reduced by the old <c>SecondSequenceStart</c> and
        /// <c>SecondSequenceStart</c> is set to 0.
        /// </param>
        /// <returns>
        /// 0 when the extension is abandoned (no clear majority symbol at some position, one side
        /// has nothing to extend with, or no overlap was found); otherwise the length of the right
        /// extension plus the overlap start returned by <c>FindMaxOverlap</c>.
        /// </returns>
        private static long ExtendDeltas(List<DeltaAlignment> leftSideDeltas, List<DeltaAlignment> rightSideDeltas)
        {
            long extendedIndex = 1;

            // Count table indexed by the upper-cased symbol value. Only the A/C/G/T slots are
            // reset for each position, exactly as before.
            int[]      symbolCount    = new int[255];
            List<byte> leftExtension  = new List<byte>();
            List<byte> rightExtension = new List<byte>();

            #region left extension

            // Left extension: walk to the right of SecondSequenceEnd, one query position at a time.
            do
            {
                symbolCount['A'] = symbolCount['C'] = symbolCount['G'] = symbolCount['T'] = 0;

                // Tally the symbol each query contributes at the current offset (if it is that long).
                for (int index = 0; index < leftSideDeltas.Count; index++)
                {
                    DeltaAlignment da = leftSideDeltas[index];

                    if (da.QuerySequence.Count > da.SecondSequenceEnd + extendedIndex)
                    {
                        AddToSymbolCount(symbolCount, da.QuerySequence[da.SecondSequenceEnd + extendedIndex]);
                    }
                }

                // No A/C/G/T at the current position: the extension ends here.
                if (symbolCount['A'] == 0 && symbolCount['C'] == 0 && symbolCount['G'] == 0 && symbolCount['T'] == 0)
                {
                    break;
                }

                // Find the most frequent symbol and the runner-up.
                byte indexLargest, indexSecond;
                FindLargestAndSecondLargest(symbolCount, out indexLargest, out indexSecond);

                // Abandon the whole extension when there is no clear majority, i.e. the runner-up
                // count exceeds half (integer division) of the top count.
                if (symbolCount[indexSecond] > symbolCount[indexLargest] / 2)
                {
                    return(0);
                }

                leftExtension.Add(indexLargest); // the index is the byte value of the winning symbol

                extendedIndex++;
            } while (true);

            #endregion

            #region Right extension

            // Right extension: walk to the left of SecondSequenceStart, one query position at a time.
            extendedIndex = 1;
            do
            {
                symbolCount['A'] = symbolCount['C'] = symbolCount['G'] = symbolCount['T'] = 0;

                // Tally the symbol each query contributes at the current offset (if it reaches that far).
                for (int index = 0; index < rightSideDeltas.Count; index++)
                {
                    DeltaAlignment da = rightSideDeltas[index];

                    if (da.SecondSequenceStart - extendedIndex >= 0)
                    {
                        AddToSymbolCount(symbolCount, da.QuerySequence[da.SecondSequenceStart - extendedIndex]);
                    }
                }

                // No A/C/G/T at the current position: the extension ends here.
                if (symbolCount['A'] == 0 && symbolCount['C'] == 0 && symbolCount['G'] == 0 && symbolCount['T'] == 0)
                {
                    break;
                }

                // Find the most frequent symbol and the runner-up.
                byte indexLargest, indexSecond;
                FindLargestAndSecondLargest(symbolCount, out indexLargest, out indexSecond);

                // Same majority rule as for the left extension.
                if (symbolCount[indexSecond] > symbolCount[indexLargest] / 2)
                {
                    return(0);
                }

                // Symbols are discovered right-to-left; collect them in discovery order and
                // reverse once below instead of shifting the whole list on every insert.
                rightExtension.Add(indexLargest);

                extendedIndex++;
            } while (true);

            // Restore left-to-right order of the right extension.
            rightExtension.Reverse();

            #endregion

            // One of the sides cannot be extended, so cancel the extension.
            if (leftExtension.Count == 0 || rightExtension.Count == 0)
            {
                return(0);
            }

            int overlapStart = FindMaxOverlap(leftExtension, rightExtension);

            // -1 is treated as "the two extensions do not overlap".
            if (overlapStart == -1)
            {
                return(0);
            }

            // Update left side deltas: stretch them to the last symbol of their query.
            for (int index = 0; index < leftSideDeltas.Count; index++)
            {
                var d = leftSideDeltas[index];
                d.FirstSequenceEnd += (d.QuerySequence.Count - 1) - d.SecondSequenceEnd;
                d.SecondSequenceEnd = d.QuerySequence.Count - 1;
            }

            // Update right side deltas: stretch them to the first symbol of their query.
            int toRightOffset = rightExtension.Count + overlapStart;
            for (int index = 0; index < rightSideDeltas.Count; index++)
            {
                var d = rightSideDeltas[index];
                d.FirstSequenceStart += toRightOffset - d.SecondSequenceStart;

                // Subtracting as all these deltas will be processed in the outer loop
                d.FirstSequenceStart -= toRightOffset;
                d.SecondSequenceStart = 0;
            }

            return(toRightOffset);
        }

        /// <summary>
        /// Increments the counter of the upper-cased form of <paramref name="symbol"/>.
        /// Symbols whose upper-cased code point does not fit in the table are ignored; a few
        /// non-ASCII byte values upper-case to code points above 255 and would otherwise raise
        /// an <c>IndexOutOfRangeException</c>.
        /// </summary>
        /// <param name="symbolCount">Count table indexed by symbol value.</param>
        /// <param name="symbol">Raw symbol read from a query sequence.</param>
        private static void AddToSymbolCount(int[] symbolCount, byte symbol)
        {
            int slot = char.ToUpperInvariant((char)symbol);
            if (slot < symbolCount.Length)
            {
                symbolCount[slot]++;
            }
        }
Esempio n. 20
0
        /// <summary>
        /// Builds the sequence described by a delta: the query symbols between
        /// <c>SecondSequenceStart</c> and <c>SecondSequenceEnd</c> with the delta's indel list
        /// applied. A positive indel entry emits a gap symbol, a negative entry drops one
        /// query symbol.
        /// </summary>
        /// <param name="deltaAlignment">DeltaAlignment instance.</param>
        /// <returns>A new sequence over the ambiguous DNA alphabet carrying the query sequence ID.</returns>
        private static ISequence GetSequenceFromDelta(DeltaAlignment deltaAlignment)
        {
            long totalIndels   = deltaAlignment.Deltas.Count;
            int  nextIndelSlot = 0;

            // Current indel entry; 0 means "no indel pending".
            long currentIndel = 0;
            if (nextIndelSlot < totalIndels)
            {
                currentIndel = deltaAlignment.Deltas[nextIndelSlot++];
            }

            // Query position at which the pending indel applies (entries are relative offsets).
            long indelPosition = deltaAlignment.SecondSequenceStart - 1;
            indelPosition += currentIndel >= 0 ? currentIndel : -currentIndel;

            // Output length: aligned query span, plus one per positive entry (gap written),
            // minus one per negative entry (query symbol skipped).
            int gapsToWrite   = 0;
            int symbolsToSkip = 0;
            foreach (long entry in deltaAlignment.Deltas)
            {
                if (entry > 0)
                {
                    gapsToWrite++;
                }
                else if (entry < 0)
                {
                    symbolsToSkip++;
                }
            }

            long outputLength = deltaAlignment.SecondSequenceEnd - deltaAlignment.SecondSequenceStart + 1 +
                                gapsToWrite - symbolsToSkip;

            byte[] output      = new byte[outputLength];
            long   writeCursor = 0;
            long   readCursor  = deltaAlignment.SecondSequenceStart;

            while (readCursor <= deltaAlignment.SecondSequenceEnd)
            {
                // Ordinary position: copy the query symbol and move on.
                if (currentIndel == 0 || readCursor != indelPosition)
                {
                    output[writeCursor++] = deltaAlignment.QuerySequence[readCursor];
                    readCursor++;
                    continue;
                }

                if (currentIndel > 0)
                {
                    // A symbol is missing from the query here: write a gap and stay on the
                    // same query symbol, so the next relative offset starts one position earlier.
                    output[writeCursor++] = AmbiguousDnaAlphabet.Instance.Gap;
                    indelPosition--;
                }
                else
                {
                    // An extra symbol is present in the query here: skip it.
                    readCursor++;
                }

                // Advance to the next indel entry, or mark that none is left.
                currentIndel = nextIndelSlot < totalIndels
                    ? deltaAlignment.Deltas[nextIndelSlot++]
                    : 0;

                indelPosition += currentIndel >= 0 ? currentIndel : -currentIndel;
            }

            Sequence result = new Sequence(AmbiguousDnaAlphabet.Instance, output);
            result.ID = deltaAlignment.QuerySequence.ID;
            return result;
        }
Esempio n. 21
0
        /// <summary>
        /// Generates consensus sequences (contigs) from an alignment layout.
        /// The argument is validated immediately; the contigs themselves are produced lazily
        /// while the returned sequence is enumerated.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">
        /// Delta alignments between the reference and the reads. They are consumed in index
        /// order and matched on <c>FirstSequenceStart</c> (presumably the collection is sorted
        /// by that value; confirm against the caller).
        /// </param>
        /// <returns>List of contigs.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Validate here rather than inside the iterator: an iterator body does not run
            // until the first MoveNext, which would delay the exception to an unrelated call site.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Lazily walks the deltas and yields one consensus sequence per contig, where a contig
        /// is a maximal run of positions covered by at least one delta (including deltas that
        /// start exactly where the previous one ended).
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null collection of delta alignments.</param>
        /// <returns>Consensus sequence of each contig, in the order the contigs are encountered.</returns>
        private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // NOTE(review): 49 is passed to the resolver as its threshold argument; presumably a percentage, confirm.
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // This dictionary is not expected to grow beyond a few hundred entries in the worst
            // case, as it only stores the deltas overlapping the current position and their sequences.
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            long currentAlignmentStartOffset = 0;
            long currentIndex = 0;

            List<byte>           currentContig  = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // no deltas
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long index = 0;

            // Next delta that has not yet been added to a contig; null once all are consumed.
            DeltaAlignment lastDelta = alignmentBetweenReferenceAndReads[index];

            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // loop through all deltas at current index and find consensus
                do
                {
                    // Pull in every delta that starts aligning at the current reference position.
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Get next delta
                        index++;
                        if (alignmentBetweenReferenceAndReads.Count > index)
                        {
                            lastDelta = alignmentBetweenReferenceAndReads[index];
                            continue; // see if new delta starts from the same offset
                        }
                        else
                        {
                            lastDelta = null;
                        }
                    }

                    // Collect the symbol every active delta contributes at the current position.
                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int    symbolCounter         = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // The delta's last symbol was just consumed: retire it after the enumeration,
                        // since the dictionary cannot be modified while it is being enumerated.
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    if (deltasToRemove.Count > 0)
                    {
                        for (int i = 0; i < deltasToRemove.Count; i++)
                        {
                            deltasInCurrentContig.Remove(deltasToRemove[i]);
                        }

                        deltasToRemove.Clear();
                    }

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // See if another delta is adjacent, i.e. starts right where the contig would
                    // otherwise end; if so the contig continues with it.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // check next delta
                        index++;
                        if (alignmentBetweenReferenceAndReads.Count > index)
                        {
                            lastDelta = alignmentBetweenReferenceAndReads[index];
                            continue; // read next delta to see if it starts from current reference sequence offset
                        }
                        else
                        {
                            lastDelta = null;
                        }
                    }
                }while (deltasInCurrentContig.Count > 0);

                yield return(new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false));
            }while (lastDelta != null);
        }