/// <summary>
/// Builds a delta alignment from the first and last resolved sequences and returns its deltas.
/// </summary>
/// <param name="sequenceData">
/// Sequence descriptors. Each one is resolved through the provider using its file name and
/// content; a null entry is passed to the provider as a pair of nulls.
/// </param>
/// <returns>The <c>Deltas</c> collection of the newly created alignment.</returns>
public IList<long> GetDeltas(SequenceModel[] sequenceData)
{
    // Only the first sequence the provider yields for each descriptor is used.
    var resolved = sequenceData
        .Select(model => _provider.Provide(model?.FileName, model?.Content).First())
        .ToList();

    var reference = resolved.First();
    var query = resolved.Last();

    // The alignment is stored in the _aligner field, replacing whatever was there before.
    _aligner = new DeltaAlignment(reference, query);

    return _aligner.Deltas;
}
/// <summary>
/// Writes the delta alignments to the output file in the order given by the sorter.
/// </summary>
/// <param name="sorter">Sorter instance that supplies the sorted delta ids.</param>
/// <param name="unsortedDeltaFilename">Name of the file holding the unsorted deltas.</param>
/// <param name="queryParser">Query/read sequences parser.</param>
/// <param name="outputfilename">Name of the file the sorted deltas are written to.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sorter"/> is null.</exception>
public static void WriteSortedDelta(DeltaAlignmentSorter sorter, string unsortedDeltaFilename, FastASequencePositionParser queryParser, string outputfilename)
{
    if (sorter == null)
    {
        throw new ArgumentNullException("sorter");
    }

    using (var unsortedParser = new DeltaAlignmentParser(unsortedDeltaFilename, queryParser))
    using (var output = new StreamWriter(outputfilename))
    {
        // Running count of characters written so far; it becomes the Id of the next delta.
        long writtenLength = 0;

        foreach (long sortedId in sorter.GetSortedIds())
        {
            DeltaAlignment alignment = unsortedParser.GetDeltaAlignmentAt(sortedId);
            alignment.Id = writtenLength;

            string serialized = Helper.GetString(alignment);
            writtenLength += serialized.Length;
            output.Write(serialized);
        }

        output.Flush();
    }
}
/// <summary>
/// Builds a delta alignment from the first and last resolved sequences and reports
/// whether its query direction is reverse.
/// </summary>
/// <param name="sequenceData">
/// Sequence descriptors. Each one is resolved through the provider using its file name and
/// content; a null entry is passed to the provider as a pair of nulls.
/// </param>
/// <returns>The <c>IsReverseQueryDirection</c> value of the newly created alignment.</returns>
public bool IsReverseQueryDirection(SequenceModel[] sequenceData)
{
    // Only the first sequence the provider yields for each descriptor is used.
    var resolved = sequenceData
        .Select(model => _provider.Provide(model?.FileName, model?.Content).First())
        .ToList();

    var reference = resolved.First();
    var query = resolved.Last();

    // The alignment is stored in the _aligner field, replacing whatever was there before.
    _aligner = new DeltaAlignment(reference, query);

    return _aligner.IsReverseQueryDirection;
}
/// <summary>
/// Validates DeltaAlignment.ToString() in two cases: an alignment over two in-memory DNA
/// sequences with no explicit direction, and a reverse-direction alignment whose reference
/// sequence is parsed from a file. Expected strings and the file path come from the XML
/// test configuration.
/// </summary>
public void ValidateDeltaAlignmentToString()
{
    ISequence reference = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence query = new Sequence(Alphabets.DNA, "GGGGG");

    // Case 1: both sequences built in memory.
    DeltaAlignment inMemoryDelta = new DeltaAlignment(reference, query);
    inMemoryDelta.FirstSequenceEnd = 21;
    inMemoryDelta.SecondSequenceEnd = 20;

    string inMemoryActual = inMemoryDelta.ToString();
    string inMemoryExpected = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpectedNode);
    Assert.AreEqual(inMemoryExpected, inMemoryActual);

    // Case 2: the reference sequence is the first sequence parsed from the configured file.
    string fastaPath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName,
        Constants.FilePathNode);

    List<ISequence> parsedSequences;
    using (var stream = File.OpenRead(fastaPath))
    {
        var fastaParser = new FastAParser();
        fastaParser.Alphabet = Alphabets.Protein;
        parsedSequences = fastaParser.Parse(stream).ToList();
    }

    DeltaAlignment reverseDelta = new DeltaAlignment(parsedSequences[0], query);
    reverseDelta.FirstSequenceEnd = 21;
    reverseDelta.SecondSequenceStart = 20;
    reverseDelta.QueryDirection = Cluster.ReverseDirection;

    string reverseActual = reverseDelta.ToString();
    string reverseExpected = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpected2Node);
    Assert.AreEqual(reverseExpected, reverseActual);
}
/// <summary>
/// Writes the deltas in sorted order. Output goes to <c>OutputFile</c> when that property is
/// set (the console is redirected for the duration of the call) and to the console otherwise.
/// </summary>
/// <param name="sorter">Sorter that supplies the ids of the deltas in sorted order.</param>
private void WriteDelta(DeltaAlignmentSorter sorter)
{
    FastASequencePositionParser sequenceParser = null;
    DeltaAlignmentParser unsortedDeltaParser = null;
    TextWriter originalConsoleOut = Console.Out;
    StreamWriter redirectedOut = null;

    try
    {
        sequenceParser = new FastASequencePositionParser(this.FilePath[1], true);
        unsortedDeltaParser = new DeltaAlignmentParser(UnsortedDeltaFile, sequenceParser);

        if (!string.IsNullOrEmpty(this.OutputFile))
        {
            redirectedOut = new StreamWriter(this.OutputFile);
            Console.SetOut(redirectedOut);
        }

        // Running count of characters written so far; it becomes the Id of the next delta.
        long deltaPositionInFile = 0;
        foreach (long id in sorter.GetSortedIds())
        {
            DeltaAlignment deltaAlignment = unsortedDeltaParser.GetDeltaAlignmentAt(id);
            deltaAlignment.Id = deltaPositionInFile;

            string deltaString = Helper.GetString(deltaAlignment);
            deltaPositionInFile += deltaString.Length;
            Console.Write(deltaString);
        }

        Console.Out.Flush();
    }
    finally
    {
        // Restore the console first: if a Dispose below throws, Console.Out must not be
        // left pointing at a closed writer.
        Console.SetOut(originalConsoleOut);

        // Nested try/finally so that one failing Dispose does not skip the others.
        try
        {
            if (redirectedOut != null)
            {
                redirectedOut.Dispose();
            }
        }
        finally
        {
            try
            {
                if (sequenceParser != null)
                {
                    sequenceParser.Dispose();
                }
            }
            finally
            {
                if (unsortedDeltaParser != null)
                {
                    unsortedDeltaParser.Dispose();
                }
            }
        }
    }
}
/// <summary>
/// Verifies the string produced by DeltaAlignment.ToString() for an alignment over two
/// in-memory DNA sequences with only the two end positions set.
/// </summary>
public void TestDeltaAlignmentToString()
{
    ISequence refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence qrySeq = new Sequence(Alphabets.DNA, "GGGGG");

    DeltaAlignment delta = new DeltaAlignment(refSeq, qrySeq);
    delta.FirstSequenceEnd = 21;
    delta.SecondSequenceEnd = 20;

    string actualString = delta.ToString();
    string expectedString = "Ref ID= Query Id= Ref start=0 Ref End=21 Query start=0 Query End=20, Direction=FORWARD";

    // Expected value goes first so a failure message labels the two strings correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Removes the first <c>windowSize</c> cached deltas from the cache and hands them back.
/// </summary>
/// <returns>The deltas that were removed, in their cached order.</returns>
private List<DeltaAlignment> Unload()
{
    List<DeltaAlignment> window = new List<DeltaAlignment>();

    int index = 0;
    while (index < windowSize)
    {
        window.Add(this.catchedDeltas[index]);
        index++;
    }

    this.catchedDeltas.RemoveRange(0, windowSize);

    // The start index advances by one window to account for the removed entries.
    startIndexInCatchedList += windowSize;

    return window;
}
/// <summary>
/// Validates DeltaAlignment.ToString() in two cases: an alignment over two in-memory DNA
/// sequences, and an alignment whose reference sequence is parsed from a file. Expected
/// strings and the file path come from the XML test configuration.
/// </summary>
public void ValidateDeltaAlignmentToString()
{
    ISequence refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence qrySeq = new Sequence(Alphabets.DNA, "GGGGG");

    DeltaAlignment delta = new DeltaAlignment(refSeq, qrySeq);
    delta.FirstSequenceEnd = 21;
    delta.SecondSequenceEnd = 20;

    string actualString = delta.ToString();
    string expectedString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpectedNode);

    // Expected value goes first so a failure message labels the two strings correctly.
    Assert.AreEqual(expectedString, actualString);

    // Read the reference sequence for the second case from the file named in the XML.
    List<ISequence> seqsList;
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.FilePathNode);
    using (StreamReader reader = new StreamReader(filePath))
    {
        using (FastAParser parser = new FastAParser())
        {
            parser.Alphabet = Alphabets.Protein;

            // Materialize while the reader is still open.
            seqsList = parser.Parse(reader).ToList();
        }
    }

    DeltaAlignment delta1 = new DeltaAlignment(seqsList[0], qrySeq);
    delta1.FirstSequenceEnd = 21;
    delta1.SecondSequenceEnd = 20;

    string actualString1 = delta1.ToString();
    string expectedString1 = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName, Constants.DeltaAlignmentExpected2Node);
    Assert.AreEqual(expectedString1, actualString1);
}
/// <summary>
/// Resolve repeats between two sets of deltas coming from paired reads.
/// </summary>
/// <param name="curReadDeltas">Deltas from a read.</param>
/// <param name="mateDeltas">Deltas from the mate pair.</param>
/// <param name="libraryName">Name of the clone library whose insert statistics are used.</param>
/// <returns>
/// A two-element list holding the first read delta and mate delta whose distance lies within
/// one standard deviation of the library's mean insert length, or null when any mate delta is
/// only partially aligned or no such pair exists.
/// </returns>
private static List<DeltaAlignment> ResolveRepeatUsingMatePair(List<DeltaAlignment> curReadDeltas, List<DeltaAlignment> mateDeltas, string libraryName)
{
    // Check if all mate pairs are completely aligned, else return null (cannot resolve).
    if (mateDeltas.Any(a => a.SecondSequenceEnd != a.QuerySequence.Count - 1))
    {
        return null;
    }

    // Get clone library information.
    CloneLibraryInformation libraryInfo = CloneLibrary.Instance.GetLibraryInformation(libraryName);
    float mean = libraryInfo.MeanLengthOfInsert;
    float stdDeviation = libraryInfo.StandardDeviationOfInsert;

    // Find the first pair of deltas with a matching distance.
    foreach (DeltaAlignment pair1 in curReadDeltas)
    {
        foreach (DeltaAlignment pair2 in mateDeltas)
        {
            long distance = Math.Abs(pair1.FirstSequenceStart - pair2.FirstSequenceEnd);

            // Compare the absolute deviation from the mean. A signed comparison would
            // accept every distance shorter than the mean, however far off it is.
            if (Math.Abs(distance - mean) <= stdDeviation)
            {
                return new List<DeltaAlignment>(2) { pair1, pair2 };
            }
        }
    }

    return null;
}
/// <summary>
/// Writes the deltas in sorted order. Output goes to <paramref name="outputfilename"/> when it
/// is non-empty (the console is redirected for the duration of the call) and to the console
/// otherwise.
/// </summary>
/// <param name="sorter">Sorter instance that supplies the sorted delta ids.</param>
/// <param name="unsortedDeltaFilename">Unsorted Delta Filename.</param>
/// <param name="queryParser">Query/read sequences parser.</param>
/// <param name="outputfilename">Output file name; null or empty means write to the console.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sorter"/> is null.</exception>
private static void WriteSortedDelta(DeltaAlignmentSorter sorter, string unsortedDeltaFilename, FastASequencePositionParser queryParser, string outputfilename)
{
    // Fail before any file is opened or created.
    if (sorter == null)
    {
        throw new ArgumentNullException("sorter");
    }

    using (DeltaAlignmentParser unsortedDeltaParser = new DeltaAlignmentParser(unsortedDeltaFilename, queryParser))
    {
        TextWriter originalConsoleOut = Console.Out;
        StreamWriter redirectedOut = null;

        try
        {
            if (!string.IsNullOrEmpty(outputfilename))
            {
                redirectedOut = new StreamWriter(outputfilename);
                Console.SetOut(redirectedOut);
            }

            // Running count of characters written so far; it becomes the Id of the next delta.
            long deltaPositionInFile = 0;
            foreach (long id in sorter.GetSortedIds())
            {
                DeltaAlignment deltaAlignment = unsortedDeltaParser.GetDeltaAlignmentAt(id);
                deltaAlignment.Id = deltaPositionInFile;

                string deltaString = Helper.GetString(deltaAlignment);
                deltaPositionInFile += deltaString.Length;
                Console.Write(deltaString);
            }

            Console.Out.Flush();
        }
        finally
        {
            // Restore the console first: if Dispose throws, Console.Out must not be left
            // pointing at a closed writer.
            Console.SetOut(originalConsoleOut);

            if (redirectedOut != null)
            {
                redirectedOut.Dispose();
            }
        }
    }
}
/// <summary>
/// Gets the DeltaAlignment at specified position of the file.
/// </summary>
/// <param name="position">Position at which delta alignment is required.</param>
/// <returns>Delta alignment.</returns>
/// <exception cref="FormatException">
/// Thrown when the record at <paramref name="position"/> does not start with '@', or when the
/// id after '@' differs from <paramref name="position"/>.
/// </exception>
/// <exception cref="FileFormatException">
/// Thrown when the reference id line is missing or does not start with '>', or when the record
/// ends before its properties line.
/// </exception>
public DeltaAlignment GetDeltaAlignmentAt(long position)
{
    // The reader is created on first use and kept in a field for later calls.
    if (this.deltaFileReader == null)
    {
        this.deltaFileReader = new StreamReader(new FileStream(this.DeltaFilename, FileMode.Open, FileAccess.Read));
    }

    // Move the underlying stream and drop buffered data so the next read starts at 'position'.
    this.deltaFileReader.BaseStream.Position = position;
    this.deltaFileReader.DiscardBufferedData();

    // Record header: '@' followed by the record's own position.
    string line = ReadNextLine(this.deltaFileReader);
    if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
    {
        throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position, this.DeltaFilename));
    }

    long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
    if (position != deltaPosition)
    {
        throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position, this.DeltaFilename));
    }

    // Reference id line: '>' followed by the id.
    line = ReadNextLine(this.deltaFileReader);
    if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            Properties.Resource.INVALID_INPUT_FILE,
            this.DeltaFilename);
        throw new FileFormatException(message);
    }

    string referenceId = line.Substring(1);

    // Query sequence id line.
    line = ReadNextLine(this.deltaFileReader);
    string queryId = line;

    // Fetch the query sequence from the query file; the number after the last '@'
    // in the id is handed to the query parser.
    ISequence querySequence = null;
    Sequence refEmpty = null;
    if (!string.IsNullOrEmpty(queryId))
    {
        long sequencePosition = long.Parse(queryId.Substring(queryId.LastIndexOf("@", StringComparison.Ordinal) + 1), CultureInfo.InvariantCulture);
        querySequence = this.QueryParser.GetSequenceAt(sequencePosition);

        // Placeholder reference sequence that only carries the reference id.
        refEmpty = new Sequence(querySequence.Alphabet, "A", false);
        refEmpty.ID = referenceId;
    }

    DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence);
    deltaAlignment.Id = deltaPosition;

    // Properties line. A record that ends here is reported as an invalid file
    // instead of failing with a NullReferenceException.
    line = ReadNextLine(this.deltaFileReader);
    if (line == null)
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            Properties.Resource.INVALID_INPUT_FILE,
            this.DeltaFilename);
        throw new FileFormatException(message);
    }

    // Numbers in the file are machine-written, so they are parsed with the invariant
    // culture; a culture-specific negative sign must not cause values to be dropped.
    string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    if (deltaAlignmentProperties.Length == 7)
    {
        long position64;
        deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out position64) ? position64 : 0;
        deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out position64) ? position64 : 0;
        deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out position64) ? position64 : 0;
        deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out position64) ? position64 : 0;

        int count;
        deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
        deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
        deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
    }

    // Delta lines follow until a line starting with '*' or the end of the file.
    // The first pass sees the properties line, which is skipped unless it is a single integer.
    while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
    {
        long delta;
        if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
        {
            deltaAlignment.Deltas.Add(delta);
        }

        // Read the next line, skipping blank ones.
        line = this.deltaFileReader.ReadLine();
        while (line != null && line.Length == 0)
        {
            line = this.deltaFileReader.ReadLine();
        }
    }

    return deltaAlignment;
}
/// <summary>
/// Refines the alignment layout by accounting for indels (insertions and deletions) and
/// rearrangements between two genomes. The reference-side start and end positions of the
/// given deltas are adjusted in place.
/// </summary>
/// <param name="orderedDeltas">Ordered deltas; an empty list is left untouched.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public static void RefineLayout(IList<DeltaAlignment> orderedDeltas)
{
    if (orderedDeltas == null)
    {
        throw new ArgumentNullException("orderedDeltas");
    }

    if (orderedDeltas.Count == 0)
    {
        return;
    }

    // The current overlap group and the member of it that reaches farthest on the reference.
    DeltaAlignment farthest = orderedDeltas[0];
    List<DeltaAlignment> overlapGroup = new List<DeltaAlignment>();
    overlapGroup.Add(farthest);

    // Total shift produced so far; it is applied to every delta as it is visited.
    long accumulatedShift = 0;

    for (int nextIndex = 1; nextIndex < orderedDeltas.Count; nextIndex++)
    {
        DeltaAlignment candidate = orderedDeltas[nextIndex];
        candidate.FirstSequenceStart += accumulatedShift;
        candidate.FirstSequenceEnd += accumulatedShift;

        long candidateStart = candidate.FirstSequenceStart;
        long groupEnd = farthest.FirstSequenceEnd;

        bool isAdjacent = candidateStart - 1 == groupEnd;

        if (!isAdjacent && candidateStart <= groupEnd)
        {
            // The candidate overlaps the current group: add it and track the farthest end.
            overlapGroup.Add(candidate);
            if (candidate.FirstSequenceEnd > groupEnd)
            {
                farthest = candidate;
            }

            continue;
        }

        // Adjacent and gap cases both work on the deltas touching the boundary:
        // group members reaching the group's end, and the run of deltas that starts
        // where the candidate starts.
        List<DeltaAlignment> leftSide = overlapGroup
            .Where(member => member.FirstSequenceEnd >= groupEnd)
            .ToList();
        List<DeltaAlignment> rightSide = orderedDeltas
            .Skip(nextIndex)
            .TakeWhile(following => following.FirstSequenceStart == candidateStart)
            .ToList();

        if (isAdjacent)
        {
            // Possible insertion in target (deletion in reference): try to extend the
            // deltas from both sides and make them meet.
            long extension = ExtendDeltas(leftSide, rightSide);
            candidate.FirstSequenceStart += extension;
            candidate.FirstSequenceEnd += extension;
            accumulatedShift += extension;
            continue;
        }

        // There is a gap after the group. Vote on whether both sides share query sequences:
        // each left delta adds one at its first right-side match and subtracts one for
        // every right-side delta examined before that.
        int score = 0;
        foreach (DeltaAlignment leftDelta in leftSide)
        {
            foreach (DeltaAlignment rightDelta in rightSide)
            {
                if (!object.ReferenceEquals(leftDelta.QuerySequence, rightDelta.QuerySequence))
                {
                    score--;
                    continue;
                }

                score++;
                break;
            }
        }

        // A positive score closes the gap by pulling the right-side deltas to the left.
        if (score > 0)
        {
            long gapLength = (candidateStart - groupEnd) - 1;
            accumulatedShift -= gapLength;

            foreach (DeltaAlignment pulled in rightSide)
            {
                pulled.FirstSequenceStart -= gapLength;
                pulled.FirstSequenceEnd -= gapLength;
            }
        }

        // Start a new group from the right side of the gap.
        farthest = candidate;
        overlapGroup.Clear();
        overlapGroup.Add(candidate);
    }
}
/// <summary>
/// Starts parsing from the specified StreamReader. Records are produced lazily, one per
/// enumeration step.
/// </summary>
/// <param name="streamReader">Stream reader to parse.</param>
/// <returns>IEnumerable of DeltaAlignments.</returns>
/// <exception cref="Exception">
/// Thrown during enumeration when the stream is empty, a record does not start with the '@'
/// and '>' lines, or a record ends before its query id or properties line.
/// </exception>
private IEnumerable<DeltaAlignment> ParseFrom(StreamReader streamReader)
{
    // The reader is recorded in parsingReaders; what consumes that list is not visible here.
    this.parsingReaders.Add(streamReader);

    string lastReadQuerySequenceId = string.Empty;
    ISequence sequence = null;

    if (streamReader.EndOfStream)
    {
        throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
    }

    string line = ReadNextLine(streamReader);
    do
    {
        // Record header: '@' followed by the record's position.
        if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);

        // First line - reference id.
        line = ReadNextLine(streamReader);
        if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        string referenceId = line.Substring(1);

        // Second line - query sequence id. Without it no alignment can be built, so a
        // missing id is reported as an invalid file rather than a null dereference.
        line = ReadNextLine(streamReader);
        string queryId = line;
        if (string.IsNullOrEmpty(queryId))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        // Fetch the query sequence from the query file, reusing the previous one when
        // consecutive records name the same query.
        if (queryId != lastReadQuerySequenceId)
        {
            // Get the id and remove any alphas - this can happen because the delta might
            // have "Reverse" appended to it when it's a reversed sequence.
            string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
            int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
            if (idx > 0)
            {
                id = id.Substring(0, idx);
            }

            long seqPosition = long.Parse(id, CultureInfo.InvariantCulture);
            sequence = this.QueryParser.GetSequenceAt(seqPosition);
            lastReadQuerySequenceId = queryId;
        }

        // Placeholder reference sequence that only carries the reference id.
        Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false) { ID = referenceId };
        DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);
        deltaAlignment.Id = deltaPosition;

        // Fourth line - properties of delta alignment.
        line = ReadNextLine(streamReader);
        if (line == null)
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        // Numbers in the file are machine-written, so they are parsed with the invariant
        // culture; a culture-specific negative sign must not cause values to be dropped.
        string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (deltaAlignmentProperties.Length == 7)
        {
            long temp;
            deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

            // Look for a reversed sequence: end before start means the query coordinates
            // are swapped into ascending order and the direction is marked as reverse.
            if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
            {
                temp = deltaAlignment.SecondSequenceEnd;
                deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                deltaAlignment.SecondSequenceStart = temp;
                deltaAlignment.QueryDirection = Cluster.ReverseDirection;
            }

            int error;
            deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
        }

        // Fifth line onwards - deltas, until a line starting with '*' or the end of the stream.
        // The first pass sees the properties line, which is skipped unless it is a single integer.
        while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
        {
            long delta;
            if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
            {
                deltaAlignment.Deltas.Add(delta);
            }

            // Read the next line, skipping blank ones.
            line = streamReader.ReadLine();
            while (line != null && line.Length == 0)
            {
                line = streamReader.ReadLine();
            }
        }

        yield return deltaAlignment;

        // Read the next line; null ends the enumeration.
        line = streamReader.ReadLine();
    }
    while (line != null);
}
/// <summary>
/// Gets the DeltaAlignment at specified position of the file.
/// </summary>
/// <param name="position">Position at which delta alignment is required.</param>
/// <returns>Delta alignment.</returns>
/// <exception cref="FormatException">
/// Thrown when the record read does not start with '@', or when the id after '@' differs
/// from <paramref name="position"/>.
/// </exception>
/// <exception cref="Exception">
/// Thrown when the reference id line is missing or does not start with '>', or when the record
/// ends before its properties line.
/// </exception>
public DeltaAlignment GetDeltaAlignmentAt(long position)
{
    using (var reader = this.deltaStream.OpenRead())
    {
        // NOTE(review): no seek to 'position' is visible in this method; 'position' is only
        // compared with the id of the record that is read. Confirm that OpenRead() (or the
        // caller) positions the stream, otherwise only the first record can ever match.
        string line = ReadNextLine(reader);
        if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
        {
            throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
        }

        long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
        if (position != deltaPosition)
        {
            throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position));
        }

        // Reference id line: '>' followed by the id.
        line = ReadNextLine(reader);
        if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        string referenceId = line.Substring(1);

        // Second line - query sequence id.
        line = ReadNextLine(reader);
        string queryId = line;

        // Fetch the query sequence from the query file.
        ISequence querySequence = null;
        Sequence refEmpty = null;
        if (!string.IsNullOrEmpty(queryId))
        {
            // Get the id and remove any alphas - this can happen because the delta might
            // have "Reverse" appended to it when it's a reversed sequence.
            string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
            int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
            if (idx > 0)
            {
                id = id.Substring(0, idx);
            }

            long sequencePosition = long.Parse(id, CultureInfo.InvariantCulture);
            querySequence = this.QueryParser.GetSequenceAt(sequencePosition);

            // Placeholder reference sequence that only carries the reference id.
            refEmpty = new Sequence(querySequence.Alphabet, "A", false) { ID = referenceId };
        }

        DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence) { Id = deltaPosition };

        // Properties line. A record that ends here is reported as an invalid file
        // instead of failing with a NullReferenceException.
        line = ReadNextLine(reader);
        if (line == null)
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        // Numbers in the file are machine-written, so they are parsed with the invariant
        // culture; a culture-specific negative sign must not cause values to be dropped.
        string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (deltaAlignmentProperties.Length == 7)
        {
            long temp;
            deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

            // Look for a reversed sequence: end before start means the query coordinates
            // are swapped into ascending order and the direction is marked as reverse.
            if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
            {
                temp = deltaAlignment.SecondSequenceEnd;
                deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                deltaAlignment.SecondSequenceStart = temp;
                deltaAlignment.QueryDirection = Cluster.ReverseDirection;
            }

            int error;
            deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
        }

        // Delta lines follow until a line starting with '*' or the end of the stream.
        // The first pass sees the properties line, which is skipped unless it is a single integer.
        while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
        {
            long delta;
            if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
            {
                deltaAlignment.Deltas.Add(delta);
            }

            // Read the next line, skipping blank ones.
            line = reader.ReadLine();
            while (line != null && line.Length == 0)
            {
                line = reader.ReadLine();
            }
        }

        return deltaAlignment;
    }
}
/// <summary>
/// Parses the file into groups of DeltaAlignment. Each record consists of a '>' reference id
/// line, a query id line, a query sequence line, a properties line and delta lines ending at
/// a line starting with '*'. A line starting with "--" closes the current group.
/// </summary>
/// <returns>
/// One collection of DeltaAlignment per group, in file order. Records after the last "--"
/// separator are returned as a final group.
/// </returns>
/// <exception cref="FileFormatException">
/// Thrown when the file is empty, a record does not start with '>', a record is truncated, or
/// no alphabet can be detected for a query sequence.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when a query sequence reaches <c>MaximumSequenceLength</c>.
/// </exception>
public IList<IEnumerable<DeltaAlignment>> Parse()
{
    int currentBufferSize = BufferSize;
    byte[] buffer = new byte[currentBufferSize];
    IAlphabet alphabet = null;
    List<IEnumerable<DeltaAlignment>> result = new List<IEnumerable<DeltaAlignment>>();
    IList<DeltaAlignment> deltaAlignments = new List<DeltaAlignment>();
    string message = string.Empty;

    using (StreamReader streamReader = new StreamReader(this.Filename))
    {
        if (streamReader.EndOfStream)
        {
            message = string.Format(
                CultureInfo.InvariantCulture,
                Resources.INVALID_INPUT_FILE,
                Resources.Parser_Name);
            throw new FileFormatException(message);
        }

        // ReadNextLine is relied on to leave the line it read in 'line', which is not
        // declared in this method.
        ReadNextLine(streamReader);
        do
        {
            if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Resources.INVALID_INPUT_FILE,
                    Resources.Parser_Name);
                throw new FileFormatException(message);
            }

            // First line - reference id.
            string referenceId = line.Substring(1);
            int bufferPosition = 0;

            // Second line - query sequence id.
            ReadNextLine(streamReader);
            string queryId = line;

            // Third line - query sequence. A record that ends here is reported as an
            // invalid file instead of failing with a NullReferenceException.
            ReadNextLine(streamReader);
            if (line == null)
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Resources.INVALID_INPUT_FILE,
                    Resources.Parser_Name);
                throw new FileFormatException(message);
            }

            if ((long)bufferPosition + line.Length >= MaximumSequenceLength)
            {
                throw new ArgumentOutOfRangeException(
                    string.Format(CultureInfo.CurrentUICulture, Resources.SequenceDataGreaterthan2GB, queryId));
            }

            // Grow the shared buffer until the sequence fits; a single growth step is not
            // enough for a line longer than two buffer sizes.
            while (bufferPosition + line.Length >= currentBufferSize)
            {
                Array.Resize<byte>(ref buffer, buffer.Length + BufferSize);
                currentBufferSize += BufferSize;
            }

            byte[] symbols = ASCIIEncoding.ASCII.GetBytes(line);

            // Array.Copy -- for performance improvement.
            Array.Copy(symbols, 0, buffer, bufferPosition, symbols.Length);

            // The alphabet detected for earlier records is passed back in as the starting point.
            alphabet = Alphabets.AutoDetectAlphabet(buffer, bufferPosition, bufferPosition + line.Length, alphabet);
            if (alphabet == null)
            {
                throw new FileFormatException(string.Format(Resources.InvalidSymbolInString, line));
            }

            bufferPosition += line.Length;

            // Truncate buffer to remove trailing 0's.
            byte[] tmpBuffer = new byte[bufferPosition];
            Array.Copy(buffer, tmpBuffer, bufferPosition);

            // In memory sequence.
            Sequence sequence = new Sequence(alphabet, tmpBuffer, false);
            sequence.ID = queryId;

            // Placeholder reference sequence that only carries the reference id.
            Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false);
            refEmpty.ID = referenceId;

            DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);

            // Fourth line - properties of delta alignment.
            ReadNextLine(streamReader);
            if (line == null)
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Resources.INVALID_INPUT_FILE,
                    Resources.Parser_Name);
                throw new FileFormatException(message);
            }

            // Numbers in the file are machine-written, so they are parsed with the invariant
            // culture; a culture-specific negative sign must not cause values to be dropped.
            string[] deltaAlignmentProperties = line.Split(' ');
            if (deltaAlignmentProperties.Length == 7)
            {
                long temp;
                deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;
                deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out temp) ? temp : 0;

                int error;
                deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
                deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            }

            // Fifth line onwards - deltas, until a line starting with '*' or the end of the file.
            // The first pass sees the properties line, which is skipped unless it is a single integer.
            while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
            {
                long delta;
                if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
                {
                    deltaAlignment.Deltas.Add(delta);
                }

                // Read the next line, skipping blank ones.
                line = streamReader.ReadLine();
                while (line != null && line.Length == 0)
                {
                    line = streamReader.ReadLine();
                }
            }

            deltaAlignments.Add(deltaAlignment);

            // Read the next line; it is null when the file ends right after a record.
            line = streamReader.ReadLine();
            if (line != null && line.StartsWith("--", StringComparison.OrdinalIgnoreCase))
            {
                result.Add(deltaAlignments);

                // Start a fresh inner list for the next group.
                deltaAlignments = new List<DeltaAlignment>();

                // Skip until the next valid delta is found.
                do
                {
                    line = streamReader.ReadLine();
                }
                while (line != null && line.StartsWith("--", StringComparison.OrdinalIgnoreCase));
            }
        }
        while (line != null);
    }

    // Keep the records of a final group that was not closed by a "--" separator.
    if (deltaAlignments.Count > 0)
    {
        result.Add(deltaAlignments);
    }

    return result;
}
/// <summary>
/// Resolves reads that are placed ambiguously because they align to several positions.
/// Mate pair information, taken from the query sequence ids, is used to choose between
/// the repeated placements.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">Alignment between reference genome and reads, one collection per read.</param>
/// <returns>List of DeltaAlignments after resolving repeating reads.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="alignmentBetweenReferenceAndReads"/> is null.</exception>
public static List<DeltaAlignment> ResolveAmbiguity(IList<IEnumerable<DeltaAlignment>> alignmentBetweenReferenceAndReads)
{
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    List<DeltaAlignment> resolved = new List<DeltaAlignment>();

    // Working copy: reads are taken from the front until none are left.
    List<IEnumerable<DeltaAlignment>> pending = alignmentBetweenReferenceAndReads.ToList();

    while (pending.Count > 0)
    {
        IEnumerable<DeltaAlignment> current = pending[0];
        pending.RemoveAt(0);

        // A single delta means there is no repeat; a partial alignment is not resolved.
        // Either way every delta of the read is kept.
        bool keepAsIs = current.Count() == 1
            || current.Any(delta => delta.SecondSequenceEnd != delta.QuerySequence.Count - 1);
        if (keepAsIs)
        {
            resolved.AddRange(current);
            continue;
        }

        // The read id is split on '.' and ':' and must give three parts with "F" or "R"
        // in the middle; reads without such an id are kept unchanged.
        string[] readParts = current.ElementAt(0).QuerySequence.ID.Split('.', ':');
        bool hasPairedId = readParts.Length == 3 && (readParts[1] == "F" || readParts[1] == "R");
        if (!hasPairedId)
        {
            resolved.AddRange(current);
            continue;
        }

        // The mate has the same first and last parts and the opposite direction marker.
        // It is looked up in the full input, not only among the reads still pending.
        string mateDirection = readParts[1] == "F" ? "R" : "F";
        IEnumerable<DeltaAlignment> mate = alignmentBetweenReferenceAndReads.FirstOrDefault(other =>
        {
            string[] otherParts = other.ElementAt(0).QuerySequence.ID.Split('.', ':');
            return otherParts.Length == 3
                && otherParts[0] == readParts[0]
                && otherParts[2] == readParts[2]
                && otherParts[1] == mateDirection;
        });

        // Without a mate the read is kept unchanged.
        if (mate == null)
        {
            resolved.AddRange(current);
            continue;
        }

        // Resolve using the distance method. When no pair is selected, nothing is added
        // for this read and the mate stays pending.
        List<DeltaAlignment> selectedPair = ResolveRepeatUsingMatePair(current, mate);
        if (selectedPair != null)
        {
            pending.Remove(mate);
            resolved.AddRange(selectedPair);
        }
    }

    return resolved;
}
/// <summary>
/// Starts parsing from the specified StreamReader.
/// </summary>
/// <remarks>
/// Each record is read as:
/// a line starting with "@" followed by the delta position,
/// a line starting with "&gt;" followed by the reference id,
/// the query id line (the text after the last '@' is the position passed to the query parser),
/// a line with seven space separated alignment properties,
/// and then delta lines up to a line starting with "*".
/// Because this is an iterator, parsing (and any exception) happens while the result is enumerated.
/// </remarks>
/// <param name="streamReader">Stream reader to parse.</param>
/// <returns>IEnumerable of DeltaAlignments.</returns>
/// <exception cref="FileFormatException">
/// Thrown when the stream is empty, a record header is missing or malformed, the query id line is
/// missing or empty, or the properties line is missing.
/// </exception>
private IEnumerable<DeltaAlignment> ParseFrom(StreamReader streamReader)
{
    // Every format error in this method reports the same message.
    string invalidFileMessage = string.Format(
        CultureInfo.InvariantCulture,
        Properties.Resource.INVALID_INPUT_FILE,
        this.DeltaFilename);

    if (streamReader.EndOfStream)
    {
        throw new FileFormatException(invalidFileMessage);
    }

    string lastReadQuerySequenceId = string.Empty;
    ISequence sequence = null;
    char[] propertySeparators = { ' ' };

    string line = ReadNextLine(streamReader);
    do
    {
        // Record header: "@<delta position>".
        if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
        {
            throw new FileFormatException(invalidFileMessage);
        }

        long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);

        // First line - reference id: "><reference id>".
        line = ReadNextLine(streamReader);
        if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
        {
            throw new FileFormatException(invalidFileMessage);
        }

        string referenceId = line.Substring(1);

        // Second line - query sequence id. Without it no alignment can be built, so the
        // file is rejected instead of failing later with a NullReferenceException.
        line = ReadNextLine(streamReader);
        string queryId = line;
        if (string.IsNullOrEmpty(queryId))
        {
            throw new FileFormatException(invalidFileMessage);
        }

        // Fetch the query sequence from the query file; reuse it while consecutive records share the id.
        if (queryId != lastReadQuerySequenceId)
        {
            long seqPosition = long.Parse(
                queryId.Substring(queryId.LastIndexOf('@') + 1),
                CultureInfo.InvariantCulture);
            sequence = this.QueryParser.GetSequenceAt(seqPosition);
            lastReadQuerySequenceId = queryId;
        }

        // The reference is represented by a one-symbol placeholder that only carries the reference id.
        Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false);
        refEmpty.ID = referenceId;

        DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);
        deltaAlignment.Id = deltaPosition;

        // Third line - properties of the delta alignment.
        line = ReadNextLine(streamReader);
        if (line == null)
        {
            throw new FileFormatException(invalidFileMessage);
        }

        string[] deltaAlignmentProperties = line.Split(propertySeparators, StringSplitOptions.RemoveEmptyEntries);
        if (deltaAlignmentProperties.Length == 7)
        {
            // Values that fail to parse fall back to 0.
            long position;
            deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;

            int count;
            deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
            deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
            deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out count) ? count : 0;
        }

        // Remaining lines - deltas, up to the line starting with "*".
        // The first pass of this loop still sees the properties line; a seven-field line does
        // not parse as a single number, so nothing is added for it.
        while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
        {
            long delta;
            if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
            {
                deltaAlignment.Deltas.Add(delta);
            }

            // Read next line, skipping blank lines.
            line = streamReader.ReadLine();
            while (line != null && line.Length == 0)
            {
                line = streamReader.ReadLine();
            }
        }

        yield return deltaAlignment;

        // Read the header of the next record (null at end of stream).
        line = streamReader.ReadLine();
    }
    while (line != null);
}
/// <summary>
/// Refines alignment layout by taking in consideration indels (insertions and deletions) and rearrangements between two genomes.
/// Requires mate-pair information to resolve ambiguity.
/// </summary>
/// <remarks>
/// The argument is validated immediately; the refinement itself runs lazily while the returned
/// sequence is enumerated. The start/end positions of the deltas are modified in place.
/// </remarks>
/// <param name="orderedDeltas">Order deltas.</param>
/// <returns>The deltas of <paramref name="orderedDeltas"/>, yielded as they are unloaded from the working window.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
{
    if (orderedDeltas == null)
    {
        throw new ArgumentNullException(nameof(orderedDeltas));
    }

    // The iterator lives in a separate method so the null check above is not deferred
    // until the caller starts enumerating.
    return RefineLayoutIterator(orderedDeltas);
}

/// <summary>
/// Iterator behind <see cref="RefineLayout"/>; expects a non-null collection.
/// </summary>
/// <param name="orderedDeltas">Order deltas.</param>
/// <returns>Deltas with adjusted reference positions.</returns>
private static IEnumerable<DeltaAlignment> RefineLayoutIterator(DeltaAlignmentCollection orderedDeltas)
{
    if (orderedDeltas.Count == 0)
    {
        yield break;
    }

    // The maximum possible size of inserts and deletes is not known, so a fixed window is used.
    // NOTE(review): an earlier comment spoke of 1,000,000 deltas, but the value actually used is 1000.
    int windowSize = 1000;

    VirtualDeltaAlignmentCollection deltaCache = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

    List<DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
    List<DeltaAlignment> leftSideDeltas = null;
    List<DeltaAlignment> rightSideDeltas = null;
    List<DeltaAlignment> unloadedDeltas = null;

    try
    {
        deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
        leftSideDeltas = new List<DeltaAlignment>();
        rightSideDeltas = new List<DeltaAlignment>();

        // Accumulated shift applied to the reference positions of every delta still to be visited.
        long currentProcessedOffset = 0;

        DeltaAlignment alignment = deltaCache[0];
        deltasOverlappingAtCurrentIndex.Add(alignment);
        DeltaAlignment deltaWithLargestEndIndex = alignment;

        for (int currentIndex = 0; currentIndex < deltaCache.Count - 1; currentIndex++)
        {
            DeltaAlignment nextDelta = deltaCache[currentIndex + 1];

            // Hand over whatever the cache is able to unload at this point.
            unloadedDeltas = null;
            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
            {
                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }

            if (currentProcessedOffset != 0)
            {
                nextDelta.FirstSequenceStart += currentProcessedOffset;
                nextDelta.FirstSequenceEnd += currentProcessedOffset;
            }

            // Check if next delta is just adjacent
            if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                // Try to extend the deltas from both sides and make them meet
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the adjacent right side
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    unloadedDeltas = null;
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                if (offset != 0)
                {
                    nextDelta.FirstSequenceStart += offset;
                    nextDelta.FirstSequenceEnd += offset;
                }

                currentProcessedOffset += offset;
            }
            else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // Next delta overlaps with the current overlap group
                deltasOverlappingAtCurrentIndex.Add(nextDelta);

                // Check if nextDelta is reaching farther than the current farthest delta
                if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    deltaWithLargestEndIndex = nextDelta;
                }

                // Keep the overlap group bounded: once it outgrows the window, drop every delta
                // that ends before the farthest reaching one.
                if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                {
                    for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                    {
                        if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltasOverlappingAtCurrentIndex.RemoveAt(i);
                        }
                    }
                }
            }
            else
            {
                // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the right end of the gap
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    unloadedDeltas = null;
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                // +1 for every left delta whose read also appears on the right, -1 otherwise.
                int score = 0;
                for (int i = 0; i < leftSideDeltas.Count; i++)
                {
                    var l = leftSideDeltas[i];
                    int j = 0;
                    for (; j < rightSideDeltas.Count; j++)
                    {
                        var r = rightSideDeltas[j];

                        // A reference check on the query sequences is not possible here, so the
                        // IDs are compared instead; IDs are unique for a given read.
                        if (l.QuerySequence.ID == r.QuerySequence.ID)
                        {
                            score++;
                            break;
                        }
                    }

                    if (j == rightSideDeltas.Count)
                    {
                        score--;
                    }
                }

                // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                if (score > 0)
                {
                    long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                    currentProcessedOffset -= gaplength;

                    // Pull deltas on right side to close the gap
                    for (int i = 0; i < rightSideDeltas.Count; i++)
                    {
                        DeltaAlignment delta = rightSideDeltas[i];
                        delta.FirstSequenceStart -= gaplength;
                        delta.FirstSequenceEnd -= gaplength;
                    }
                }

                // Start a new group from the right side of the gap
                deltaWithLargestEndIndex = nextDelta;
                deltasOverlappingAtCurrentIndex.Clear();
                deltasOverlappingAtCurrentIndex.Add(nextDelta);
            }
        }

        // Everything still held by the cache is returned at the end.
        unloadedDeltas = deltaCache.GetCachedDeltas();
        for (int i = 0; i < unloadedDeltas.Count; i++)
        {
            yield return unloadedDeltas[i];
        }

        unloadedDeltas.Clear();
    }
    finally
    {
        // Drop the working lists as soon as enumeration ends or is abandoned
        // (presumably so a retained enumerator does not keep the deltas alive).
        if (deltasOverlappingAtCurrentIndex != null)
        {
            deltasOverlappingAtCurrentIndex.Clear();
            deltasOverlappingAtCurrentIndex = null;
        }

        if (leftSideDeltas != null)
        {
            leftSideDeltas.Clear();
            leftSideDeltas = null;
        }

        if (rightSideDeltas != null)
        {
            rightSideDeltas.Clear();
            rightSideDeltas = null;
        }

        if (deltaCache != null)
        {
            deltaCache = null;
        }
    }
}
/// <summary>
/// Tries to extend the deltas on the left of a junction to the end of their reads and the deltas
/// on the right to the start of their reads, using a per-position consensus of the unaligned
/// read symbols on each side.
/// </summary>
/// <param name="leftSideDeltas">Left Side Deltas; extended past their SecondSequenceEnd.</param>
/// <param name="rightSideDeltas">Right Side Deltas; extended before their SecondSequenceStart.</param>
/// <returns>
/// 0 when no extension is made (a position has no clearly dominant symbol, one side has nothing
/// to extend, or no overlap is found); otherwise the length of the right extension plus the
/// overlap start. In that case the positions of all supplied deltas have been updated.
/// </returns>
private static long ExtendDeltas(List<DeltaAlignment> leftSideDeltas, List<DeltaAlignment> rightSideDeltas)
{
    int[] tally = new int[255];
    List<byte> leftConsensus = new List<byte>();
    List<byte> rightConsensus = new List<byte>();

    // Left side: walk forward from the symbol after each delta's aligned end.
    for (long step = 1; ; step++)
    {
        tally['A'] = tally['C'] = tally['G'] = tally['T'] = 0;

        // Count the symbol each read contributes at this distance, if it reaches that far.
        foreach (DeltaAlignment left in leftSideDeltas)
        {
            if (left.QuerySequence.Count > left.SecondSequenceEnd + step)
            {
                char symbol = (char)left.QuerySequence[left.SecondSequenceEnd + step];
                tally[char.ToUpperInvariant(symbol)]++;
            }
        }

        // Stop once no read contributes an A, C, G or T at this position.
        bool anyBaseSeen = tally['A'] != 0 || tally['C'] != 0 || tally['G'] != 0 || tally['T'] != 0;
        if (!anyBaseSeen)
        {
            break;
        }

        byte dominant, runnerUp;
        FindLargestAndSecondLargest(tally, out dominant, out runnerUp);

        // Give up entirely when the runner-up has more than half (integer division)
        // of the dominant symbol's count.
        if (tally[runnerUp] > tally[dominant] / 2)
        {
            return 0;
        }

        // The index doubles as the byte value of the symbol.
        leftConsensus.Add(dominant);
    }

    // Right side: walk backward from the symbol before each delta's aligned start.
    for (long step = 1; ; step++)
    {
        tally['A'] = tally['C'] = tally['G'] = tally['T'] = 0;

        foreach (DeltaAlignment right in rightSideDeltas)
        {
            if (right.SecondSequenceStart - step >= 0)
            {
                char symbol = (char)right.QuerySequence[right.SecondSequenceStart - step];
                tally[char.ToUpperInvariant(symbol)]++;
            }
        }

        bool anyBaseSeen = tally['A'] != 0 || tally['C'] != 0 || tally['G'] != 0 || tally['T'] != 0;
        if (!anyBaseSeen)
        {
            break;
        }

        byte dominant, runnerUp;
        FindLargestAndSecondLargest(tally, out dominant, out runnerUp);

        if (tally[runnerUp] > tally[dominant] / 2)
        {
            return 0;
        }

        // Walking backward, so each new symbol goes in front.
        rightConsensus.Insert(0, dominant);
    }

    // If either side cannot be extended, cancel the extension.
    if (leftConsensus.Count == 0 || rightConsensus.Count == 0)
    {
        return 0;
    }

    int overlapStart = FindMaxOverlap(leftConsensus, rightConsensus);
    if (overlapStart == -1)
    {
        return 0;
    }

    // Left side deltas now cover their reads up to the last symbol.
    foreach (DeltaAlignment left in leftSideDeltas)
    {
        left.FirstSequenceEnd += (left.QuerySequence.Count - 1) - left.SecondSequenceEnd;
        left.SecondSequenceEnd = left.QuerySequence.Count - 1;
    }

    // Right side deltas now cover their reads from the first symbol.
    int toRightOffset = rightConsensus.Count + overlapStart;
    foreach (DeltaAlignment right in rightSideDeltas)
    {
        right.FirstSequenceStart += toRightOffset - right.SecondSequenceStart;

        // Taken off again because the caller applies the returned offset to these deltas itself.
        right.FirstSequenceStart -= toRightOffset;
        right.SecondSequenceStart = 0;
    }

    return toRightOffset;
}
/// <summary>
/// Builds the error-removed sequence for a delta: the aligned part of the query
/// (SecondSequenceStart through SecondSequenceEnd) with the indels listed in Deltas applied.
/// A positive delta puts a gap symbol into the output; a negative delta skips one query symbol.
/// </summary>
/// <param name="deltaAlignment">DeltaAlignment instance.</param>
/// <returns>A new sequence over the ambiguous DNA alphabet carrying the query sequence's ID.</returns>
private static ISequence GetSequenceFromDelta(DeltaAlignment deltaAlignment)
{
    var deltas = deltaAlignment.Deltas;
    long deltaTotal = deltas.Count;
    int deltaCursor = 0;

    // Current indel distance; 0 means there is no further indel to apply.
    long activeDelta = 0;
    if (deltaCursor < deltaTotal)
    {
        activeDelta = deltas[deltaCursor++];
    }

    // Query index at which the current indel applies.
    long indelAt = deltaAlignment.SecondSequenceStart - 1;
    indelAt += activeDelta >= 0 ? activeDelta : -activeDelta;

    // Output length: aligned span, plus one per positive delta, minus one per negative delta.
    long outputLength = deltaAlignment.SecondSequenceEnd - deltaAlignment.SecondSequenceStart + 1
                        + deltas.Count(d => d > 0)
                        - deltas.Count(d => d < 0);

    byte[] output = new byte[outputLength];
    long writeAt = 0;
    long readAt = deltaAlignment.SecondSequenceStart;

    while (readAt <= deltaAlignment.SecondSequenceEnd)
    {
        // Ordinary position: copy the query symbol and move on.
        if (activeDelta == 0 || readAt != indelAt)
        {
            output[writeAt] = deltaAlignment.QuerySequence[readAt];
            writeAt++;
            readAt++;
            continue;
        }

        if (activeDelta > 0)
        {
            // A symbol is missing from the query here, so a gap symbol is written.
            // The read position does not advance, hence the position adjustment.
            output[writeAt] = AmbiguousDnaAlphabet.Instance.Gap;
            writeAt++;
            indelAt--;
        }
        else
        {
            // The query has an extra symbol here, so it is skipped.
            readAt++;
        }

        // Move to the next indel, if any.
        activeDelta = deltaCursor < deltaTotal ? deltas[deltaCursor++] : 0;
        indelAt += activeDelta >= 0 ? activeDelta : -activeDelta;
    }

    return new Sequence(AmbiguousDnaAlphabet.Instance, output)
    {
        ID = deltaAlignment.QuerySequence.ID
    };
}
/// <summary>
/// Generates consensus sequences from alignment layout.
/// </summary>
/// <remarks>
/// The argument is validated immediately; contigs are built lazily while the returned sequence
/// is enumerated. A contig ends at the first position not covered by any delta.
/// </remarks>
/// <param name="alignmentBetweenReferenceAndReads">Input list of reads.</param>
/// <returns>List of contigs.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="alignmentBetweenReferenceAndReads"/> is null.</exception>
public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException(nameof(alignmentBetweenReferenceAndReads));
    }

    // The iterator lives in a separate method so the null check above is not deferred
    // until the caller starts enumerating.
    return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
}

/// <summary>
/// Iterator behind <see cref="GenerateConsensus"/>; expects a non-null collection.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">Input list of reads.</param>
/// <returns>One sequence per contig.</returns>
private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // no deltas
    if (alignmentBetweenReferenceAndReads.Count == 0)
    {
        yield break;
    }

    // NOTE(review): the second argument (49) is presumably the consensus threshold - confirm
    // against SimpleConsensusResolver.
    SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

    // Deltas covering the current position, each with its error-removed sequence.
    // This is expected to stay small (a few hundred entries in the worst scenario).
    Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();
    List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();
    List<byte> currentContig = new List<byte>();

    long currentAlignmentStartOffset = 0; // reference offset at which the current contig starts
    long currentIndex = 0;                // position within the current contig

    long index = 0;
    DeltaAlignment lastDelta = alignmentBetweenReferenceAndReads[index]; // next delta not yet added; null when exhausted

    do
    {
        // Starting a new contig
        if (deltasInCurrentContig.Count == 0)
        {
            currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
            currentIndex = 0;
            currentContig.Clear();
        }

        // loop through all deltas at current index and find consensus
        do
        {
            // Add every delta that starts exactly at the current reference offset.
            while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                // Get next delta
                index++;
                lastDelta = alignmentBetweenReferenceAndReads.Count > index
                    ? alignmentBetweenReferenceAndReads[index]
                    : null;
            }

            // Collect the symbol every covering delta has at the current position.
            byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
            int symbolCounter = 0;

            foreach (var delta in deltasInCurrentContig)
            {
                long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                // A delta whose last symbol was just used is finished; it is removed after the
                // enumeration because the dictionary cannot be modified while enumerating it.
                if (inDeltaIndex == delta.Value.Count - 1)
                {
                    deltasToRemove.Add(delta.Key);
                }
            }

            if (deltasToRemove.Count > 0)
            {
                for (int i = 0; i < deltasToRemove.Count; i++)
                {
                    deltasInCurrentContig.Remove(deltasToRemove[i]);
                }

                deltasToRemove.Clear();
            }

            byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
            currentContig.Add(consensusSymbol);

            currentIndex++;

            // All deltas are finished, but the next delta starts right at the following offset:
            // keep it in the same contig instead of ending the contig here.
            if (deltasInCurrentContig.Count == 0
                && lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                // check next delta
                index++;
                lastDelta = alignmentBetweenReferenceAndReads.Count > index
                    ? alignmentBetweenReferenceAndReads[index]
                    : null;
            }
        }
        while (deltasInCurrentContig.Count > 0);

        yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
    }
    while (lastDelta != null);
}