/// <summary>
/// Picks the earlier alignment that the current alignment should be extended back towards.
/// </summary>
/// <param name="alignments">Alignments collected so far.</param>
/// <param name="currentAlignment">Alignment whose predecessor is wanted.</param>
/// <returns>
/// The selected predecessor; when no candidate qualifies, the last element of
/// <paramref name="alignments"/> is returned.
/// </returns>
private DeltaAlignment GetPreviousAlignment(
    IEnumerable<DeltaAlignment> alignments,
    DeltaAlignment currentAlignment)
{
    long referenceStart = currentAlignment.FirstSequenceStart;
    long queryStart = currentAlignment.SecondSequenceStart;

    // A fallback candidate must have a weighted gap below this bound.
    long bestDistance = Math.Min(referenceStart, queryStart);

    DeltaAlignment selected = alignments.Last();
    foreach (DeltaAlignment candidate in alignments)
    {
        if (currentAlignment.QueryDirection != candidate.QueryDirection)
        {
            continue;
        }

        long referenceEnd = candidate.FirstSequenceEnd;
        long queryEnd = candidate.SecondSequenceEnd;
        if (referenceEnd > referenceStart || queryEnd > queryStart)
        {
            // Candidate does not end at or before the current start on both sequences.
            continue;
        }

        long referenceGap = referenceStart - referenceEnd;
        long queryGap = queryStart - queryEnd;
        long gapHigh = Math.Max(referenceGap, queryGap);
        long gapLow = Math.Min(referenceGap, queryGap);

        // Close enough, or the gap scores non-negatively: take this candidate immediately.
        if (gapHigh < BreakLength
            || ((gapLow * _nucmerAligner.ValidScore) + ((gapHigh - gapLow) * _nucmerAligner.GapExtensionScore)) >= 0)
        {
            return candidate;
        }

        // Otherwise remember the candidate with the smallest weighted gap seen so far.
        long weightedGap = (gapHigh << 1) - gapLow;
        if (weightedGap < bestDistance)
        {
            selected = candidate;
            bestDistance = weightedGap;
        }
    }

    return selected;
}
/// <summary>
/// Picks the earlier alignment that the current alignment should be extended back towards.
/// </summary>
/// <param name="alignments">Alignments collected so far.</param>
/// <param name="currentAlignment">Alignment whose predecessor is wanted.</param>
/// <returns>The selected predecessor, or null when no candidate qualifies.</returns>
private DeltaAlignment GetPreviousAlignment(
    IList<DeltaAlignment> alignments,
    DeltaAlignment currentAlignment)
{
    int referenceStart = currentAlignment.FirstSequenceStart;
    int queryStart = currentAlignment.SecondSequenceStart;

    // A fallback candidate must have a weighted gap below this bound.
    int bestDistance = Math.Min(referenceStart, queryStart);

    DeltaAlignment selected = null;
    foreach (DeltaAlignment candidate in alignments)
    {
        if (currentAlignment.QueryDirection != candidate.QueryDirection)
        {
            continue;
        }

        int referenceEnd = candidate.FirstSequenceEnd;
        int queryEnd = candidate.SecondSequenceEnd;
        if (referenceEnd > referenceStart || queryEnd > queryStart)
        {
            // Candidate does not end at or before the current start on both sequences.
            continue;
        }

        int referenceGap = referenceStart - referenceEnd;
        int queryGap = queryStart - queryEnd;
        int gapHigh = Math.Max(referenceGap, queryGap);
        int gapLow = Math.Min(referenceGap, queryGap);

        // Close enough, or the gap scores non-negatively: take this candidate immediately.
        if (gapHigh < BreakLength
            || ((gapLow * nucmerAligner.ValidScore) + ((gapHigh - gapLow) * nucmerAligner.SubstitutionScore)) >= 0)
        {
            return candidate;
        }

        // Otherwise remember the candidate with the smallest weighted gap seen so far.
        int weightedGap = (gapHigh << 1) - gapLow;
        if (weightedGap < bestDistance)
        {
            selected = candidate;
            bestDistance = weightedGap;
        }
    }

    return selected;
}
/// <summary>
/// Builds a delta alignment that covers exactly one match.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="cluster">Cluster supplying the query direction</param>
/// <param name="match">Match supplying the offsets and length</param>
/// <returns>Newly created DeltaAlignment object</returns>
internal static DeltaAlignment NewAlignment(
    ISequence referenceSequence,
    ISequence querySequence,
    Cluster cluster,
    MatchExtension match)
{
    DeltaAlignment created = new DeltaAlignment(referenceSequence, querySequence);

    // Starts are the match offsets; ends are the inclusive last positions of the match.
    created.FirstSequenceStart = match.ReferenceSequenceOffset;
    created.SecondSequenceStart = match.QuerySequenceOffset;
    created.FirstSequenceEnd = match.ReferenceSequenceOffset + match.Length - 1;
    created.SecondSequenceEnd = match.QuerySequenceOffset + match.Length - 1;
    created.QueryDirection = cluster.QueryDirection;

    return created;
}
/// <summary>
/// Builds a delta alignment that covers exactly one match.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="cluster">Cluster supplying the query direction</param>
/// <param name="match">Match supplying the start positions and length</param>
/// <returns>Newly created DeltaAlignment object</returns>
internal static DeltaAlignment NewAlignment(
    ISequence referenceSequence,
    ISequence querySequence,
    Cluster cluster,
    MaxUniqueMatchExtension match)
{
    // Ends are the inclusive last positions of the match on each sequence.
    return new DeltaAlignment(referenceSequence, querySequence)
    {
        FirstSequenceStart = match.FirstSequenceStart,
        SecondSequenceStart = match.SecondSequenceStart,
        FirstSequenceEnd = match.FirstSequenceStart + match.Length - 1,
        SecondSequenceEnd = match.SecondSequenceStart + match.Length - 1,
        QueryDirection = cluster.QueryDirection
    };
}
/// <summary>
/// Tells whether the cluster lies entirely inside an alignment that was already produced.
/// </summary>
/// <param name="alignments">List of alignment</param>
/// <param name="currentCluster">Cluster to test</param>
/// <param name="currentDeltaAlignment">
/// Alignment whose index in <paramref name="alignments"/> is where the backward scan starts.
/// </param>
/// <returns>
/// True when an alignment at or before that index, with the same query direction,
/// spans the cluster on both sequences; otherwise false.
/// </returns>
private static bool IsClusterShadowed(
    List<DeltaAlignment> alignments,
    Cluster currentCluster,
    DeltaAlignment currentDeltaAlignment)
{
    // The cluster extent runs from the start of its first match to the end of its last match.
    var firstMatch = currentCluster.Matches.First();
    var lastMatch = currentCluster.Matches.Last();
    long referenceStart = firstMatch.ReferenceSequenceOffset;
    long referenceEnd = lastMatch.ReferenceSequenceOffset + lastMatch.Length - 1;
    long queryStart = firstMatch.QuerySequenceOffset;
    long queryEnd = lastMatch.QuerySequenceOffset + lastMatch.Length - 1;

    if (alignments.Count <= 0)
    {
        return false;
    }

    // IndexOf yields -1 when the alignment is not in the list, so the loop is skipped.
    for (int position = alignments.IndexOf(currentDeltaAlignment); position >= 0; position--)
    {
        DeltaAlignment candidate = alignments[position];
        if (candidate.QueryDirection != currentCluster.QueryDirection)
        {
            continue;
        }

        if (candidate.FirstSequenceEnd >= referenceEnd
            && candidate.SecondSequenceEnd >= queryEnd
            && candidate.FirstSequenceStart <= referenceStart
            && candidate.SecondSequenceStart <= queryStart)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tries to extend the current alignment backwards until it meets the target alignment.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="alignments">List of alignments</param>
/// <param name="currentAlignment">Alignment being extended backwards</param>
/// <param name="targetAlignment">
/// Alignment to extend towards; when it is the last entry of <paramref name="alignments"/>
/// it is treated as "no target found".
/// </param>
/// <returns>
/// True when the target was reached; the target alignment then takes over the current
/// alignment's end positions and the last entry of <paramref name="alignments"/> is removed.
/// Otherwise false, and the current alignment's start positions are moved back instead.
/// </returns>
private bool ExtendToPreviousSequence(
    ISequence referenceSequence,
    ISequence querySequence,
    IList<DeltaAlignment> alignments,
    DeltaAlignment currentAlignment,
    DeltaAlignment targetAlignment)
{
    int alignFlags = ModifiedSmithWaterman.BackwardAlignFlag;
    long referenceTarget = 0;
    long queryTarget = 0;

    if (alignments.Last() != targetAlignment)
    {
        referenceTarget = targetAlignment.FirstSequenceEnd;
        queryTarget = targetAlignment.SecondSequenceEnd;
    }
    else
    {
        // No target alignment: aim for the start of both sequences (position 0)
        // and let the aligner stop wherever the score is best.
        alignFlags |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Cap the reference span at the maximum alignment length.
    bool referenceTooLong =
        (currentAlignment.FirstSequenceStart - referenceTarget + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;
    if (referenceTooLong)
    {
        referenceTarget = currentAlignment.FirstSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
        alignFlags |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Cap the query span at the maximum alignment length.
    bool queryTooLong =
        (currentAlignment.SecondSequenceStart - queryTarget + 1) > ModifiedSmithWaterman.MaximumAlignmentLength;
    if (queryTooLong)
    {
        queryTarget = currentAlignment.SecondSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
        alignFlags |= ModifiedSmithWaterman.OptimalFlag;
    }

    // The targets are passed by reference, so the aligner may move them.
    bool targetReached = _nucmerAligner.ExtendSequence(
        referenceSequence,
        currentAlignment.FirstSequenceStart,
        ref referenceTarget,
        querySequence,
        currentAlignment.SecondSequenceStart,
        ref queryTarget,
        currentAlignment.Deltas,
        alignFlags);

    // A capped span, or a missing target, never counts as "reached".
    if (referenceTooLong || queryTooLong || alignments.Last() == targetAlignment)
    {
        targetReached = false;
    }

    if (!targetReached)
    {
        // Re-align forwards from the new start up to the old start, then adopt the new start.
        long oldReferenceStart = currentAlignment.FirstSequenceStart;
        long oldQueryStart = currentAlignment.SecondSequenceStart;
        _nucmerAligner.ExtendSequence(
            referenceSequence,
            referenceTarget,
            ref oldReferenceStart,
            querySequence,
            queryTarget,
            ref oldQueryStart,
            currentAlignment.Deltas,
            ModifiedSmithWaterman.ForcedForwardAlignFlag);

        currentAlignment.FirstSequenceStart = referenceTarget;
        currentAlignment.SecondSequenceStart = queryTarget;

        // Adjust the delta reference position
        foreach (int delta in currentAlignment.Deltas)
        {
            currentAlignment.DeltaReferencePosition += (delta > 0) ? delta : Math.Abs(delta) - 1;
        }

        return false;
    }

    // Target reached: extend the target forwards up to the current start.
    ExtendToNextSequence(
        referenceSequence,
        querySequence,
        targetAlignment,
        currentAlignment.FirstSequenceStart,
        currentAlignment.SecondSequenceStart,
        ModifiedSmithWaterman.ForcedForwardAlignFlag);

    targetAlignment.FirstSequenceEnd = currentAlignment.FirstSequenceEnd;
    targetAlignment.SecondSequenceEnd = currentAlignment.SecondSequenceEnd;
    alignments.RemoveAt(alignments.Count - 1);

    return true;
}
/// <summary>
/// Reports whether an already produced alignment fully contains the given cluster.
/// </summary>
/// <param name="alignments">List of alignment</param>
/// <param name="currentCluster">Cluster to test</param>
/// <param name="currentDeltaAlignment">
/// Alignment whose index in <paramref name="alignments"/> is where the backward scan starts.
/// </param>
/// <returns>Is cluster contained in alignment</returns>
private static bool IsClusterShadowed(
    List<DeltaAlignment> alignments,
    Cluster currentCluster,
    DeltaAlignment currentDeltaAlignment)
{
    // Cluster bounds: start of its first match to the inclusive end of its last match.
    var head = currentCluster.Matches.First();
    var tail = currentCluster.Matches.Last();
    long clusterReferenceStart = head.ReferenceSequenceOffset;
    long clusterReferenceEnd = tail.ReferenceSequenceOffset + tail.Length - 1;
    long clusterQueryStart = head.QuerySequenceOffset;
    long clusterQueryEnd = tail.QuerySequenceOffset + tail.Length - 1;

    bool shadowed = false;
    if (0 < alignments.Count)
    {
        // Walk backwards from the current alignment; -1 (not found) skips the scan.
        int index = alignments.IndexOf(currentDeltaAlignment);
        while (index >= 0 && !shadowed)
        {
            DeltaAlignment existing = alignments[index];
            if (existing.QueryDirection == currentCluster.QueryDirection)
            {
                shadowed = existing.FirstSequenceEnd >= clusterReferenceEnd
                    && existing.SecondSequenceEnd >= clusterQueryEnd
                    && existing.FirstSequenceStart <= clusterReferenceStart
                    && existing.SecondSequenceStart <= clusterQueryStart;
            }

            index--;
        }
    }

    return shadowed;
}
/// <summary>
/// Extend the cluster forward
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="currentAlignment">
/// Alignment to extend; its end positions, deltas and delta reference position are updated.
/// </param>
/// <param name="targetReference">target position in reference sequence</param>
/// <param name="targetQuery">target position in query sequence</param>
/// <param name="methodName">Bit flags selecting the alignment method</param>
/// <returns>
/// True when the aligner reported reaching the target and neither span had to be capped
/// to <c>ModifiedSmithWaterman.MaximumAlignmentLength</c>; otherwise false.
/// </returns>
private bool ExtendToNextSequence(
    ISequence referenceSequence,
    ISequence querySequence,
    DeltaAlignment currentAlignment,
    long targetReference,
    long targetQuery,
    int methodName)
{
    bool isOverflow = false;
    bool isDouble = false;

    // Index of the first delta that this call may append.
    int diagonal = currentAlignment.Deltas.Count;

    // Inclusive span lengths from the current end to the requested target.
    long referenceDistance = targetReference - currentAlignment.FirstSequenceEnd + 1;
    long queryDistance = targetQuery - currentAlignment.SecondSequenceEnd + 1;

    // If the length in first sequence exceeds maximum length then extend
    // till score is optimized irrespective of length.
    if (referenceDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        // Clamp so the inclusive span is exactly MaximumAlignmentLength. The previous
        // "+ 1" produced a span of MaximumAlignmentLength + 2, still over the limit;
        // this mirrors the "start - Max + 1" clamp used when extending backwards.
        targetReference = currentAlignment.FirstSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
        isOverflow = true;
        methodName |= ModifiedSmithWaterman.OptimalFlag;
    }

    // If the length in second sequence exceeds maximum length then extend
    // till score is optimized irrespective of length.
    if (queryDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        targetQuery = currentAlignment.SecondSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
        if (isOverflow)
        {
            isDouble = true;
        }
        else
        {
            isOverflow = true;
        }

        methodName |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Both spans were capped: clear the sequence-end flag.
    if (isDouble)
    {
        methodName &= ~ModifiedSmithWaterman.SeqendFlag;
    }

    // Extend the sequence to next sequence (aligned/extended sequence).
    // The targets are passed by reference, so the aligner may move them.
    bool isClusterExtended = _nucmerAligner.ExtendSequence(
        referenceSequence,
        currentAlignment.FirstSequenceEnd,
        ref targetReference,
        querySequence,
        currentAlignment.SecondSequenceEnd,
        ref targetQuery,
        currentAlignment.Deltas,
        methodName);

    // A capped extension never counts as having reached the target.
    if (isClusterExtended && isOverflow)
    {
        isClusterExtended = false;
    }

    // New deltas were appended: merge the first new one with the existing alignment.
    if (diagonal < currentAlignment.Deltas.Count)
    {
        referenceDistance = (currentAlignment.FirstSequenceEnd - currentAlignment.FirstSequenceStart + 1)
            - currentAlignment.DeltaReferencePosition - 1;
        currentAlignment.Deltas[diagonal] +=
            (currentAlignment.Deltas[diagonal] > 0) ? referenceDistance : -referenceDistance;

        // Adjust the delta reference position
        for (int index = diagonal; index < currentAlignment.Deltas.Count; index++)
        {
            int deltaPosition = (int)currentAlignment.Deltas[index];
            currentAlignment.DeltaReferencePosition +=
                (deltaPosition > 0) ? deltaPosition : Math.Abs(deltaPosition) - 1;
        }
    }

    currentAlignment.FirstSequenceEnd = targetReference;
    currentAlignment.SecondSequenceEnd = targetQuery;

    return isClusterExtended;
}
/// <summary>
/// Attempts a backward extension of the current alignment towards the target alignment.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="alignments">List of alignments</param>
/// <param name="currentAlignment">Alignment being extended backwards</param>
/// <param name="targetAlignment">
/// Alignment to extend towards; when it is the last entry of <paramref name="alignments"/>
/// it is treated as "no target found".
/// </param>
/// <returns>Was cluster extended backward</returns>
private bool ExtendToPreviousSequence(
    ISequence referenceSequence,
    ISequence querySequence,
    IList<DeltaAlignment> alignments,
    DeltaAlignment currentAlignment,
    DeltaAlignment targetAlignment)
{
    int flags = ModifiedSmithWaterman.BackwardAlignFlag;
    bool capped = false;
    long backReference;
    long backQuery;

    if (alignments.Last() == targetAlignment)
    {
        // If the target alignment is not found then extend till the
        // start of sequence (0th Symbol)
        backReference = 0;
        backQuery = 0;
        flags |= ModifiedSmithWaterman.OptimalFlag;
    }
    else
    {
        backReference = targetAlignment.FirstSequenceEnd;
        backQuery = targetAlignment.SecondSequenceEnd;
    }

    // Reference span longer than the maximum: cap it and optimise on score instead.
    if ((currentAlignment.FirstSequenceStart - backReference + 1) > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        backReference = currentAlignment.FirstSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
        capped = true;
        flags |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Query span longer than the maximum: cap it and optimise on score instead.
    if ((currentAlignment.SecondSequenceStart - backQuery + 1) > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        backQuery = currentAlignment.SecondSequenceStart - ModifiedSmithWaterman.MaximumAlignmentLength + 1;
        capped = true;
        flags |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Extend the sequence to previous sequence (aligned/extended sequence)
    bool extended = _nucmerAligner.ExtendSequence(
        referenceSequence,
        currentAlignment.FirstSequenceStart,
        ref backReference,
        querySequence,
        currentAlignment.SecondSequenceStart,
        ref backQuery,
        currentAlignment.Deltas,
        flags);

    if (capped || alignments.Last() == targetAlignment)
    {
        extended = false;
    }

    if (extended)
    {
        // Extend the target forwards up to the current start, then let the target
        // take over the current end positions and drop the last list entry.
        ExtendToNextSequence(
            referenceSequence,
            querySequence,
            targetAlignment,
            currentAlignment.FirstSequenceStart,
            currentAlignment.SecondSequenceStart,
            ModifiedSmithWaterman.ForcedForwardAlignFlag);

        targetAlignment.FirstSequenceEnd = currentAlignment.FirstSequenceEnd;
        targetAlignment.SecondSequenceEnd = currentAlignment.SecondSequenceEnd;
        alignments.RemoveAt(alignments.Count - 1);
    }
    else
    {
        // Re-align forwards from the new start up to the old start, then adopt the new start.
        long forwardReference = currentAlignment.FirstSequenceStart;
        long forwardQuery = currentAlignment.SecondSequenceStart;
        _nucmerAligner.ExtendSequence(
            referenceSequence,
            backReference,
            ref forwardReference,
            querySequence,
            backQuery,
            ref forwardQuery,
            currentAlignment.Deltas,
            ModifiedSmithWaterman.ForcedForwardAlignFlag);

        currentAlignment.FirstSequenceStart = backReference;
        currentAlignment.SecondSequenceStart = backQuery;

        // Adjust the delta reference position
        foreach (int step in currentAlignment.Deltas)
        {
            currentAlignment.DeltaReferencePosition += (step > 0) ? step : Math.Abs(step) - 1;
        }
    }

    return extended;
}
/// <summary>
/// Extend the clusters of a synteny into delta alignments.
/// Clusters are visited in the order produced by SortCluster; each match of a cluster
/// either starts a new alignment (which is first extended backwards) or is merged into
/// the alignment currently being extended, and the alignment is then extended forwards
/// to the next match or to the next cluster.
/// </summary>
/// <param name="synteny">Synteny in which cluster needs to be extended.</param>
/// <returns>List of delta alignments</returns>
private List <DeltaAlignment> ExtendClusters(Synteny synteny)
{
    // True while the previous forward extension reached its target.
    bool isClusterExtended = false;
    List <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
    DeltaAlignment deltaAlignment = null;
    Cluster currentCluster;
    IList <Cluster> clusters = synteny.Clusters;

    // Sort the cluster by first sequence start
    clusters = SortCluster(clusters, FirstSequenceStart);
    Cluster targetCluster = synteny.Clusters.Last();

    // Two enumerators over the same sorted list: "cluster" drives the loop,
    // "previousCluster" is advanced manually in the branches below.
    IEnumerator <Cluster> previousCluster = clusters.GetEnumerator();
    previousCluster.MoveNext();
    IEnumerator <Cluster> cluster = clusters.GetEnumerator();
    while (cluster.MoveNext())
    {
        currentCluster = cluster.Current;

        // Skip clusters that are already fused or that lie inside an existing alignment,
        // unless the previous extension is still in progress.
        if (!isClusterExtended && (currentCluster.IsFused || IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
        {
            currentCluster.IsFused = true;
            previousCluster.MoveNext();

            // NOTE(review): this value is overwritten at the top of the next iteration.
            currentCluster = previousCluster.Current;
            continue;
        }

        // Extend the match
        foreach (MatchExtension match in currentCluster.Matches)
        {
            if (isClusterExtended)
            {
                // Only a match that starts exactly where the alignment ends is merged.
                if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset || deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                {
                    continue;
                }

                deltaAlignment.FirstSequenceEnd += match.Length - 1;
                deltaAlignment.SecondSequenceEnd += match.Length - 1;
            }
            else
            {
                // TODO: Do we need sequence here? Changed to sequence id.
                deltaAlignment = DeltaAlignment.NewAlignment(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    currentCluster,
                    match);
                deltaAlignments.Add(deltaAlignment);

                // Find the MUM which is a good candidate for extension in reverse direction
                DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);

                // TODO: the first half of this condition still needs to be verified.
                // When the backward extension reaches the target, continue with the target.
                if (targetAlignment != deltaAlignment &&
                    ExtendToPreviousSequence(
                        synteny.ReferenceSequence,
                        synteny.QuerySequence,
                        deltaAlignments,
                        deltaAlignment,
                        targetAlignment))
                {
                    deltaAlignment = targetAlignment;
                }
            }

            int methodName = ModifiedSmithWaterman.ForwardAlignFlag;
            long targetReference;
            long targetQuery;
            if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
            {
                // extend till the match in the current cluster
                MatchExtension nextMatch = currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                targetReference = nextMatch.ReferenceSequenceOffset;
                targetQuery = nextMatch.QuerySequenceOffset;
                isClusterExtended = ExtendToNextSequence(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    deltaAlignment,
                    targetReference,
                    targetQuery,
                    methodName);
            }
            else
            {
                // extend till next cluster; the targets start at the last position of each
                // sequence and are passed by reference to GetNextCluster.
                targetReference = synteny.ReferenceSequence.Count - 1;
                targetQuery = synteny.QuerySequence.Count() - 1;
                targetCluster = GetNextCluster(
                    clusters,
                    currentCluster,
                    ref targetReference,
                    ref targetQuery);

                // A target that is not one of the synteny's clusters: extend on score instead.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    methodName |= ModifiedSmithWaterman.OptimalFlag;
                }

                isClusterExtended = ExtendToNextSequence(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    deltaAlignment,
                    targetReference,
                    targetQuery,
                    methodName);
            }
        }

        // An extension towards a cluster outside the synteny never counts as extended.
        if (!synteny.Clusters.Contains(targetCluster))
        {
            isClusterExtended = false;
        }

        currentCluster.IsFused = true;

        // NOTE(review): both assignments to currentCluster below are overwritten at the top
        // of the next iteration; only the MoveNext() call has a lasting effect.
        if (!isClusterExtended)
        {
            previousCluster.MoveNext();
            currentCluster = previousCluster.Current;
        }
        else
        {
            currentCluster = targetCluster;
        }
    }

    return(deltaAlignments);
}
/// <summary>
/// Gets the DeltaAlignment at specified position of the file.
/// </summary>
/// <param name="position">
/// Position at which delta alignment is required. It must equal the id on the record's
/// "@" line.
/// </param>
/// <returns>Delta alignment.</returns>
/// <exception cref="FormatException">
/// The "@" line is missing, its id differs from <paramref name="position"/>,
/// or the record ends before the properties line.
/// </exception>
/// <exception cref="Exception">The reference id line (starting with "&gt;") is missing.</exception>
public DeltaAlignment GetDeltaAlignmentAt(long position)
{
    // NOTE(review): nothing visible here seeks to "position"; it is only compared with the
    // id read from the stream. Confirm that OpenRead/ReadNextLine position the reader.
    using (var reader = this.deltaStream.OpenRead())
    {
        // First line - "@" followed by the delta alignment id
        string line = ReadNextLine(reader);
        if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
        {
            throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
        }

        long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);
        if (position != deltaPosition)
        {
            throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.DeltaAlignmentIDDoesnotMatch, deltaPosition, position));
        }

        // Second line - ">" followed by the reference id
        line = ReadNextLine(reader);
        if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        string referenceId = line.Substring(1);

        // Third line - Query sequence id
        line = ReadNextLine(reader);
        string queryId = line;

        // fetch the query sequence from the query file
        ISequence querySequence = null;
        Sequence refEmpty = null;
        if (!string.IsNullOrEmpty(queryId))
        {
            // Get the id and remove any alphas - this can happen because the delta might
            // have "Reverse" appended to it when it's a reversed sequence.
            string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
            int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
            if (idx > 0)
            {
                id = id.Substring(0, idx);
            }

            long sequencePosition = long.Parse(id, CultureInfo.InvariantCulture);
            querySequence = this.QueryParser.GetSequenceAt(sequencePosition);

            // Placeholder reference sequence that only carries the reference id.
            refEmpty = new Sequence(querySequence.Alphabet, "A", false) { ID = referenceId };
        }

        DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, querySequence) { Id = deltaPosition };

        // Fourth line - properties of the delta alignment
        line = ReadNextLine(reader);
        if (line == null)
        {
            // Truncated record: report it as a format problem instead of failing with a
            // NullReferenceException on Split below.
            throw new FormatException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.CorruptedDeltaAlignmentFile, position));
        }

        // The file is machine-written, so numbers are parsed with the invariant culture,
        // like the ids above.
        CultureInfo invariant = CultureInfo.InvariantCulture;

        string[] deltaAlignmentProperties = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (deltaAlignmentProperties.Length == 7)
        {
            long temp;
            deltaAlignment.FirstSequenceStart = long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, invariant, out temp) ? temp : 0;
            deltaAlignment.FirstSequenceEnd = long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, invariant, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceStart = long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, invariant, out temp) ? temp : 0;
            deltaAlignment.SecondSequenceEnd = long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, invariant, out temp) ? temp : 0;

            // Look for a reversed sequence: end before start means reverse direction,
            // and the two positions are swapped so that start <= end.
            if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
            {
                temp = deltaAlignment.SecondSequenceEnd;
                deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                deltaAlignment.SecondSequenceStart = temp;
                deltaAlignment.QueryDirection = Cluster.ReverseDirection;
            }

            int error;
            deltaAlignment.Errors = int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, invariant, out error) ? error : 0;
            deltaAlignment.SimilarityErrors = int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, invariant, out error) ? error : 0;
            deltaAlignment.NonAlphas = int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, invariant, out error) ? error : 0;
        }

        // Fifth line - either a 0 - marks the end of the delta alignment or they are deltas.
        // The loop starts on the properties line, which does not parse as a single number
        // and is therefore skipped; it stops at end of stream or at a line starting with "*".
        while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
        {
            long temp;
            if (long.TryParse(line, NumberStyles.Integer, invariant, out temp))
            {
                deltaAlignment.Deltas.Add(temp);
            }

            // Read next line.
            line = reader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }
        }

        return deltaAlignment;
    }
}
/// <summary>
/// Checks DeltaAlignment.ToString() against the expected strings stored in the test
/// configuration xml, once for a forward alignment and once for a reverse alignment
/// whose reference sequence is read from a FastA file.
/// </summary>
public void ValidateDeltaAlignmentToString()
{
    ISequence refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence qrySeq = new Sequence(Alphabets.DNA, "GGGGG");

    // Forward alignment built from in-memory sequences.
    var delta = new DeltaAlignment(refSeq, qrySeq);
    delta.FirstSequenceEnd = 21;
    delta.SecondSequenceEnd = 20;

    string actualString = delta.ToString();
    string expectedString = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpectedNode);
    Assert.AreEqual(expectedString, actualString);

    // Read the reference sequences from the FastA file named in the xml.
    string filePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName,
        Constants.FilePathNode);
    List<ISequence> seqsList;
    using (var reader = File.OpenRead(filePath))
    {
        var parser = new FastAParser();
        parser.Alphabet = Alphabets.Protein;
        seqsList = parser.Parse(reader).ToList();
    }

    // Reverse alignment using the first parsed sequence as reference.
    delta = new DeltaAlignment(seqsList[0], qrySeq);
    delta.FirstSequenceEnd = 21;
    delta.SecondSequenceStart = 20;
    delta.QueryDirection = Cluster.ReverseDirection;

    actualString = delta.ToString();
    expectedString = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpected2Node);
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Creates a delta alignment spanning a single match.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="cluster">Cluster supplying the query direction</param>
/// <param name="match">Match supplying the offsets and length</param>
/// <returns>Newly created DeltaAlignment object</returns>
internal static DeltaAlignment NewAlignment(
    ISequence referenceSequence,
    ISequence querySequence,
    Cluster cluster,
    MatchExtension match)
{
    DeltaAlignment result = new DeltaAlignment(referenceSequence, querySequence);

    // Start positions come straight from the match offsets.
    result.FirstSequenceStart = match.ReferenceSequenceOffset;
    result.SecondSequenceStart = match.QuerySequenceOffset;

    // End positions are inclusive, hence the "- 1".
    result.FirstSequenceEnd = match.ReferenceSequenceOffset + match.Length - 1;
    result.SecondSequenceEnd = match.QuerySequenceOffset + match.Length - 1;

    result.QueryDirection = cluster.QueryDirection;
    return result;
}
/// <summary>
/// Builds the query-side sequence of the aligned region from the delta list:
/// a gap symbol is written for every positive delta and one query symbol is
/// skipped for every negative delta.
/// </summary>
/// <param name="deltaAlignment">DeltaAlignment instance.</param>
/// <returns>
/// A new sequence over <c>AmbiguousDnaAlphabet</c> carrying the query sequence's ID.
/// </returns>
private static ISequence GetSequenceFromDelta(DeltaAlignment deltaAlignment)
{
    long deltaTotal = deltaAlignment.Deltas.Count;
    int deltaCursor = 0;

    // Current delta; 0 means "no further indel".
    long currentDelta = 0;
    if (deltaCursor < deltaTotal)
    {
        currentDelta = deltaAlignment.Deltas[deltaCursor];
        deltaCursor++;
    }

    // Query position at which the current indel applies.
    long indelPosition = deltaAlignment.SecondSequenceStart - 1;
    indelPosition += currentDelta >= 0 ? currentDelta : -currentDelta;

    // Aligned query length, plus one slot per inserted gap, minus one per skipped symbol.
    long outputLength = deltaAlignment.SecondSequenceEnd - deltaAlignment.SecondSequenceStart + 1
        + deltaAlignment.Deltas.Count(delta => delta > 0)
        - deltaAlignment.Deltas.Count(delta => delta < 0);
    byte[] output = new byte[outputLength];
    long writeIndex = 0;

    long queryIndex = deltaAlignment.SecondSequenceStart;
    while (queryIndex <= deltaAlignment.SecondSequenceEnd)
    {
        bool atIndel = currentDelta != 0 && queryIndex == indelPosition;
        if (!atIndel)
        {
            // Ordinary position: copy the query symbol.
            output[writeIndex] = deltaAlignment.QuerySequence[queryIndex];
            writeIndex++;
            queryIndex++;
            continue;
        }

        if (currentDelta > 0)
        {
            // A symbol is deleted from the query, so a gap symbol is written instead;
            // the query position itself is not consumed.
            output[writeIndex] = AmbiguousDnaAlphabet.Instance.Gap;
            writeIndex++;
            indelPosition--;
        }
        else
        {
            // A symbol is inserted in the query, so that query symbol is skipped.
            queryIndex++;
        }

        // Move on to the next delta, or to 0 when the list is exhausted.
        if (deltaCursor < deltaTotal)
        {
            currentDelta = deltaAlignment.Deltas[deltaCursor];
            deltaCursor++;
        }
        else
        {
            currentDelta = 0;
        }

        indelPosition += currentDelta >= 0 ? currentDelta : -currentDelta;
    }

    return new Sequence(AmbiguousDnaAlphabet.Instance, output) { ID = deltaAlignment.QuerySequence.ID };
}
/// <summary>
/// Checks the text produced by DeltaAlignment.ToString() for a forward alignment
/// whose end positions are the only properties set explicitly.
/// </summary>
public void TestDeltaAlignmentToString()
{
    ISequence refSeq = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence qrySeq = new Sequence(Alphabets.DNA, "GGGGG");
    DeltaAlignment delta = new DeltaAlignment(refSeq, qrySeq);
    delta.FirstSequenceEnd = 21;
    delta.SecondSequenceEnd = 20;

    string actualString = delta.ToString();
    string expectedString = "Ref ID= Query Id= Ref start=0 Ref End=21 Query start=0 Query End=20, Direction=FORWARD";

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Extend the cluster forward
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="currentAlignment">
/// Alignment to extend; its end positions, deltas and delta reference position are updated.
/// </param>
/// <param name="targetReference">target position in reference sequence</param>
/// <param name="targetQuery">target position in query sequence</param>
/// <param name="methodName">Bit flags selecting the alignment method</param>
/// <returns>
/// True when the aligner reported reaching the target and neither span had to be capped
/// to <c>ModifiedSmithWaterman.MaximumAlignmentLength</c>; otherwise false.
/// </returns>
private bool ExtendToNextSequence(
    ISequence referenceSequence,
    ISequence querySequence,
    DeltaAlignment currentAlignment,
    long targetReference,
    long targetQuery,
    int methodName)
{
    bool isOverflow = false;
    bool isDouble = false;

    // Number of deltas before this call; anything at or after this index is new.
    int diagonal = currentAlignment.Deltas.Count;

    // Inclusive span lengths from the current end to the requested target.
    long referenceDistance = targetReference - currentAlignment.FirstSequenceEnd + 1;
    long queryDistance = targetQuery - currentAlignment.SecondSequenceEnd + 1;

    // If the length in first sequence exceeds maximum length then extend
    // till score is optimized irrespective of length.
    if (referenceDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        // Clamp so the inclusive span is exactly MaximumAlignmentLength. The former
        // "+ 1" left a span of MaximumAlignmentLength + 2, which still exceeded the
        // limit tested above; the backward extension clamps with "start - Max + 1".
        targetReference = currentAlignment.FirstSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
        isOverflow = true;
        methodName |= ModifiedSmithWaterman.OptimalFlag;
    }

    // If the length in second sequence exceeds maximum length then extend
    // till score is optimized irrespective of length.
    if (queryDistance > ModifiedSmithWaterman.MaximumAlignmentLength)
    {
        targetQuery = currentAlignment.SecondSequenceEnd + ModifiedSmithWaterman.MaximumAlignmentLength - 1;
        if (isOverflow)
        {
            isDouble = true;
        }
        else
        {
            isOverflow = true;
        }

        methodName |= ModifiedSmithWaterman.OptimalFlag;
    }

    // Both spans were capped: clear the sequence-end flag.
    if (isDouble)
    {
        methodName &= ~ModifiedSmithWaterman.SeqendFlag;
    }

    // Extend the sequence to next sequence (aligned/extended sequence).
    // The targets are passed by reference, so the aligner may move them.
    bool isClusterExtended = _nucmerAligner.ExtendSequence(
        referenceSequence,
        currentAlignment.FirstSequenceEnd,
        ref targetReference,
        querySequence,
        currentAlignment.SecondSequenceEnd,
        ref targetQuery,
        currentAlignment.Deltas,
        methodName);

    // A capped extension never counts as having reached the target.
    if (isClusterExtended && isOverflow)
    {
        isClusterExtended = false;
    }

    // New deltas were appended: merge the first new one with the existing alignment.
    if (diagonal < currentAlignment.Deltas.Count)
    {
        referenceDistance = (currentAlignment.FirstSequenceEnd - currentAlignment.FirstSequenceStart + 1)
            - currentAlignment.DeltaReferencePosition - 1;
        currentAlignment.Deltas[diagonal] +=
            (currentAlignment.Deltas[diagonal] > 0) ? referenceDistance : -referenceDistance;

        // Adjust the delta reference position
        for (int index = diagonal; index < currentAlignment.Deltas.Count; index++)
        {
            int deltaPosition = (int)currentAlignment.Deltas[index];
            currentAlignment.DeltaReferencePosition +=
                (deltaPosition > 0) ? deltaPosition : Math.Abs(deltaPosition) - 1;
        }
    }

    currentAlignment.FirstSequenceEnd = targetReference;
    currentAlignment.SecondSequenceEnd = targetQuery;

    return isClusterExtended;
}
/// <summary>
/// Find the previous eligible sequence for alignment/extension.
/// </summary>
/// <remarks>
/// Only alignments with the same query direction that end at or before the start of
/// <paramref name="currentAlignment"/> in both sequences are considered. The first one
/// whose gap is short enough (or scores non-negative) wins immediately; otherwise the one
/// with the smallest weighted gap below the initial distance is chosen. When nothing
/// qualifies, the last alignment in <paramref name="alignments"/> is returned.
/// The input is enumerated exactly once.
/// </remarks>
/// <param name="alignments">List of alignment</param>
/// <param name="currentAlignment">Current alignment</param>
/// <returns>Reverse alignment</returns>
/// <exception cref="ArgumentNullException"><paramref name="alignments"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="alignments"/> is empty.</exception>
private DeltaAlignment GetPreviousAlignment(
    IEnumerable<DeltaAlignment> alignments,
    DeltaAlignment currentAlignment)
{
    long alignmentFirstStart = currentAlignment.FirstSequenceStart;
    long alignmentSecondStart = currentAlignment.SecondSequenceStart;
    long distance = Math.Min(alignmentFirstStart, alignmentSecondStart);

    if (alignments == null)
    {
        throw new ArgumentNullException("alignments");
    }

    DeltaAlignment bestCandidate = null;
    DeltaAlignment lastAlignment = null;
    bool hasAlignments = false;

    foreach (DeltaAlignment alignment in alignments)
    {
        // Remember the final element so the fallback needs no second enumeration.
        hasAlignments = true;
        lastAlignment = alignment;

        if (currentAlignment.QueryDirection != alignment.QueryDirection)
        {
            continue;
        }

        long alignmentFirstEnd = alignment.FirstSequenceEnd;
        long alignmentSecondEnd = alignment.SecondSequenceEnd;

        if (alignmentFirstEnd > alignmentFirstStart || alignmentSecondEnd > alignmentSecondStart)
        {
            continue;
        }

        long firstGap = alignmentFirstStart - alignmentFirstEnd;
        long secondGap = alignmentSecondStart - alignmentSecondEnd;
        long gapHigh = Math.Max(firstGap, secondGap);
        long gapLow = Math.Min(firstGap, secondGap);

        if (gapHigh < BreakLength
            || ((gapLow * _nucmerAligner.ValidScore)
                + ((gapHigh - gapLow) * _nucmerAligner.GapExtensionScore)) >= 0)
        {
            // Close enough (or cheap enough) to extend to: take it and stop searching.
            bestCandidate = alignment;
            break;
        }

        long weightedGap = (gapHigh << 1) - gapLow;
        if (weightedGap < distance)
        {
            bestCandidate = alignment;
            distance = weightedGap;
        }
    }

    if (!hasAlignments)
    {
        // Same failure mode as the previous Enumerable.Last() call on an empty sequence.
        throw new InvalidOperationException("Sequence contains no elements");
    }

    return bestCandidate ?? lastAlignment;
}
/// <summary>
/// Starts parsing from the specified StreamReader.
/// </summary>
/// <remarks>
/// Each record is read as: a line starting with "@" (delta id), a line starting with
/// "&gt;" (reference id), the query sequence id, a line of seven space-separated
/// properties, then delta values until a line starting with "*".
/// All numbers are parsed with the invariant culture so the result does not depend on
/// the current thread culture. Because this is an iterator, input errors surface while
/// the returned sequence is being enumerated.
/// </remarks>
/// <param name="streamReader">Stream reader to parse.</param>
/// <returns>IEnumerable of DeltaAlignments.</returns>
/// <exception cref="Exception">
/// The stream is empty or a record header, query id or properties line is missing.
/// </exception>
private IEnumerable<DeltaAlignment> ParseFrom(StreamReader streamReader)
{
    this.parsingReaders.Add(streamReader);

    string lastReadQuerySequenceId = string.Empty;
    ISequence sequence = null;

    if (streamReader.EndOfStream)
    {
        throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
    }

    string line = ReadNextLine(streamReader);
    do
    {
        if (line == null || !line.StartsWith("@", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        long deltaPosition = long.Parse(line.Substring(1), CultureInfo.InvariantCulture);

        line = ReadNextLine(streamReader);
        if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        // First line - reference id
        string referenceId = line.Substring(1);

        // Second line - Query sequence id
        line = ReadNextLine(streamReader);
        string queryId = line;

        // Without a query id no alignment can be built; report a bad file instead of
        // failing later with a NullReferenceException.
        if (string.IsNullOrEmpty(queryId))
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        // fetch the query sequence from the query file
        if (queryId != lastReadQuerySequenceId)
        {
            // Get the id and remove any alphas - this can happen because the delta might
            // have "Reverse" appended to it when it's a reversed sequence.
            string id = queryId.Substring(queryId.LastIndexOf('@') + 1);
            int idx = Array.FindIndex(id.ToCharArray(), c => !Char.IsDigit(c));
            if (idx > 0)
            {
                id = id.Substring(0, idx);
            }

            long seqPosition = long.Parse(id, CultureInfo.InvariantCulture);
            sequence = this.QueryParser.GetSequenceAt(seqPosition);
            lastReadQuerySequenceId = queryId;
        }

        Sequence refEmpty = new Sequence(sequence.Alphabet, "A", false) { ID = referenceId };
        DeltaAlignment deltaAlignment = new DeltaAlignment(refEmpty, sequence);
        deltaAlignment.Id = deltaPosition;

        // Fourth line - properties of delta alignment
        line = ReadNextLine(streamReader);
        if (line == null)
        {
            throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
        }

        string[] deltaAlignmentProperties =
            line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

        if (deltaAlignmentProperties.Length == 7)
        {
            // Unparseable fields fall back to 0.
            long position;
            deltaAlignment.FirstSequenceStart =
                long.TryParse(deltaAlignmentProperties[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.FirstSequenceEnd =
                long.TryParse(deltaAlignmentProperties[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.SecondSequenceStart =
                long.TryParse(deltaAlignmentProperties[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;
            deltaAlignment.SecondSequenceEnd =
                long.TryParse(deltaAlignmentProperties[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out position) ? position : 0;

            // Look for a reversed sequence
            if (deltaAlignment.SecondSequenceEnd < deltaAlignment.SecondSequenceStart)
            {
                position = deltaAlignment.SecondSequenceEnd;
                deltaAlignment.SecondSequenceEnd = deltaAlignment.SecondSequenceStart;
                deltaAlignment.SecondSequenceStart = position;
                deltaAlignment.QueryDirection = Cluster.ReverseDirection;
            }

            int error;
            deltaAlignment.Errors =
                int.TryParse(deltaAlignmentProperties[4], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.SimilarityErrors =
                int.TryParse(deltaAlignmentProperties[5], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
            deltaAlignment.NonAlphas =
                int.TryParse(deltaAlignmentProperties[6], NumberStyles.Integer, CultureInfo.InvariantCulture, out error) ? error : 0;
        }

        // Remaining lines up to the one starting with "*" are deltas; lines that are
        // not a single integer (including the properties line itself) are ignored.
        while (line != null && !line.StartsWith("*", StringComparison.OrdinalIgnoreCase))
        {
            long delta;
            if (long.TryParse(line, NumberStyles.Integer, CultureInfo.InvariantCulture, out delta))
            {
                deltaAlignment.Deltas.Add(delta);
            }

            // Read next line.
            line = streamReader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = streamReader.ReadLine();
            }
        }

        yield return deltaAlignment;

        // Read the next line
        line = streamReader.ReadLine();
    }
    while (line != null);
}