/// <summary>
/// Collects the DNA bases whose share is strictly greater than 0.45 and returns
/// the consensus symbol of that set as a one-character string.
/// </summary>
/// <param name="aper">Share of A occurrences.</param>
/// <param name="tper">Share of T occurrences.</param>
/// <param name="gper">Share of G occurrences.</param>
/// <param name="cper">Share of C occurrences.</param>
/// <returns>A string holding the single symbol produced by the consensus resolver.</returns>
private static string GetMoreOccurences(double aper, double tper, double gper, double cper)
{
    const double Threshold = 0.45;

    // Shares and bases are kept in matching order: A, T, G, C.
    double[] shares = { aper, tper, gper, cper };
    byte[] bases = { Alphabets.DNA.A, Alphabets.DNA.T, Alphabets.DNA.G, Alphabets.DNA.C };

    HashSet<byte> dominantBases = new HashSet<byte>();
    for (int i = 0; i < shares.Length; i++)
    {
        if (shares[i] > Threshold)
        {
            dominantBases.Add(bases[i]);
        }
    }

    // NOTE(review): `resolver` is declared outside this method; when no share exceeds
    // the threshold it receives an empty array - confirm that is handled there.
    byte consensus = resolver.GetConsensus(dominantBases.ToArray());
    return ((char)consensus).ToString();
}
/// <summary>
/// Generates consensus sequences (contigs) from the delta alignments between a reference and reads.
/// </summary>
/// <remarks>
/// The argument is validated as soon as this method is called; the contigs themselves are
/// produced lazily while the returned sequence is enumerated.
/// </remarks>
/// <param name="alignmentBetweenReferenceAndReads">Delta alignments between the reference and the reads.</param>
/// <returns>Lazily evaluated sequence of contigs.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
/// </exception>
public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // Validate here, outside the iterator, so the exception is raised at call time
    // instead of being deferred until the first MoveNext().
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
}

/// <summary>
/// Iterator behind <c>GenerateConsensus</c>: walks the deltas in collection order and yields one
/// contig per run of deltas that overlap or directly follow one another on the reference.
/// </summary>
/// <param name="alignments">Non-null collection of delta alignments.</param>
/// <returns>Lazily evaluated sequence of contigs.</returns>
private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignments)
{
    SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

    // Deltas covering the position currently being resolved, with their sequences.
    // Original author's note: this is not expected to grow beyond a few hundred entries.
    Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();
    List<byte> currentContig = new List<byte>();
    List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();
    long currentAlignmentStartOffset = 0;
    long currentIndex = 0;

    // No deltas, no contigs.
    if (alignments.Count == 0)
    {
        yield break;
    }

    long index = 0;
    DeltaAlignment lastDelta = alignments[index];

    do
    {
        // Starting a new contig at the reference offset of the next unread delta.
        if (deltasInCurrentContig.Count == 0)
        {
            currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
            currentIndex = 0;
            currentContig.Clear();
        }

        // Resolve one consensus symbol per pass until no delta covers the position.
        do
        {
            // Admit every delta that starts exactly at the current reference position.
            while (lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                index++;
                lastDelta = alignments.Count > index ? alignments[index] : null;
            }

            // Gather the symbol each active delta contributes at this position.
            byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
            int symbolCounter = 0;
            foreach (var delta in deltasInCurrentContig)
            {
                long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                // The delta's last symbol was just consumed; retire it after the enumeration.
                if (inDeltaIndex == delta.Value.Count - 1)
                {
                    deltasToRemove.Add(delta.Key);
                }
            }

            // Removal is deferred because the dictionary cannot be modified while enumerating it.
            foreach (DeltaAlignment finished in deltasToRemove)
            {
                deltasInCurrentContig.Remove(finished);
            }

            deltasToRemove.Clear();

            currentContig.Add(resolver.GetConsensus(symbolsAtCurrentIndex));
            currentIndex++;

            // Every delta ended, but the next one starts right here: keep extending this contig.
            if (deltasInCurrentContig.Count == 0
                && lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                index++;
                lastDelta = alignments.Count > index ? alignments[index] : null;
            }
        }
        while (deltasInCurrentContig.Count > 0);

        yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
    }
    while (lastDelta != null);
}
/// <summary>
/// Generates consensus sequences (contigs) from the delta alignments between a reference and reads.
/// </summary>
/// <remarks>
/// The input is enumerated exactly once and its enumerator is disposed before returning.
/// NOTE(review): contigs are stored in a dictionary keyed by their starting reference offset,
/// so two contigs starting at the same offset make <c>Dictionary.Add</c> throw an
/// <see cref="ArgumentException"/> - presumably the input is ordered so this cannot happen; confirm.
/// </remarks>
/// <param name="alignmentBetweenReferenceAndReads">Delta alignments between the reference and the reads.</param>
/// <returns>The contigs, exposed as the value collection of the offset-keyed dictionary.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
/// </exception>
public static IEnumerable<ISequence> GenerateConsensus(IEnumerable<DeltaAlignment> alignmentBetweenReferenceAndReads)
{
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance);
    Dictionary<long, Sequence> outputSequences = new Dictionary<long, Sequence>();

    // Deltas covering the position currently being resolved, with their query sub-sequences.
    Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();
    List<byte> currentContig = new List<byte>();
    List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();
    long currentAlignmentStartOffset = 0;
    long currentIndex = 0;

    // IEnumerator<T> is IDisposable; the using block releases it on every exit path.
    using (IEnumerator<DeltaAlignment> deltaEnumerator = alignmentBetweenReferenceAndReads.GetEnumerator())
    {
        // No deltas, no contigs.
        if (!deltaEnumerator.MoveNext())
        {
            return outputSequences.Values;
        }

        DeltaAlignment lastDelta = deltaEnumerator.Current;

        do
        {
            // Starting a new contig at the reference offset of the next unread delta.
            if (deltasInCurrentContig.Count == 0)
            {
                currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                currentIndex = 0;
                currentContig.Clear();
            }

            // Resolve one consensus symbol per pass until no delta covers the position.
            do
            {
                // Admit every delta that starts exactly at the current reference position.
                while (lastDelta != null
                    && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                {
                    deltasInCurrentContig.Add(
                        lastDelta,
                        lastDelta.QuerySequence.GetSubSequence(
                            lastDelta.SecondSequenceStart,
                            (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                    lastDelta = deltaEnumerator.MoveNext() ? deltaEnumerator.Current : null;
                }

                // Gather the symbol each active delta contributes at this position.
                byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                int symbolCounter = 0;
                foreach (var delta in deltasInCurrentContig)
                {
                    long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                    symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                    // The delta's last symbol was just consumed; retire it after the enumeration.
                    if (inDeltaIndex == delta.Value.Count - 1)
                    {
                        deltasToRemove.Add(delta.Key);
                    }
                }

                // Removal is deferred because the dictionary cannot be modified while enumerating it.
                foreach (var deltaToRemove in deltasToRemove)
                {
                    deltasInCurrentContig.Remove(deltaToRemove);
                }

                deltasToRemove.Clear();

                currentContig.Add(resolver.GetConsensus(symbolsAtCurrentIndex));
                currentIndex++;

                // Every delta ended, but the next one starts right here: keep extending this contig.
                if (deltasInCurrentContig.Count == 0
                    && lastDelta != null
                    && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                {
                    deltasInCurrentContig.Add(
                        lastDelta,
                        lastDelta.QuerySequence.GetSubSequence(
                            lastDelta.SecondSequenceStart,
                            (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                    lastDelta = deltaEnumerator.MoveNext() ? deltaEnumerator.Current : null;
                }
            }
            while (deltasInCurrentContig.Count > 0);

            outputSequences.Add(
                currentAlignmentStartOffset,
                new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false));
        }
        while (lastDelta != null);
    }

    return outputSequences.Values;
}
/// <summary>
/// Generates consensus sequences (contigs) from the delta alignments between a reference and reads.
/// </summary>
/// <remarks>
/// Argument validation happens when this method is called; contig construction is deferred
/// until the returned sequence is enumerated.
/// </remarks>
/// <param name="alignmentBetweenReferenceAndReads">Delta alignments between the reference and the reads.</param>
/// <returns>Lazily evaluated sequence of contigs.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
/// </exception>
public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // An iterator body does not run until enumeration starts, so the null check lives
    // in this non-iterator wrapper to fail fast at the call site.
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    return GenerateConsensusCore(alignmentBetweenReferenceAndReads);
}

/// <summary>
/// Lazy worker for <c>GenerateConsensus</c>: reads the deltas in collection order and yields a
/// contig each time the set of deltas covering the current reference position becomes empty.
/// </summary>
/// <param name="alignments">Non-null collection of delta alignments.</param>
/// <returns>Lazily evaluated sequence of contigs.</returns>
private static IEnumerable<ISequence> GenerateConsensusCore(DeltaAlignmentCollection alignments)
{
    SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

    // Active deltas mapped to their sequences.
    // Original author's note: not expected to exceed a few hundred entries in the worst case.
    Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();
    long currentAlignmentStartOffset = 0;
    long currentIndex = 0;
    List<byte> currentContig = new List<byte>();
    List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

    // Nothing to align.
    if (alignments.Count == 0)
    {
        yield break;
    }

    long index = 0;
    DeltaAlignment lastDelta = alignments[index];

    do
    {
        // A new contig begins at the reference offset of the next unread delta.
        if (deltasInCurrentContig.Count == 0)
        {
            currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
            currentIndex = 0;
            currentContig.Clear();
        }

        // One consensus symbol per pass, for as long as some delta covers the position.
        do
        {
            // Pull in all deltas whose start coincides with the current reference position.
            while (lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                index++;
                lastDelta = alignments.Count > index ? alignments[index] : null;
            }

            // Collect the symbol contributed by each active delta.
            byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
            int symbolCounter = 0;
            foreach (var delta in deltasInCurrentContig)
            {
                long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                // This was the delta's final symbol; schedule it for removal.
                if (inDeltaIndex == delta.Value.Count - 1)
                {
                    deltasToRemove.Add(delta.Key);
                }
            }

            // Remove after the foreach: a dictionary must not change while being enumerated.
            for (int i = 0; i < deltasToRemove.Count; i++)
            {
                deltasInCurrentContig.Remove(deltasToRemove[i]);
            }

            deltasToRemove.Clear();

            byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
            currentContig.Add(consensusSymbol);
            currentIndex++;

            // All deltas are exhausted but the next one is adjacent: continue the same contig.
            if (deltasInCurrentContig.Count == 0
                && lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                index++;
                lastDelta = alignments.Count > index ? alignments[index] : null;
            }
        }
        while (deltasInCurrentContig.Count > 0);

        yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
    }
    while (lastDelta != null);
}