Example #1
0
        /// <summary>
        /// Resolves a single consensus symbol from the DNA bases whose occurrence
        /// share is strictly greater than 0.45.
        /// </summary>
        /// <param name="aper">Occurrence share of A (compared against 0.45).</param>
        /// <param name="tper">Occurrence share of T (compared against 0.45).</param>
        /// <param name="gper">Occurrence share of G (compared against 0.45).</param>
        /// <param name="cper">Occurrence share of C (compared against 0.45).</param>
        /// <returns>A one-character string holding the symbol returned by the consensus resolver.</returns>
        private static string GetMoreOccurences(
            double aper, double tper, double gper, double cper)
        {
            // NOTE(review): shares are presumably fractions in the range 0..1 - confirm with the caller.
            const double Threshold = 0.45;

            HashSet<byte> dominantBases = new HashSet<byte>();

            // Bases are collected in the fixed order A, T, G, C.
            if (aper > Threshold)
            {
                dominantBases.Add(Alphabets.DNA.A);
            }

            if (tper > Threshold)
            {
                dominantBases.Add(Alphabets.DNA.T);
            }

            if (gper > Threshold)
            {
                dominantBases.Add(Alphabets.DNA.G);
            }

            if (cper > Threshold)
            {
                dominantBases.Add(Alphabets.DNA.C);
            }

            // NOTE(review): when no share exceeds the threshold the resolver receives an
            // empty array - confirm that GetConsensus accepts that input.
            byte[] candidates = dominantBases.ToArray();
            byte consensus = resolver.GetConsensus(candidates);

            return new string((char)consensus, 1);
        }
Example #2
0
        /// <summary>
        /// Generates consensus sequences (contigs) from the alignment layout.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">
        /// Delta alignments between the reference and the reads.
        /// NOTE(review): the walk below presumably expects the deltas ordered by
        /// <c>FirstSequenceStart</c> - confirm against the caller.
        /// </param>
        /// <returns>A lazily evaluated sequence of contigs, one per run of overlapping or adjacent deltas.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when
        /// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Validation lives in this non-iterator wrapper: inside an iterator method the
            // check would be deferred until the caller starts enumerating the result.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Iterator behind <c>GenerateConsensus</c>: walks the deltas position by position and
        /// yields one contig each time no delta covers the next position.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null collection of delta alignments.</param>
        /// <returns>The contigs, produced lazily.</returns>
        private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // NOTE(review): the second argument (49) is presumably the consensus threshold - confirm
            // against SimpleConsensusResolver.
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // Deltas that cover the current position, each mapped to its sequence.
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            List<byte> currentContig = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // no deltas
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long currentAlignmentStartOffset = 0;
            long currentIndex = 0;
            long index = 0;

            // Next delta that has not yet been added to a contig; null once the input is exhausted.
            DeltaAlignment lastDelta = alignmentBetweenReferenceAndReads[index];

            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // Loop through all deltas at the current index and find the consensus
                do
                {
                    // Pull in every pending delta that starts aligning exactly at the current position
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Get next delta (null when there are no more)
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }

                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int symbolCounter = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        // Position inside this delta's sequence that maps to the current contig position
                        long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // Last symbol of this delta consumed; remove it after the enumeration,
                        // because the dictionary must not be modified while it is being enumerated.
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    if (deltasToRemove.Count > 0)
                    {
                        for (int i = 0; i < deltasToRemove.Count; i++)
                        {
                            deltasInCurrentContig.Remove(deltasToRemove[i]);
                        }

                        deltasToRemove.Clear();
                    }

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // All active deltas ended, but the next delta starts right at the following
                    // position: keep extending the same contig instead of closing it.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Check next delta (null when there are no more)
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }
                }
                while (deltasInCurrentContig.Count > 0);

                // ToArray copies the buffer, so clearing currentContig for the next contig is safe.
                yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
            }
            while (lastDelta != null);
        }
Example #3
0
        /// <summary>
        /// Generates consensus sequences (contigs) from the alignment layout.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">
        /// Delta alignments between the reference and the reads.
        /// NOTE(review): the walk below presumably expects the deltas ordered by
        /// <c>FirstSequenceStart</c> - confirm against the caller.
        /// </param>
        /// <returns>The contigs, held in a dictionary keyed by their start offset on the reference.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(IEnumerable<DeltaAlignment> alignmentBetweenReferenceAndReads)
        {
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance);
            Dictionary<long, Sequence> outputSequences = new Dictionary<long, Sequence>();

            // Deltas that cover the current position, each mapped to the aligned part of its query sequence.
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            List<byte> currentContig = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // The enumerator is IDisposable; the using block releases it on every exit path.
            using (IEnumerator<DeltaAlignment> deltaEnumerator = alignmentBetweenReferenceAndReads.GetEnumerator())
            {
                // no deltas
                if (!deltaEnumerator.MoveNext())
                {
                    return outputSequences.Values;
                }

                long currentAlignmentStartOffset = 0;
                long currentIndex = 0;

                // Next delta that has not yet been added to a contig; null once the input is exhausted.
                DeltaAlignment lastDelta = deltaEnumerator.Current;

                do
                {
                    // Starting a new contig
                    if (deltasInCurrentContig.Count == 0)
                    {
                        currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                        currentIndex = 0;
                        currentContig.Clear();
                    }

                    // Loop through all deltas at the current index and find the consensus
                    do
                    {
                        // Pull in every pending delta that starts aligning exactly at the current position
                        while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                        {
                            deltasInCurrentContig.Add(lastDelta, lastDelta.QuerySequence.GetSubSequence(lastDelta.SecondSequenceStart, (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                            // Get next delta (null when there are no more)
                            lastDelta = deltaEnumerator.MoveNext() ? deltaEnumerator.Current : null;
                        }

                        byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                        int symbolCounter = 0;

                        foreach (var delta in deltasInCurrentContig)
                        {
                            // Position inside this delta's sequence that maps to the current contig position
                            long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                            symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                            // Last symbol of this delta consumed; remove it after the enumeration,
                            // because the dictionary must not be modified while it is being enumerated.
                            if (inDeltaIndex == delta.Value.Count - 1)
                            {
                                deltasToRemove.Add(delta.Key);
                            }
                        }

                        if (deltasToRemove.Count > 0)
                        {
                            foreach (var deltaToRemove in deltasToRemove)
                            {
                                deltasInCurrentContig.Remove(deltaToRemove);
                            }

                            deltasToRemove.Clear();
                        }

                        byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                        currentContig.Add(consensusSymbol);

                        currentIndex++;

                        // All active deltas ended, but the next delta starts right at the following
                        // position: keep extending the same contig instead of closing it.
                        if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                        {
                            deltasInCurrentContig.Add(lastDelta, lastDelta.QuerySequence.GetSubSequence(lastDelta.SecondSequenceStart, (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                            // Check next delta (null when there are no more)
                            lastDelta = deltaEnumerator.MoveNext() ? deltaEnumerator.Current : null;
                        }
                    }
                    while (deltasInCurrentContig.Count > 0);

                    // Contigs are keyed by start offset; Dictionary.Add throws if two contigs
                    // share the same offset. ToArray copies the buffer, so reuse of
                    // currentContig for the next contig is safe.
                    outputSequences.Add(currentAlignmentStartOffset, new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false));
                }
                while (lastDelta != null);
            }

            return outputSequences.Values;
        }
Example #4
0
        /// <summary>
        /// Generates consensus sequences (contigs) from the alignment layout.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">
        /// Delta alignments between the reference and the reads.
        /// NOTE(review): the walk below presumably expects the deltas ordered by
        /// <c>FirstSequenceStart</c> - confirm against the caller.
        /// </param>
        /// <returns>A lazily evaluated sequence of contigs, one per run of overlapping or adjacent deltas.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown immediately (not on first enumeration) when
        /// <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // An iterator body does not run until the first MoveNext, so the argument check
            // is kept in this plain wrapper to fail fast at the call site.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return BuildConsensusContigs(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Lazily builds the contigs for <c>GenerateConsensus</c> by resolving one consensus
        /// symbol per reference position covered by at least one delta.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null collection of delta alignments.</param>
        /// <returns>The contigs, produced lazily.</returns>
        private static IEnumerable<ISequence> BuildConsensusContigs(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // NOTE(review): the second argument (49) is presumably the consensus threshold - confirm
            // against SimpleConsensusResolver.
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // Holds each delta covering the current position together with its sequence.
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            long currentAlignmentStartOffset = 0;
            long currentIndex = 0;

            List<byte> currentContig = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // no deltas
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long index = 0;

            // Next delta not yet placed in a contig; becomes null when the collection is exhausted.
            DeltaAlignment lastDelta = alignmentBetweenReferenceAndReads[index];
            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // Loop through all deltas at the current index and find the consensus
                do
                {
                    // Add every pending delta that starts aligning at the current position
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Get next delta (null when there are no more)
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }

                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int symbolCounter = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        // Offset into this delta's sequence for the current contig position
                        long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // Delta fully consumed; removal is postponed because the dictionary
                        // must not be modified while it is being enumerated.
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    if (deltasToRemove.Count > 0)
                    {
                        for (int i = 0; i < deltasToRemove.Count; i++)
                        {
                            deltasInCurrentContig.Remove(deltasToRemove[i]);
                        }

                        deltasToRemove.Clear();
                    }

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // No active delta remains, but the next one begins at the very next
                    // position, so the current contig is extended rather than closed.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Check next delta (null when there are no more)
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }
                }
                while (deltasInCurrentContig.Count > 0);

                // ToArray copies the buffer, so clearing currentContig for the next contig is safe.
                yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
            }
            while (lastDelta != null);
        }