Пример #1
0
        /// <summary>
        /// Prints the regions covered by orphan reads of an alignment file, then the
        /// result of merging the overlapping regions, reported as chromosomal hot spots.
        /// </summary>
        /// <param name="filename">
        /// Path of the alignment file. It is parsed with SAMParser when SAMInput is set
        /// and with BAMParser otherwise.
        /// </param>
        private void DisplayOrphans(string filename)
        {
            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.Parse(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.Parse(filename);
            }

            // Pair up the reads; 0 is passed for both the mean and the deviation argument.
            IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

            // Materialize the filter once. The deferred query would otherwise be
            // re-evaluated for each Count() call and again for the foreach below.
            List<PairedRead> orphans = pairedReads
                .Where(pr => pr.PairedType == PairedReadType.Orphan)
                .ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            // One region per orphan pair, derived from the first read of the pair.
            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);

            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Group the regions by sequence ID.
            SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

            if (!rangeGroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("Region of Orphan reads:");
                DisplaySequenceRange(rangeGroup);
            }

            // Overlapping orphan regions merged together are reported as hot spots.
            SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

            if (!mergedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo hot spots to display");
            }
            else
            {
                Console.Write("\r\nChromosomal hot spot:");
                DisplaySequenceRange(mergedRegions);
            }
        }
Пример #2
0
        /// <summary>
        /// Prints the regions of length-anomaly reads and of orphan reads found in an
        /// alignment file, then the intersection of the two, reported as hot spots.
        /// </summary>
        /// <param name="filename">
        /// Path of the alignment file. It is parsed with SAMParser when SAMInput is set
        /// and with BAMParser otherwise.
        /// </param>
        /// <param name="mean">
        /// Mean value passed to GetPairedReads. When this or <paramref name="deviation"/>
        /// is -1 (the default), GetPairedReads is called without arguments instead.
        /// </param>
        /// <param name="deviation">
        /// Standard deviation passed to GetPairedReads; -1 has the same effect as for
        /// <paramref name="mean"/>.
        /// </param>
        private void IdentifyLentghAnamolies(string filename,
                                             float mean = -1, float deviation = -1)
        {
            // -1 is the "not supplied" sentinel for either value.
            bool calculateMeanNdeviation = mean == -1 || deviation == -1;

            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.Parse(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.Parse(filename);
            }

            // Get paired reads from the sequence alignment map object.
            IList<PairedRead> pairedReads = calculateMeanNdeviation
                ? alignmentMapobj.GetPairedReads()
                : alignmentMapobj.GetPairedReads(mean, deviation);

            // Materialize each filter once. The deferred queries would otherwise be
            // re-evaluated for each Count() call and again for the foreach loops.
            List<PairedRead> orphans = pairedReads
                .Where(pr => pr.PairedType == PairedReadType.Orphan)
                .ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            // One region per orphan pair, derived from the first read of the pair.
            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);

            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping for orphan regions.
            SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

            List<PairedRead> lengthAnomalies = pairedReads
                .Where(pr => pr.PairedType == PairedReadType.LengthAnomaly)
                .ToList();

            if (lengthAnomalies.Count == 0)
            {
                Console.WriteLine("No Anomalies to display");
            }

            List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);

            foreach (PairedRead laRead in lengthAnomalies)
            {
                // The region starts at the first read's position and extends by the
                // insert length of the pair.
                SequenceRange range = new SequenceRange();
                range.ID    = laRead.Read1.RName;
                range.Start = laRead.Read1.Pos;
                range.End   = laRead.Read1.Pos + laRead.InsertLength;
                lengthAnomalyRegions.Add(range);
            }

            // Get sequence range grouping for length anomaly regions.
            SequenceRangeGrouping lengthAnomalyRangegroup =
                new SequenceRangeGrouping(lengthAnomalyRegions);

            if (!lengthAnomalyRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Length anomalies reads to display");
            }
            else
            {
                Console.Write("Region of length anomaly:");
                DisplaySequenceRange(lengthAnomalyRangegroup);
            }

            if (!orphanRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("\r\nRegion of Orphan reads:");
                DisplaySequenceRange(orphanRangegroup);
            }

            // Hot spots are the regions where length anomalies and orphans intersect.
            SequenceRangeGrouping intersectedRegions =
                lengthAnomalyRangegroup.Intersect(orphanRangegroup);

            if (!intersectedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo Hot spots found");
            }
            else
            {
                Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
                DisplaySequenceRange(intersectedRegions);
            }
        }
Пример #3
0
        /// <summary>
        /// Initializes the BAM parser and formatter, the text writer and the
        /// uncompressed/compressed BAM output streams according to the user options.
        /// </summary>
        /// <remarks>
        /// Text output goes to the console when OutputFilename is empty, otherwise to a
        /// StreamWriter on that file. For BAM output the final stream is opened directly
        /// on OutputFilename; any additional intermediate stream is kept in memory when
        /// HeaderOnly is set or the estimated size fits within MemStreamLimit, and in a
        /// temporary file otherwise.
        /// </remarks>
        private void Initialize()
        {
            bamparser    = new BAMParser();
            bamformatter = new BAMFormatter();

            bamUncompressedOutStream = null;
            bamCompressedOutStream   = null;

            if (string.IsNullOrEmpty(OutputFilename))
            {
                writer = Console.Out;
            }
            else if (UnCompressedBAM || BAMOutput)
            {
                // Binary output: no text writer, the target file backs the final stream.
                writer = null;

                if (UnCompressedBAM)
                {
                    bamUncompressedOutStream = new FileStream(OutputFilename, FileMode.Create, FileAccess.ReadWrite);
                }
                else
                {
                    bamCompressedOutStream = new FileStream(OutputFilename, FileMode.Create, FileAccess.ReadWrite);
                }
            }
            else
            {
                writer = new StreamWriter(OutputFilename);
            }

            #region Initialize temp files
            long inputfileSize    = (new FileInfo(InputFilePath)).Length;
            long unCompressedSize = inputfileSize;

            if (!SAMInput)
            {
                // An uncompressed BAM file is approximately 4 times the size of the compressed file.
                unCompressedSize = inputfileSize * 4;
            }

            long compressedSize = unCompressedSize / 4;

            // The uncompressed stream is required for both uncompressed and compressed outputs.
            if ((UnCompressedBAM || BAMOutput) && bamUncompressedOutStream == null)
            {
                if (HeaderOnly || (MemStreamLimit >= unCompressedSize))
                {
                    bamUncompressedOutStream = new MemoryStream();
                }
                else
                {
                    // GetTempFileName creates the file, so it can be opened with FileMode.Open.
                    uncompressedTempfile     = Path.GetTempFileName();
                    bamUncompressedOutStream = new FileStream(uncompressedTempfile, FileMode.Open, FileAccess.ReadWrite);
                }
            }

            if (BAMOutput && !UnCompressedBAM && bamCompressedOutStream == null)
            {
                if (HeaderOnly || (MemStreamLimit >= compressedSize))
                {
                    // The capacity is only a pre-allocation hint. Skip it when just the
                    // header is written, and when the input size does not fit in an int
                    // (the unchecked cast would otherwise yield a negative or truncated
                    // capacity and make the MemoryStream constructor throw).
                    if (!HeaderOnly && inputfileSize <= int.MaxValue)
                    {
                        bamCompressedOutStream = new MemoryStream((int)inputfileSize);
                    }
                    else
                    {
                        bamCompressedOutStream = new MemoryStream();
                    }
                }
                else
                {
                    compressedTempfile     = Path.GetTempFileName();
                    bamCompressedOutStream = new FileStream(compressedTempfile, FileMode.Open, FileAccess.ReadWrite);
                }
            }
            #endregion Initialize temp files
        }
Пример #4
0
        /// <summary>
        /// Merge multiple sorted alignments.
        /// SAMUtil.exe out.bam in1.bam in2.bam
        /// </summary>
        /// <remarks>
        /// FilePaths[0] is the output file and FilePaths[1..] are the input BAM files.
        /// The header is taken from HeaderFile when one is given, otherwise from the
        /// first input file. The merged records are written uncompressed to a temporary
        /// file, which is then compressed into the output file and deleted.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown directly when FilePaths is null or has fewer than three entries.
        /// Failures raised while processing the inputs (unparsable file, empty file,
        /// missing or incorrect header) occur inside Parallel.For and therefore surface
        /// wrapped in an AggregateException.
        /// </exception>
        public void DoMerge()
        {
            if (FilePaths == null)
            {
                throw new InvalidOperationException("FilePath");
            }

            if (FilePaths.Length < 3)
            {
                throw new InvalidOperationException(Resources.MergeHelp);
            }

            IList<IList<BAMSortedIndex>> sortedIndexes         = new List<IList<BAMSortedIndex>>();
            IList<SequenceAlignmentMap>  sequenceAlignmentMaps = new List<SequenceAlignmentMap>();

            Parallel.For(1, FilePaths.Length, (int index) =>
            {
                BAMParser parser = new BAMParser();
                SequenceAlignmentMap map;

                try
                {
                    map = parser.Parse(FilePaths[index]);
                }
                catch (Exception ex)
                {
                    // Keep the original failure as the inner exception for diagnosis.
                    throw new InvalidOperationException(Resources.InvalidBAMFile, ex);
                }

                if (map == null)
                {
                    throw new InvalidOperationException(Resources.EmptyFile);
                }

                // Only the first input file takes part in choosing the output header.
                if (index == 1)
                {
                    if (string.IsNullOrEmpty(HeaderFile))
                    {
                        if (map.Header.RecordFields.Count == 0)
                        {
                            throw new InvalidOperationException(Resources.HeaderMissing);
                        }

                        header = map.Header;
                    }
                    else
                    {
                        SAMParser headerParser = new SAMParser();
                        SequenceAlignmentMap head;

                        try
                        {
                            head = headerParser.Parse(HeaderFile);
                        }
                        catch (Exception ex)
                        {
                            throw new InvalidOperationException(Resources.IncorrectHeaderFile, ex);
                        }

                        if (head == null)
                        {
                            throw new InvalidOperationException(Resources.EmptyFile);
                        }

                        header = head.Header;
                    }
                }

                IList<BAMSortedIndex> sortedIndex =
                    Sort(map, SortByReadName ? BAMSortByFields.ReadNames : BAMSortByFields.ChromosomeCoordinates);

                // Both lists are appended under the same lock so that entry i of one
                // always corresponds to entry i of the other.
                lock (sortedIndexes)
                {
                    sortedIndexes.Add(sortedIndex);
                    sequenceAlignmentMaps.Add(map);
                }
            });

            string filePath = Path.GetTempFileName();

            try
            {
                using (FileStream fstemp = new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite))
                {
                    BAMFormatter formatter = new BAMFormatter();
                    formatter.WriteHeader(header, fstemp);

                    if (SortByReadName)
                    {
                        IList<BAMSortedIndex> sortedIndex = sortedIndexes.Select(a => a.First()).ToList();
                        WriteMergeFileSortedByReadName(sortedIndex, fstemp, formatter, sequenceAlignmentMaps);
                    }
                    else
                    {
                        WriteMergeFile(sortedIndexes, fstemp, formatter, sequenceAlignmentMaps);
                    }

                    using (FileStream fsoutput = new FileStream(FilePaths[0], FileMode.Create, FileAccess.Write))
                    {
                        // Rewind so the whole temporary file is compressed into the output.
                        fstemp.Seek(0, SeekOrigin.Begin);
                        formatter.CompressBAMFile(fstemp, fsoutput);
                    }
                }
            }
            finally
            {
                // Remove the temporary file even when writing or compressing fails.
                File.Delete(filePath);
            }
        }
        } = new Dictionary <string, int>();                                                          // key: mappingStrand + strandFromGene

        /// <summary>
        /// Given a BAM file, try to guess the RNA-Seq experiment:
        ///	1) single-end or pair-end
        ///	2) strand_specific or not
        ///	3) if it is strand-specific, what's the strand_ness of the protocol
        /// The result is stored in Protocol, Strandedness, FractionForwardStranded,
        /// FractionReverseStranded and FractionUndetermined.
        /// </summary>
        /// <param name="bamPath">Path of the BAM file to inspect.</param>
        /// <param name="geneModelPath">Path passed to the GeneModel constructor.</param>
        /// <param name="genome">Genome passed to the GeneModel constructor.</param>
        /// <param name="minFractionStrandSpecific">
        /// Minimum forward (or reverse) fraction required to report the protocol as
        /// forward (or reverse) stranded.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when more than half of the counted reads are explained by neither the
        /// forward nor the reverse pattern.
        /// </exception>
        private void CheckProperties(string bamPath, string geneModelPath, Genome genome, double minFractionStrandSpecific)
        {
            GeneModel gm = new GeneModel(genome, geneModelPath);

            using (var reader = File.OpenRead(bamPath))
            {
                Console.WriteLine("Reading BAM file.");

                // read bam, and filter out reads that are QC failures, unmapped, duplicates, or secondary
                BAMParser bam = new BAMParser();
                var reads = bam.Parse(reader)
                    .Where(read =>
                           !read.Flag.HasFlag(SAMFlags.QualityCheckFailure) && !read.Flag.HasFlag(SAMFlags.UnmappedQuery) &&
                           !read.Flag.HasFlag(SAMFlags.Duplicate) && !read.Flag.HasFlag(SAMFlags.NonPrimeAlignment))
                    .ToList();

                Console.WriteLine("Evaluating reads.");

                Parallel.ForEach(reads, read =>
                {
                    // set the interval contained by this read, and get the gene regions nearby
                    bool isReversed       = read.Flag.HasFlag(SAMFlags.QueryOnReverseStrand);
                    string mapStrand      = isReversed ? "-" : "+";
                    Interval readInterval = new Interval(null, read.RName, "source", mapStrand, read.Pos, read.RefEndPos, null);

                    if (!gm.GenomeForest.Forest.TryGetValue(readInterval.ChromosomeID, out IntervalTree nearbyGeneTree))
                    {
                        return;
                    }

                    List<Interval> nearbyGeneRegions = nearbyGeneTree.Query(readInterval);
                    if (nearbyGeneRegions.Count == 0)
                    {
                        return;
                    }

                    // count up paired-end or single-end read properties
                    bool isPaired = read.Flag.HasFlag(SAMFlags.PairedRead);
                    bool isRead1  = read.Flag.HasFlag(SAMFlags.FirstReadInPair);
                    bool isRead2  = read.Flag.HasFlag(SAMFlags.SecondReadInPair);
                    string readId = isRead1 ? "1" : isRead2 ? "2" : null;

                    Dictionary<string, int> dict = isPaired ? PairedStrandedness : SingleStrandedness;
                    HashSet<string> strandFromGene = new HashSet<string>(nearbyGeneRegions.Select(x => x.Strand));

                    foreach (string strand in strandFromGene)
                    {
                        // key: [read number for paired reads] + mapping strand + gene strand
                        string key = isPaired ?
                                     readId + mapStrand + strand :
                                     mapStrand + strand;
                        lock (dict)
                        {
                            // TryGetValue leaves count at 0 for a new key. The incremented
                            // value must be stored back; bumping only the local copy would
                            // leave every entry stuck at 1.
                            dict.TryGetValue(key, out int count);
                            dict[key] = count + 1;
                        }
                    }
                });

                // From RSeQC:
                //      Not strand specific:
                // This is PairEnd Data
                // Fraction of reads failed to determine: 0.0172
                // Fraction of reads explained by "1++,1--,2+-,2-+": 0.4903
                // Fraction of reads explained by "1+-,1-+,2++,2--": 0.4925
                //      Strand specific:
                // This is PairEnd Data
                // Fraction of reads failed to determine: 0.0072
                // Fraction of reads explained by "1++,1--,2+-,2-+": 0.9441
                // Fraction of reads explained by "1+-,1-+,2++,2--": 0.0487
                SingleStrandedness.TryGetValue("++", out int sForward1);
                SingleStrandedness.TryGetValue("--", out int sForward2);

                SingleStrandedness.TryGetValue("+-", out int sReverse1);
                SingleStrandedness.TryGetValue("-+", out int sReverse2);

                PairedStrandedness.TryGetValue("1++", out int pForward1);
                PairedStrandedness.TryGetValue("1--", out int pForward2);
                PairedStrandedness.TryGetValue("2+-", out int pForward3);
                PairedStrandedness.TryGetValue("2-+", out int pForward4);

                PairedStrandedness.TryGetValue("1+-", out int pReverse1);
                PairedStrandedness.TryGetValue("1-+", out int pReverse2);
                PairedStrandedness.TryGetValue("2++", out int pReverse3);
                PairedStrandedness.TryGetValue("2--", out int pReverse4);

                int singleForward = sForward1 + sForward2;
                int singleReverse = sReverse1 + sReverse2;
                int pairedForward = pForward1 + pForward2 + pForward3 + pForward4;
                int pairedReverse = pReverse1 + pReverse2 + pReverse3 + pReverse4;

                int forwardCount;
                int reverseCount;
                int totalCount;

                if (PairedStrandedness.Count > 0 && SingleStrandedness.Count == 0)
                {
                    Protocol     = RnaSeqProtocol.PairedEnd;
                    forwardCount = pairedForward;
                    reverseCount = pairedReverse;
                    totalCount   = PairedStrandedness.Values.Sum();
                }
                else if (SingleStrandedness.Count > 0 && PairedStrandedness.Count == 0)
                {
                    Protocol     = RnaSeqProtocol.SingleEnd;
                    forwardCount = singleForward;
                    reverseCount = singleReverse;
                    totalCount   = SingleStrandedness.Values.Sum();
                }
                else
                {
                    Protocol     = RnaSeqProtocol.Mixture;
                    Strandedness = Strandedness.None;
                    forwardCount = singleForward + pairedForward;
                    reverseCount = singleReverse + pairedReverse;

                    // The numerators cover both dictionaries, so the denominator must too.
                    totalCount = SingleStrandedness.Values.Sum() + PairedStrandedness.Values.Sum();
                }

                if (totalCount == 0)
                {
                    // No read was counted at all (only reachable through the mixture
                    // branch). Report everything as undetermined instead of dividing
                    // 0 by 0 into NaN; as before, this case does not throw.
                    FractionForwardStranded = 0;
                    FractionReverseStranded = 0;
                    FractionUndetermined    = 1;
                    return;
                }

                FractionForwardStranded = (double)forwardCount / (double)totalCount;
                FractionReverseStranded = (double)reverseCount / (double)totalCount;
                FractionUndetermined    = 1 - FractionForwardStranded - FractionReverseStranded;

                if (FractionUndetermined > 0.5)
                {
                    throw new ArgumentException("A large number of reads failed to determine the standedness of the protocol within " + bamPath);
                }

                Strandedness = FractionForwardStranded >= minFractionStrandSpecific ? Strandedness.Forward :
                               FractionReverseStranded >= minFractionStrandSpecific ? Strandedness.Reverse :
                               Strandedness.None;
            }
        }