Beispiel #1
0
        /// <summary>
        /// Maps reads onto the features this mapper accepts and then removes the reads
        /// that were mapped from <paramref name="chrStrandReadMap"/>.
        /// </summary>
        /// <param name="allFeatures">Candidate features; only those for which <c>Accept</c> returns true are used.</param>
        /// <param name="chrStrandReadMap">
        /// Read lookup handed to <c>MapReadToFeature</c> and afterwards pruned through
        /// <c>SmallRNAUtils.RemoveReadsFromMap</c>. Presumably keyed by chromosome name and
        /// then strand character (inferred from the parameter name; confirm against callers).
        /// </param>
        public virtual void MapReadToFeatureAndRemoveFromMap(List<FeatureLocation> allFeatures, Dictionary<string, Dictionary<char, List<SAMAlignedLocation>>> chrStrandReadMap)
        {
            var acceptedFeatures = (from feature in allFeatures
                                    where Accept(feature)
                                    select feature).ToList();

            Progress.SetMessage("Mapping reads to {0} {1} entries.", acceptedFeatures.Count, MapperName);

            // Nothing accepted: report the zero result and leave the read map untouched.
            if (acceptedFeatures.Count <= 0)
            {
                Progress.SetMessage("There are 0 SAM entries mapped to {0} entries.", MapperName);
                return;
            }

            MapReadToFeature(acceptedFeatures, chrStrandReadMap);

            var mappedReads = SmallRNAUtils.GetMappedReads(acceptedFeatures);
            Progress.SetMessage("There are {0} SAM entries mapped to {1} entries.", mappedReads.Count, MapperName);

            // Drop the reads that were just mapped from the shared read map.
            SmallRNAUtils.RemoveReadsFromMap(chrStrandReadMap, mappedReads);
        }
        /// <summary>
        /// Counts reads against the configured sequence regions. tRNA regions are mapped with the
        /// reads parsed from <c>options.InputFiles</c>; all other regions are mapped with the reads
        /// parsed from <c>options.OtherFile</c>, after removing reads already mapped to tRNA.
        /// Count files, a mapping-detail XML file and an info summary are written to paths
        /// derived from <c>options.OutputFile</c>.
        /// </summary>
        /// <remarks>
        /// When <c>options.NotOverwrite</c> is set and the ".mapped.xml" file already exists, the
        /// mapped feature groups are read back from that file and the mapping/count-writing steps
        /// are skipped; only the info summary is regenerated.
        /// </remarks>
        /// <returns>The list of file paths reported as results of this run.</returns>
        /// <exception cref="Exception">
        /// Thrown when <c>options.GetSequenceRegions()</c> returns no entries.
        /// </exception>
        public override IEnumerable<string> Process()
        {
            var result = new List<string>();

            // Read regions
            var featureLocations = options.GetSequenceRegions();

            Progress.SetMessage("There are {0} coordinate entries", featureLocations.Count);
            if (featureLocations.Count == 0)
            {
                // The placeholder index must be {0}: only one argument is supplied, so {1}
                // would raise a FormatException instead of this intended error.
                throw new Exception(string.Format("No coordinate found in file {0}", options.CoordinateFile));
            }

            var trnaLocations    = featureLocations.Where(l => l.Category.Equals(SmallRNAConsts.tRNA)).ToList();
            var mirnaLocations   = featureLocations.Where(l => l.Category.Equals(SmallRNAConsts.miRNA)).ToList();
            var notTrnaLocations = featureLocations.Where(l => !l.Category.Equals(SmallRNAConsts.tRNA)).ToList();

            var resultFilename = options.OutputFile;

            // The main output file is reported exactly once, on both the cached and the fresh path.
            result.Add(resultFilename);

            Progress.SetMessage("Parsing tRNA alignment result ...");

            // Parse reads used for tRNA mapping
            List<QueryInfo> trnaQueries;
            var trnaReads = ParseCandidates(options.InputFiles, resultFilename, out trnaQueries);

            SmallRNAUtils.InitializeSmallRnaNTA(trnaReads);

            // NOTE(review): hasNTA is derived from the tRNA reads only but is also passed to the
            // non-tRNA mapping below; confirm that this is intended.
            var hasNTA = trnaReads.Any(l => l.NTA.Length > 0);

            // Parse reads used for every non-tRNA region
            List<QueryInfo> otherrnaQueries;
            var otherRNAReads = ParseCandidates(options.OtherFile, resultFilename + ".other", out otherrnaQueries);

            SmallRNAUtils.InitializeSmallRnaNTA(otherRNAReads);

            var featureGroups = new List<FeatureItemGroup>();
            var mappedfile    = resultFilename + ".mapped.xml";

            if (File.Exists(mappedfile) && options.NotOverwrite)
            {
                // Reuse the mapping written by a previous run.
                Progress.SetMessage("Reading mapped feature items...");
                featureGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedfile);
            }
            else
            {
                // Draw tRNA mapping position graph
                Progress.SetMessage("Drawing tRNA position pictures...");
                var tRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.tRNA + ".position");
                if (!options.NotOverwrite || !File.Exists(tRNAPositionFile))
                {
                    DrawPositionImage(trnaReads, trnaLocations, "tRNA", tRNAPositionFile);
                }

                // Map reads to tRNA
                Progress.SetMessage("Mapping to tRNA...");
                MapReadToSequenceRegion(trnaLocations, trnaReads, hasNTA);

                var trnaMapped = trnaLocations.GroupByName();
                trnaMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
                trnaMapped.ForEach(m => m.CombineLocations());

                var trnaGroups = trnaMapped.GroupByIdenticalQuery();
                if (trnaGroups.Count > 0)
                {
                    Progress.SetMessage("Writing tRNA count ...");
                    var trnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.tRNA + ".count");

                    OrderFeatureItemGroup(trnaGroups);
                    new FeatureItemGroupTIGRTCountWriter().WriteToFile(trnaCountFile, trnaGroups);
                    result.Add(trnaCountFile);

                    featureGroups.AddRange(trnaGroups);
                }

                // Collect the original names of all queries mapped to tRNA
                var tRNAreads = new HashSet<string>(from read in SmallRNAUtils.GetMappedReads(trnaLocations)
                                                    select read.OriginalQname);

                // Remove all reads mapped to tRNA so they are not mapped again to other regions
                otherRNAReads.RemoveAll(m => tRNAreads.Contains(m.OriginalQname));

                // Draw miRNA mapping position graph
                Progress.SetMessage("Drawing miRNA position pictures...");
                var miRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".position");
                if (!options.NotOverwrite || !File.Exists(miRNAPositionFile))
                {
                    DrawPositionImage(otherRNAReads, mirnaLocations, "miRNA", miRNAPositionFile);
                }

                // Map the remaining reads to every non-tRNA region
                MapReadToSequenceRegion(notTrnaLocations, otherRNAReads, hasNTA);

                var notTrnaMapped = notTrnaLocations.GroupByName();
                notTrnaMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
                notTrnaMapped.ForEach(m => m.CombineLocations());

                // miRNA entries are selected by name prefix and grouped by sequence
                var mirnaGroups = notTrnaMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupBySequence();
                if (mirnaGroups.Count > 0)
                {
                    Progress.SetMessage("writing miRNA count ...");
                    OrderFeatureItemGroup(mirnaGroups);

                    var mirnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.miRNA + ".count");
                    new SmallRNACountMicroRNAWriter(options.Offsets).WriteToFile(mirnaCountFile, mirnaGroups);
                    result.Add(mirnaCountFile);
                    featureGroups.AddRange(mirnaGroups);
                }

                // Everything that is neither tRNA nor miRNA
                var otherGroups = notTrnaMapped.Where(m => !m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupByIdenticalQuery();
                if (otherGroups.Count > 0)
                {
                    Progress.SetMessage("writing other smallRNA count ...");
                    var otherCountFile = Path.ChangeExtension(resultFilename, ".other.count");

                    OrderFeatureItemGroup(otherGroups);
                    new FeatureItemGroupTIGRTCountWriter().WriteToFile(otherCountFile, otherGroups);
                    result.Add(otherCountFile);

                    featureGroups.AddRange(otherGroups);
                }

                // resultFilename is already in the result list (added above), so it is not added again.
                Progress.SetMessage("writing all smallRNA count ...");
                new FeatureItemGroupTIGRTCountWriter().WriteToFile(resultFilename, featureGroups);

                Progress.SetMessage("writing mapping details...");
                new FeatureItemGroupXmlFormat().WriteToFile(mappedfile, featureGroups);
                result.Add(mappedfile);
            }

            var readSummary = GetReadSummary(featureGroups, new HashSet<string>(), trnaReads.Union(otherRNAReads).ToList(), trnaQueries.Union(otherrnaQueries).ToList());

            var infoFile = Path.ChangeExtension(resultFilename, ".info");

            WriteSummaryFile(infoFile, readSummary, featureGroups);
            result.Add(infoFile);

            Progress.End();

            return result;
        }