/// <summary>
/// Maps reads onto the features accepted by this mapper and then hands the mapped reads to
/// <c>SmallRNAUtils.RemoveReadsFromMap</c> so they are taken out of the read map.
/// Progress messages report how many features were considered and how many SAM entries mapped.
/// </summary>
/// <param name="allFeatures">Candidate features; only those for which <c>Accept</c> returns true are used.</param>
/// <param name="chrStrandReadMap">
/// Two-level read lookup (string key, then char key) that is passed to the mapping step and to the removal step.
/// Presumably keyed by chromosome and strand, judging by the name - confirm against callers.
/// </param>
public virtual void MapReadToFeatureAndRemoveFromMap(List<FeatureLocation> allFeatures, Dictionary<string, Dictionary<char, List<SAMAlignedLocation>>> chrStrandReadMap)
{
    var accepted = allFeatures.FindAll(candidate => Accept(candidate));
    Progress.SetMessage("Mapping reads to {0} {1} entries.", accepted.Count, MapperName);

    // Nothing accepted: report zero mapped entries and leave the read map untouched.
    if (accepted.Count <= 0)
    {
        Progress.SetMessage("There are 0 SAM entries mapped to {0} entries.", MapperName);
        return;
    }

    MapReadToFeature(accepted, chrStrandReadMap);

    var mappedReads = SmallRNAUtils.GetMappedReads(accepted);
    Progress.SetMessage("There are {0} SAM entries mapped to {1} entries.", mappedReads.Count, MapperName);

    SmallRNAUtils.RemoveReadsFromMap(chrStrandReadMap, mappedReads);
}
/// <summary>
/// Runs the small RNA counting pipeline: loads the coordinate entries, parses the tRNA and
/// "other" alignment inputs, maps reads to tRNA first and then to every non-tRNA entry,
/// writes the per-category and combined count files plus the mapping-detail XML, and finally
/// writes the summary (.info) file.
/// </summary>
/// <remarks>
/// When the mapped XML file already exists and <c>options.NotOverwrite</c> is set, the mapping
/// and count-writing steps are skipped and the feature groups are read back from that file.
/// </remarks>
/// <returns>The paths of the files recorded during this run.</returns>
/// <exception cref="Exception">Thrown when the coordinate source yields no entries.</exception>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    //read regions
    var featureLocations = options.GetSequenceRegions();
    Progress.SetMessage("There are {0} coordinate entries", featureLocations.Count);
    if (featureLocations.Count == 0)
    {
        // Only one argument is supplied, so the placeholder index must be {0}; a higher
        // index makes string.Format throw FormatException instead of building the message.
        throw new Exception(string.Format("No coordinate found in file {0}", options.CoordinateFile));
    }

    var trnaLocations = featureLocations.Where(l => l.Category.Equals(SmallRNAConsts.tRNA)).ToList();
    var mirnaLocations = featureLocations.Where(l => l.Category.Equals(SmallRNAConsts.miRNA)).ToList();
    var notTrnaLocations = featureLocations.Where(l => !l.Category.Equals(SmallRNAConsts.tRNA)).ToList();

    var resultFilename = options.OutputFile;
    result.Add(resultFilename);

    Progress.SetMessage("Parsing tRNA alignment result ...");

    //Parsing reads
    List<QueryInfo> trnaQueries;
    var trnaReads = ParseCandidates(options.InputFiles, resultFilename, out trnaQueries);
    SmallRNAUtils.InitializeSmallRnaNTA(trnaReads);

    // The NTA flag is derived from the tRNA reads only and reused for the non-tRNA mapping below.
    var hasNTA = trnaReads.Any(l => l.NTA.Length > 0);

    List<QueryInfo> otherrnaQueries;
    var otherRNAReads = ParseCandidates(options.OtherFile, resultFilename + ".other", out otherrnaQueries);
    SmallRNAUtils.InitializeSmallRnaNTA(otherRNAReads);

    var featureGroups = new List<FeatureItemGroup>();
    var mappedfile = resultFilename + ".mapped.xml";
    if (File.Exists(mappedfile) && options.NotOverwrite)
    {
        Progress.SetMessage("Reading mapped feature items...");
        featureGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedfile);
    }
    else
    {
        Progress.SetMessage("Mapping to tRNA...");

        //Draw tRNA mapping position graph
        Progress.SetMessage("Drawing tRNA position pictures...");
        var tRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.tRNA + ".position");
        if (!options.NotOverwrite || !File.Exists(tRNAPositionFile))
        {
            DrawPositionImage(trnaReads, trnaLocations, "tRNA", tRNAPositionFile);
        }

        //Map reads to tRNA
        MapReadToSequenceRegion(trnaLocations, trnaReads, hasNTA);

        var trnaMapped = trnaLocations.GroupByName();
        trnaMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
        trnaMapped.ForEach(m => m.CombineLocations());

        var trnaGroups = trnaMapped.GroupByIdenticalQuery();
        if (trnaGroups.Count > 0)
        {
            Progress.SetMessage("Writing tRNA count ...");
            var trnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.tRNA + ".count");

            OrderFeatureItemGroup(trnaGroups);
            new FeatureItemGroupTIGRTCountWriter().WriteToFile(trnaCountFile, trnaGroups);
            result.Add(trnaCountFile);

            featureGroups.AddRange(trnaGroups);
        }

        //Get all queries mapped to tRNA
        var tRNAreads = new HashSet<string>(from read in SmallRNAUtils.GetMappedReads(trnaLocations)
                                            select read.OriginalQname);

        //Remove all reads mapped to tRNA
        otherRNAReads.RemoveAll(m => tRNAreads.Contains(m.OriginalQname));

        //Draw miRNA mapping position graph
        Progress.SetMessage("Drawing miRNA position pictures...");
        var miRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".position");
        if (!options.NotOverwrite || !File.Exists(miRNAPositionFile))
        {
            DrawPositionImage(otherRNAReads, mirnaLocations, "miRNA", miRNAPositionFile);
        }

        //Map reads to not tRNA
        MapReadToSequenceRegion(notTrnaLocations, otherRNAReads, hasNTA);

        var notTrnaMapped = notTrnaLocations.GroupByName();
        notTrnaMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
        notTrnaMapped.ForEach(m => m.CombineLocations());

        // miRNA entries are selected by name prefix and grouped by sequence; everything else
        // that is not tRNA is grouped by identical query further down.
        var mirnaGroups = notTrnaMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupBySequence();
        if (mirnaGroups.Count > 0)
        {
            Progress.SetMessage("writing miRNA count ...");
            OrderFeatureItemGroup(mirnaGroups);

            var mirnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.miRNA + ".count");
            new SmallRNACountMicroRNAWriter(options.Offsets).WriteToFile(mirnaCountFile, mirnaGroups);
            result.Add(mirnaCountFile);
            featureGroups.AddRange(mirnaGroups);
        }

        var otherGroups = notTrnaMapped.Where(m => !m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupByIdenticalQuery();
        if (otherGroups.Count > 0)
        {
            Progress.SetMessage("writing other smallRNA count ...");
            var otherCountFile = Path.ChangeExtension(resultFilename, ".other.count");

            OrderFeatureItemGroup(otherGroups);
            new FeatureItemGroupTIGRTCountWriter().WriteToFile(otherCountFile, otherGroups);
            result.Add(otherCountFile);
            featureGroups.AddRange(otherGroups);
        }

        Progress.SetMessage("writing all smallRNA count ...");
        new FeatureItemGroupTIGRTCountWriter().WriteToFile(resultFilename, featureGroups);
        // NOTE(review): resultFilename was already added right after it was read from options,
        // so on this path it appears twice in the returned list. Kept as-is in case callers
        // depend on the current contents.
        result.Add(resultFilename);

        Progress.SetMessage("writing mapping details...");
        new FeatureItemGroupXmlFormat().WriteToFile(mappedfile, featureGroups);
        result.Add(mappedfile);
    }

    var readSummary = GetReadSummary(
        featureGroups,
        new HashSet<string>(),
        trnaReads.Union(otherRNAReads).ToList(),
        trnaQueries.Union(otherrnaQueries).ToList());

    var infoFile = Path.ChangeExtension(resultFilename, ".info");
    WriteSummaryFile(infoFile, readSummary, featureGroups);
    result.Add(infoFile);

    Progress.End();

    return result;
}