/// <summary>
/// Maps the given reads onto <paramref name="features"/>, hands the grouped features to
/// <c>SmallRNAMappedPositionBuilder.Build</c> together with <paramref name="positionFile"/>,
/// and then clears the mapping links that were created so the same reads and features
/// can be mapped again by the caller.
/// </summary>
/// <param name="notNTAreads">Reads to map; their locations' feature lists are cleared before returning.</param>
/// <param name="features">Feature locations to map against; nothing is done when this list is empty.</param>
/// <param name="category">Category name used in the progress message and passed to the mapper.</param>
/// <param name="positionFile">File name forwarded to the position builder.</param>
/// <param name="positionByPercentage">Forwarded unchanged to the position builder.</param>
/// <param name="minimumReadCount">Forwarded unchanged to the position builder.</param>
public void DrawPositionImage(List<SAMAlignedItem> notNTAreads, List<FeatureLocation> features, string category, string positionFile, bool positionByPercentage = false, int minimumReadCount = 5)
{
    // Nothing to map or draw without features.
    if (features.Count == 0)
    {
        return;
    }

    Progress.SetMessage("mapping reads overlapped to {0} {1} entries.", features.Count, category);

    // Map reads to the features without considering NTA and offset: the mapper is given
    // a predicate that accepts everything.
    var strandMap = BuildStrandMap(notNTAreads);
    var mapper = new SmallRNAMapper(category, options, m => true);
    mapper.MapReadToFeature(features, strandMap);

    var groupedFeatures = features.GroupByName().GroupBySequence();
    var outputName = Path.GetFileNameWithoutExtension(options.OutputFile);
    SmallRNAMappedPositionBuilder.Build(groupedFeatures, outputName, positionFile, m => m[0].Name.StringAfter(":"), positionByPercentage, minimumReadCount);

    // Undo the temporary mapping on both sides (read -> feature and feature -> read).
    notNTAreads.ForEach(alignedRead =>
    {
        foreach (var location in alignedRead.Locations)
        {
            location.Features.Clear();
        }
    });
    features.ForEach(mappedFeature => mappedFeature.SamLocations.Clear());
}
/// <summary>
/// Groups features by identical query, writes a count file plus a ".position" and an
/// ".absolutePosition" file for them, and appends the groups to <paramref name="allmapped"/>.
/// </summary>
/// <param name="result">Receives the path of the count file when one is written.</param>
/// <param name="resultFilename">Base file name from which the count file name is derived.</param>
/// <param name="allmapped">Receives the groups produced here when there is at least one.</param>
/// <param name="otherFeatures">Candidate features; filtered by name prefix when <paramref name="biotype"/> is set.</param>
/// <param name="biotype">
/// Name prefix selecting the features to export. When null or empty, every feature in
/// <paramref name="otherFeatures"/> is used and the output is labelled "other".
/// </param>
private void WriteGroups(List<string> result, string resultFilename, List<FeatureItemGroup> allmapped, List<FeatureItem> otherFeatures, string biotype)
{
    var hasBiotype = !string.IsNullOrEmpty(biotype);
    var displayBiotype = hasBiotype ? biotype : "other";

    List<FeatureItemGroup> groups;
    FeatureItemGroupCountWriter writer;
    if (hasBiotype)
    {
        groups = otherFeatures.Where(m => m.Name.StartsWith(biotype)).GroupByIdenticalQuery();
        writer = new FeatureItemGroupCountWriter(m => m.DisplayNameWithoutCategory);
    }
    else
    {
        groups = otherFeatures.GroupByIdenticalQuery();
        writer = new FeatureItemGroupCountWriter(m => m.DisplayName);
    }

    if (groups.Count > 0)
    {
        OrderFeatureItemGroup(groups);

        Progress.SetMessage("writing {0} count ...", displayBiotype);
        var countFile = Path.ChangeExtension(resultFilename, "." + displayBiotype + ".count");
        writer.WriteToFile(countFile, groups);
        result.Add(countFile);
        allmapped.AddRange(groups);

        // Two position outputs per biotype: first with the builder's last argument set to
        // true (".position"), then with it set to false (".absolutePosition").
        foreach (var byPercentage in new[] { true, false })
        {
            var suffix = byPercentage ? ".position" : ".absolutePosition";
            var positionFile = Path.ChangeExtension(options.OutputFile, displayBiotype + suffix);
            SmallRNAMappedPositionBuilder.Build(groups, Path.GetFileNameWithoutExtension(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"), byPercentage);
        }
    }

    // Only the local list is emptied; allmapped already received the group references.
    groups.Clear();
}
/// <summary>
/// Runs the small RNA counting pipeline: loads the coordinate entries, parses the candidate
/// reads, optionally drops excluded queries, maps reads to features (or reloads a previous
/// mapping), and writes the count, position, mapping-detail and info outputs.
/// </summary>
/// <returns>
/// The output paths collected during the run: the main output file, each count file that
/// was written, and the mapped XML file. The list is also passed to <c>WriteInfoFile</c>,
/// which may append to it.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when no coordinate entry is loaded or when no read is found in the input files.
/// </exception>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    //read regions
    var featureLocations = options.GetSequenceRegions();
    Progress.SetMessage("There are {0} coordinate entries", featureLocations.Count);
    if (featureLocations.Count == 0)
    {
        // ArgumentException matches the "no read found" check below; callers catching
        // Exception are unaffected.
        throw new ArgumentException(string.Format("No coordinate found in file {0}", options.CoordinateFile));
    }

    // Print the number of entries per category, largest category first.
    var fGroups = featureLocations.GroupBy(l => l.Category).OrderByDescending(l => l.Count()).ToList();
    foreach (var fg in fGroups)
    {
        Console.WriteLine("{0} = {1}", fg.Key, fg.Count());
    }

    // The main output file is recorded once, here; it is written further down.
    var resultFilename = options.OutputFile;
    result.Add(resultFilename);

    // Optional CCA file: one entry per line, empty set when the file does not exist.
    HashSet<string> cca = new HashSet<string>();
    if (File.Exists(options.CCAFile))
    {
        cca = new HashSet<string>(File.ReadAllLines(options.CCAFile));
    }

    //parsing reads
    List<QueryInfo> totalQueries;
    var reads = ParseCandidates(options.InputFiles, resultFilename, out totalQueries);
    if (reads.Count == 0)
    {
        throw new ArgumentException("No read found in file " + options.InputFiles.Merge(","));
    }

    // Drop reads whose query name (with the NTA tag suffix stripped) appears in the exclude file.
    HashSet<string> excludeQueries = new HashSet<string>();
    if (!string.IsNullOrEmpty(options.ExcludeXml))
    {
        Progress.SetMessage("Excluding queries in {0} ...", options.ExcludeXml);
        excludeQueries = new HashSet<string>(from q in MappedItemGroupXmlFileFormat.ReadQueries(options.ExcludeXml)
                                             select q.StringBefore(SmallRNAConsts.NTA_TAG));
        reads.RemoveAll(m => excludeQueries.Contains(m.Locations.First().Parent.Qname.StringBefore(SmallRNAConsts.NTA_TAG)));
        Progress.SetMessage("Total candidate {0} for mapping ...", reads.Count);
    }

    var hasMicroRnaNTA = reads.Any(l => l.NTA.Length > 0);
    var hasTrnaNTA = hasMicroRnaNTA || File.Exists(options.CCAFile);

    if (!options.NoCategory)
    {
        //First of all, draw candidate mapping position graph
        var miRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".candidates.position");
        if (!options.NotOverwrite || !File.Exists(miRNAPositionFile))
        {
            Progress.SetMessage("Drawing microRNA candidates position pictures...");
            // Only reads with an empty NTA are used for the candidate picture.
            var notNTAreads = hasMicroRnaNTA ? reads.Where(m => m.NTA.Length == 0).ToList() : reads;
            DrawPositionImage(notNTAreads, featureLocations.Where(m => m.Category.Equals(SmallRNAConsts.miRNA)).ToList(), SmallRNABiotype.miRNA.ToString(), miRNAPositionFile);
        }
    }

    var featureGroups = new List<FeatureItemGroup>();
    var mappedfile = resultFilename + ".mapped.xml";
    if (File.Exists(mappedfile) && options.NotOverwrite)
    {
        // Reuse the mapping written by a previous run instead of mapping again.
        Progress.SetMessage("Reading mapped feature items...");
        featureGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedfile);
    }
    else
    {
        Progress.SetMessage("Mapping feature items...");

        //mapping reads to features based on miRNA, tRNA, mt_tRNA and other smallRNA priority
        MapReadToSequenceRegion(featureLocations, reads, cca, hasMicroRnaNTA, hasTrnaNTA);

        // Keep only features with a non-zero estimated count.
        var featureMapped = featureLocations.GroupByName();
        featureMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
        featureMapped.ForEach(m => m.CombineLocations());

        if (options.NoCategory)
        {
            featureGroups = featureMapped.GroupByIdenticalQuery();
        }
        else
        {
            // miRNA: grouped by sequence, written with the dedicated miRNA count writer.
            var mirnaGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupBySequence();
            if (mirnaGroups.Count > 0)
            {
                OrderFeatureItemGroup(mirnaGroups);

                Progress.SetMessage("writing miRNA count ...");
                var mirnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.miRNA + ".count");
                new SmallRNACountMicroRNAWriter(options.Offsets).WriteToFile(mirnaCountFile, mirnaGroups);
                result.Add(mirnaCountFile);
                featureGroups.AddRange(mirnaGroups);

                var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".position");
                SmallRNAMappedPositionBuilder.Build(mirnaGroups, Path.GetFileNameWithoutExtension(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
            }
            mirnaGroups.Clear();

            // tRNA: grouped through SmallRNAUtils.GetTrnaAnticodon.
            var trnaCodeGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.tRNA)).GroupByFunction(SmallRNAUtils.GetTrnaAnticodon, false);
            if (trnaCodeGroups.Count > 0)
            {
                OrderFeatureItemGroup(trnaCodeGroups);

                Progress.SetMessage("writing tRNA code count ...");
                var trnaCodeCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.tRNA + ".count");
                new FeatureItemGroupCountWriter(m => m.DisplayNameWithoutCategory).WriteToFile(trnaCodeCountFile, trnaCodeGroups);
                result.Add(trnaCodeCountFile);
                featureGroups.AddRange(trnaCodeGroups);

                var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.tRNA + ".position");
                // NOTE(review): every other position call in this file passes
                // Path.GetFileNameWithoutExtension(options.OutputFile); this one keeps the
                // extension. Left unchanged - confirm whether the difference is intended.
                SmallRNAMappedPositionBuilder.Build(trnaCodeGroups, Path.GetFileName(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
            }
            trnaCodeGroups.Clear();

            // Everything that is neither miRNA nor tRNA is exported per configured biotype;
            // whatever matches no configured biotype is written through the null-biotype path.
            var otherFeatures = featureMapped.Where(m => !m.Name.StartsWith(SmallRNAConsts.miRNA) && !m.Name.StartsWith(SmallRNAConsts.tRNA)).ToList();
            var exportBiotypes = SmallRNAUtils.GetOutputBiotypes(options);
            foreach (var biotype in exportBiotypes)
            {
                WriteGroups(result, resultFilename, featureGroups, otherFeatures, biotype);
            }

            var leftFeatures = otherFeatures.Where(l => !exportBiotypes.Any(b => l.Name.StartsWith(b))).ToList();
            WriteGroups(result, resultFilename, featureGroups, leftFeatures, null);
        }

        // resultFilename was already added to result above, so it is not added again here.
        Progress.SetMessage("writing all smallRNA count ...");
        new FeatureItemGroupCountWriter().WriteToFile(resultFilename, featureGroups);

        Progress.SetMessage("writing mapping details...");
        new FeatureItemGroupXmlFormatHand().WriteToFile(mappedfile, featureGroups);
    }

    var readSummary = GetReadSummary(featureGroups, excludeQueries, reads, totalQueries);
    WriteInfoFile(result, resultFilename, readSummary, featureGroups);

    result.Add(mappedfile);
    Progress.End();

    return result;
}