/// <summary>
/// Reads the mapped-group XML fixture and checks the group count, the query
/// list, and the per-group item, region, query and estimated-count values.
/// </summary>
public void TestXml()
{
    const string fixture = "../../../data/mappedgroup.xml";
    var groups = new MappedItemGroupXmlFileFormat().ReadFromFile(fixture);
    Assert.AreEqual(2, groups.Count);

    // The fixture is expected to expose exactly one query.
    var queries = groups.GetQueries();
    Assert.AreEqual(1, queries.Count);

    var onlyQuery = queries[0];
    Assert.AreEqual(20, onlyQuery.Locations.Count);
    Assert.AreEqual(2, onlyQuery.QueryCount);

    // First group: two items with one and two mapped regions respectively.
    var firstGroup = groups[0];
    Assert.AreEqual(2, firstGroup.Count);

    var firstItem = firstGroup[0];
    var secondItem = firstGroup[1];
    Assert.AreEqual(1, firstItem.MappedRegions.Count);
    Assert.AreEqual(2, secondItem.MappedRegions.Count);

    Assert.AreEqual(2, firstGroup.QueryCount);
    Assert.AreEqual(1.5, firstGroup.GetEstimatedCount());

    var secondGroup = groups[1];
    Assert.AreEqual(2, secondGroup.QueryCount);
    Assert.AreEqual(0.5, secondGroup.GetEstimatedCount());
}
/// <summary>
/// Copies the headers of the BAM/SAM input and every alignment line whose
/// "sequence:reference:position" key occurs among the aligned locations of
/// the mapped-group XML count file into the output file.
/// </summary>
/// <returns>An array holding the output file name.</returns>
/// <exception cref="UserTerminatedException">
/// Thrown when cancellation is pending; checked once every 1000 lines.
/// </exception>
public override IEnumerable<string> Process()
{
    var xmlFormat = new MappedItemGroupXmlFileFormat();

    Progress.SetMessage("reading mapped reads from " + _options.CountFile + " ...");
    var groups = xmlFormat.ReadFromFile(_options.CountFile);

    // One key per aligned location found in the count file.
    var lociKeys = groups
        .SelectMany(g => g)
        .SelectMany(mi => mi.MappedRegions)
        .SelectMany(mr => mr.AlignedLocations)
        .Select(al => string.Format("{0}:{1}:{2}", al.Parent.Sequence, al.Seqname, al.Start));
    var sequenceLocusSet = new HashSet<string>(lociKeys);

    Progress.SetMessage("There are {0} unique sequence:locus", sequenceLocusSet.Count);

    using (var sw = new StreamWriter(_options.OutputFile))
    using (var sr = SAMFactory.GetReader(_options.BamFile, false))
    {
        foreach (var header in sr.ReadHeaders())
        {
            sw.WriteLine(header);
        }

        int total = 0;
        int kept = 0;
        for (string line = sr.ReadLine(); line != null; line = sr.ReadLine())
        {
            if (total % 1000 == 0 && Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            if (total > 0 && total % 100000 == 0)
            {
                Progress.SetMessage("{0} candidates from {1} reads", kept, total);
            }

            total++;

            var fields = line.Split('\t');
            var key = string.Format("{0}:{1}:{2}",
                fields[SAMFormatConst.SEQ_INDEX],
                fields[SAMFormatConst.RNAME_INDEX],
                fields[SAMFormatConst.POS_INDEX]);

            if (sequenceLocusSet.Contains(key))
            {
                sw.WriteLine(line);
                kept++;
            }
        }
    }

    return new[] { _options.OutputFile };
}
/// <summary>
/// Drops groups without queries, sorts the list with <c>SortTRna</c> when any
/// group name contains ".tRNA", then writes the groups with the sequence
/// writer and, next to it, as "<paramref name="outputFile"/>.xml".
/// </summary>
/// <param name="items1">Groups to save; modified in place (filtered, possibly sorted).</param>
/// <param name="outputFile">Target of the sequence writer; also the base name of the XML file.</param>
/// <param name="writer">Writer used for <paramref name="outputFile"/>.</param>
/// <param name="format">XML format used for the ".xml" companion file.</param>
/// <param name="result">List that receives both written file names.</param>
private static void SaveItems(List<MappedItemGroup> items1, string outputFile, MappedItemGroupSequenceWriter writer, MappedItemGroupXmlFileFormat format, List<string> result)
{
    items1.RemoveAll(g => g.QueryCount == 0);

    if (items1.Exists(g => g.Name.Contains(".tRNA")))
    {
        items1.SortTRna();
    }

    var xmlFile = outputFile + ".xml";
    writer.WriteToFile(outputFile, items1);
    format.WriteToFile(xmlFile, items1);

    result.AddRange(new[] { outputFile, xmlFile });
}
/// <summary>
/// Reads two mapped-group XML files and, for every query name present in both,
/// compares the two alignment scores with the engine format's
/// <c>CompareScore</c>: a negative result removes the read from the second
/// file's groups, a positive result removes it from the first file's groups,
/// and zero removes it from both. Both group lists are then saved.
/// </summary>
/// <returns>The names of the files written by <c>SaveItems</c> for both inputs.</returns>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    var samformat = _options.GetEngineFormat();
    var format = new MappedItemGroupXmlFileFormat();

    Progress.SetMessage("reading mapped reads from " + _options.InputFile1 + " ...");
    var items1 = format.ReadFromFile(_options.InputFile1);

    Progress.SetMessage("reading mapped reads from " + _options.InputFile2 + " ...");
    var items2 = format.ReadFromFile(_options.InputFile2);

    var reads1 = items1.GetQueries().ToDictionary(m => m.Qname);
    var reads2 = items2.GetQueries().ToDictionary(m => m.Qname);

    // Only reads found in both inputs are compared. Filtering the first
    // dictionary's keys yields the same names, in the same order, as the former
    // Union + Distinct + "present in both" check. The list is materialized up
    // front so the removals below cannot interfere with the enumeration.
    var sharedQnames = reads1.Keys.Where(q => reads2.ContainsKey(q)).ToList();

    foreach (var qname in sharedQnames)
    {
        var res = samformat.CompareScore(reads1[qname].AlignmentScore, reads2[qname].AlignmentScore);
        if (res == 0)
        {
            items1.RemoveRead(qname);
            items2.RemoveRead(qname);
        }
        else if (res < 0)
        {
            items2.RemoveRead(qname);
        }
        else
        {
            items1.RemoveRead(qname);
        }
    }

    var writer = new MappedItemGroupSequenceWriter();
    SaveItems(items1, _options.OutputFile1, writer, format, result);
    SaveItems(items2, _options.OutputFile2, writer, format, result);

    return result;
}
/// <summary>
/// Builds per-group category count tables from smallRNA (and optional miRNA)
/// mapped XML files.
/// </summary>
/// <remarks>
/// Each line of the input file is tab separated: group name, sample name,
/// smallRNA mapped file and, optionally, a miRNA mapped file. When no entry has
/// an existing miRNA file the work is delegated to
/// <c>SmallRNACategoryGroupPlusBuilder</c>. Otherwise, for every group, a
/// ".catcount" file and a ".catcount.tsv" pivot table are written and, when
/// the R template exists, a ".catcount.r" script is generated and executed.
/// </remarks>
/// <returns>The ".catcount" and ".tsv" file names written for each group.</returns>
public override IEnumerable<string> Process()
{
    // e.g. 2570-KCV-01-19.bam.count.mapped.xml => 2570-KCV-01-19.bam.info
    Func<string, string> getInfoFile = mappedFile => Path.Combine(
        Path.GetDirectoryName(mappedFile),
        Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(mappedFile))) + ".info");

    var entries = (from line in File.ReadAllLines(options.InputFile)
                   let parts = line.Split('\t')
                   where parts.Length >= 3
                   let mirna = parts.Length == 3 ? string.Empty : parts[3]
                   select new
                   {
                       GroupName = parts[0],
                       SampleName = parts[1],
                       SmallRNAFile = parts[2],
                       MiRNAFile = mirna
                   }).ToList();

    if (entries.All(m => !File.Exists(m.MiRNAFile)))
    {
        return new SmallRNACategoryGroupPlusBuilder(options) { Progress = this.Progress }.Process();
    }

    var groups = entries.GroupBy(m => m.GroupName).ToList();
    var result = new List<string>();

    foreach (var group in groups)
    {
        var catfile = Path.Combine(options.OutputDirectory, group.Key + ".catcount");
        result.Add(catfile);

        using (var sw = new StreamWriter(catfile))
        {
            sw.WriteLine("SampleName\tCategory\tLevel\tCount");

            foreach (var entry in group)
            {
                Progress.SetMessage("Reading smallRNA mapped file " + entry.SmallRNAFile + " ...");
                var others = new MappedItemGroupXmlFileFormat().ReadFromFile(entry.SmallRNAFile);

                // The item name is split on ':' into three parts for the record.
                var otherQueries = (from g in others
                                    from m in g
                                    from mr in m.MappedRegions
                                    from loc in mr.AlignedLocations
                                    select new QueryRecord(
                                        loc.Parent.Qname,
                                        m.Name.StringBefore(":"),
                                        m.Name.StringAfter(":").StringBefore(":"),
                                        m.Name.StringAfter(":").StringAfter(":"),
                                        loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                Progress.SetMessage("Reading smallRNA mapped file finished, {0} queries mapped.", otherQueries.Count);

                var infofile = getInfoFile(entry.SmallRNAFile);

                if (File.Exists(entry.MiRNAFile))
                {
                    // When a miRNA file exists its .info file takes precedence.
                    infofile = getInfoFile(entry.MiRNAFile);

                    Progress.SetMessage("Reading miRNA mapped file " + entry.MiRNAFile + " ...");
                    var mirnas = new MappedMirnaGroupXmlFileFormat().ReadFromFile(entry.MiRNAFile);
                    var mirnaQueries = (from g in mirnas
                                        from m in g
                                        from mr in m.MappedRegions
                                        from mapped in mr.Mapped.Values
                                        from loc in mapped.AlignedLocations
                                        select new QueryRecord(
                                            loc.Parent.Qname.StringBefore(":CLIP_"),
                                            "miRNA",
                                            "miRNA",
                                            m.Name,
                                            loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                    Progress.SetMessage("Reading miRNA mapped file finished, {0} queries mapped.", mirnaQueries.Count);

                    // Merge the miRNA records into the smallRNA records, per query.
                    foreach (var q in mirnaQueries)
                    {
                        List<QueryRecord> existing;
                        if (otherQueries.TryGetValue(q.Key, out existing))
                        {
                            existing.AddRange(q.Value);
                        }
                        else
                        {
                            otherQueries[q.Key] = q.Value;
                        }
                    }

                    Progress.SetMessage("Total {0} queries mapped.", otherQueries.Count);
                }

                var counts = new List<CategoryCount>();
                FillCounts(counts, options.Categories, otherQueries);

                var othercategories = (from v in otherQueries.Values
                                       from item in v
                                       select item.Biotype).Distinct().OrderBy(m => m).ToList();
                FillCounts(counts, othercategories, otherQueries);

                if (File.Exists(infofile))
                {
                    var lines = File.ReadAllLines(infofile);
                    Progress.SetMessage("reading mapping information from " + infofile + " ...");

                    int totalReads = 0;
                    int mappedReads = 0;
                    foreach (var line in lines)
                    {
                        if (line.StartsWith("TotalReads"))
                        {
                            totalReads = int.Parse(line.StringAfter("\t"));
                        }
                        else if (line.StartsWith("MappedReads"))
                        {
                            mappedReads = int.Parse(line.StringAfter("\t"));
                        }
                    }

                    var smallRNAReads = counts.Sum(m => m.Count);

                    // Level 0 rows are totals, level 1 rows split the total reads.
                    sw.WriteLine("{0}\tTotal Reads\t0\t{1}", entry.SampleName, totalReads);
                    sw.WriteLine("{0}\tMapped Reads\t0\t{1}", entry.SampleName, mappedReads);
                    sw.WriteLine("{0}\tsmall RNA\t0\t{1}", entry.SampleName, smallRNAReads);
                    sw.WriteLine("{0}\tUnmapped\t1\t{1}", entry.SampleName, totalReads - mappedReads);
                    sw.WriteLine("{0}\tOther Mapped\t1\t{1}", entry.SampleName, mappedReads - smallRNAReads);
                    sw.WriteLine("{0}\tsmall RNA\t1\t{1}", entry.SampleName, smallRNAReads);
                }

                // Level 2 rows: one per category count.
                foreach (var rec in counts)
                {
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}", entry.SampleName, rec.Biotype, 2, rec.Count);
                }
            }
        }

        // Re-read the file just written; the level-1 "small RNA" row duplicates
        // the level-0 one and is skipped.
        var data = (from line in File.ReadAllLines(catfile).Skip(1)
                    where !string.IsNullOrWhiteSpace(line)
                    let parts = line.Split('\t')
                    let level = double.Parse(parts[2])
                    where !(parts[1].Equals("small RNA") && level == 1)
                    select new
                    {
                        SampleName = parts[0],
                        Category = parts[1],
                        Level = level,
                        Count = int.Parse(parts[3])
                    }).ToList();

        var tablefile = catfile + ".tsv";
        result.Add(tablefile);

        using (var sw = new StreamWriter(tablefile))
        {
            var samples = (from d in data select d.SampleName).Distinct().OrderBy(m => m).ToList();
            sw.WriteLine("Category\t{0}", samples.Merge("\t"));

            var categories = (from d in data where d.Level == 2 select d.Category).Distinct().OrderBy(m => m).ToList();

            // Summary rows come first, in this fixed order, before the sorted categories.
            categories.InsertRange(0, new[] { "Total Reads", "Mapped Reads", "Unmapped", "Other Mapped", "small RNA" });
            Console.WriteLine(categories.Merge("\n"));

            var map = data.ToDoubleDictionary(m => m.SampleName, m => m.Category);
            foreach (var cat in categories)
            {
                sw.WriteLine("{0}\t{1}", cat,
                    (from sample in samples
                     let dic = map[sample]
                     select dic.ContainsKey(cat) ? dic[cat].Count.ToString() : "").Merge("\t"));
            }
        }

        var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_category_group.r").FullName;
        if (File.Exists(rfile))
        {
            var targetrfile = catfile + ".r";
            using (var sw = new StreamWriter(targetrfile))
            {
                // Backslashes would be parsed as escape sequences inside an R
                // string literal, so paths are written with forward slashes.
                sw.WriteLine("catfile<-\"{0}\"", catfile.Replace("\\", "/"));
                sw.WriteLine("outputdir<-\"{0}\"", options.OutputDirectory.Replace("\\", "/"));
                sw.WriteLine("ispdf<-{0}", options.PdfGraph ? "1" : "0");

                // Copy the template, skipping everything up to and including the
                // first "#predefine_end" line; without the marker (index -1) the
                // whole template is copied.
                var templateLines = File.ReadAllLines(rfile);
                var markerIndex = Array.FindIndex(templateLines, l => l.Contains("#predefine_end"));
                foreach (var templateLine in templateLines.Skip(markerIndex + 1))
                {
                    sw.WriteLine(templateLine);
                }
            }

            SystemUtils.Execute("R", "--vanilla --slave -f \"" + targetrfile + "\"");
        }
    }

    return result;
}
/// <summary>
/// Writes, for the first item of every mapped group in every count file, the
/// estimated count accumulated at each position of its mapped regions,
/// together with that value divided by the item's estimated count. Then, if
/// the R template exists, generates an R script next to the output file and
/// runs it.
/// </summary>
/// <returns>An array holding the full path of the output file.</returns>
public override IEnumerable<string> Process()
{
    var format = new MappedItemGroupXmlFileFormat();

    using (StreamWriter sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("File\tFeature\tStrand\tCount\tPosition\tPercentage");

        foreach (var file in options.GetCountFiles())
        {
            var xmlfile = file.File.EndsWith(".xml") ? file.File : file.File + ".mapped.xml";
            var count = format.ReadFromFile(xmlfile).OrderByDescending(m => m.GetEstimatedCount()).ToList();

            foreach (var group in count)
            {
                var item = group[0];

                // offset within the region (1-based, counted along the strand) => accumulated count
                var positionCount = new Dictionary<long, double>();
                foreach (var region in item.MappedRegions)
                {
                    foreach (var loc in region.AlignedLocations)
                    {
                        for (long p = loc.Start; p <= loc.End; p++)
                        {
                            var offset = region.Region.Strand == '+'
                                ? p - region.Region.Start + 1
                                : region.Region.End - p + 1;

                            double v;
                            if (!positionCount.TryGetValue(offset, out v))
                            {
                                v = 0;
                            }

                            positionCount[offset] = v + loc.Parent.GetEstimatedCount();
                        }
                    }
                }

                // Computed once per item and reused for every output row.
                var allcount = item.GetEstimatedCount();

                foreach (var key in positionCount.Keys.OrderBy(k => k))
                {
                    sw.WriteLine("{0}\t{1}\t{2}\t{3:0.##}\t{4}\t{5:0.00}",
                        file.Name,
                        item.Name,
                        item.MappedRegions.First().Region.Strand,
                        allcount,
                        key,
                        positionCount[key] / allcount);
                }
            }
        }
    }

    var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_position.r").FullName;
    if (File.Exists(rfile))
    {
        var targetr = Path.ChangeExtension(options.OutputFile, ".r").Replace("\\", "/");
        var workspace = Path.GetDirectoryName(Path.GetFullPath(options.OutputFile)).Replace("\\", "/");
        var positionfile = Path.GetFileName(options.OutputFile).Replace("\\", "/");

        var content = File.ReadAllText(rfile)
            .Replace("$$workspace", workspace)
            .Replace("$$positionfile", positionfile);
        File.WriteAllText(targetr, content);

        if (File.Exists(targetr))
        {
            // Quote the script path so that paths containing spaces reach R as one argument.
            SystemUtils.Execute("R", "--vanilla -f \"" + targetr + "\"");
        }
    }

    return new string[] { Path.GetFullPath(options.OutputFile) };
}
/// <summary>
/// Maps candidate reads to the configured sequence regions and writes the
/// count files, the ".mapped.xml" detail file and the info file.
/// </summary>
/// <remarks>
/// When the ".mapped.xml" file already exists and <c>options.NotOverwrite</c>
/// is set, the feature groups are read back from it instead of being mapped
/// again. Unless <c>options.NoCategory</c> is set, miRNA and tRNA groups are
/// written to their own count/position files and the remaining features are
/// written per biotype through <c>WriteGroups</c>.
/// </remarks>
/// <returns>The names of the result files.</returns>
/// <exception cref="Exception">No coordinate entry was found.</exception>
/// <exception cref="ArgumentException">No read was found in the input files.</exception>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    //read regions
    var featureLocations = options.GetSequenceRegions();
    Progress.SetMessage("There are {0} coordinate entries", featureLocations.Count);
    if (featureLocations.Count == 0)
    {
        throw new Exception(string.Format("No coordinate found in file {0}", options.CoordinateFile));
    }

    // Report the number of coordinate entries per category, largest first.
    var fGroups = featureLocations.GroupBy(l => l.Category).OrderByDescending(l => l.Count()).ToList();
    foreach (var fg in fGroups)
    {
        Console.WriteLine("{0} = {1}", fg.Key, fg.Count());
    }

    // Registered once here; it is the main count file on both branches below.
    var resultFilename = options.OutputFile;
    result.Add(resultFilename);

    HashSet<string> cca = new HashSet<string>();
    if (File.Exists(options.CCAFile))
    {
        cca = new HashSet<string>(File.ReadAllLines(options.CCAFile));
    }

    //parsing reads
    List<QueryInfo> totalQueries;
    var reads = ParseCandidates(options.InputFiles, resultFilename, out totalQueries);
    if (reads.Count == 0)
    {
        throw new ArgumentException("No read found in file " + options.InputFiles.Merge(","));
    }

    HashSet<string> excludeQueries = new HashSet<string>();
    if (!string.IsNullOrEmpty(options.ExcludeXml))
    {
        Progress.SetMessage("Excluding queries in {0} ...", options.ExcludeXml);

        // Query names are compared without their NTA tag suffix.
        excludeQueries = new HashSet<string>(
            from q in MappedItemGroupXmlFileFormat.ReadQueries(options.ExcludeXml)
            select q.StringBefore(SmallRNAConsts.NTA_TAG));
        reads.RemoveAll(m => excludeQueries.Contains(m.Locations.First().Parent.Qname.StringBefore(SmallRNAConsts.NTA_TAG)));

        Progress.SetMessage("Total candidate {0} for mapping ...", reads.Count);
    }

    var hasMicroRnaNTA = reads.Any(l => l.NTA.Length > 0);
    var hasTrnaNTA = hasMicroRnaNTA || File.Exists(options.CCAFile);

    if (!options.NoCategory)
    {
        //First of all, draw candidate mapping position graph
        var miRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".candidates.position");
        if (!options.NotOverwrite || !File.Exists(miRNAPositionFile))
        {
            Progress.SetMessage("Drawing microRNA candidates position pictures...");
            var notNTAreads = hasMicroRnaNTA ? reads.Where(m => m.NTA.Length == 0).ToList() : reads;
            DrawPositionImage(
                notNTAreads,
                featureLocations.Where(m => m.Category.Equals(SmallRNAConsts.miRNA)).ToList(),
                SmallRNABiotype.miRNA.ToString(),
                miRNAPositionFile);
        }
    }

    var featureGroups = new List<FeatureItemGroup>();
    var mappedfile = resultFilename + ".mapped.xml";
    if (File.Exists(mappedfile) && options.NotOverwrite)
    {
        Progress.SetMessage("Reading mapped feature items...");
        featureGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedfile);
    }
    else
    {
        Progress.SetMessage("Mapping feature items...");

        //mapping reads to features based on miRNA, tRNA, mt_tRNA and other smallRNA priority
        MapReadToSequenceRegion(featureLocations, reads, cca, hasMicroRnaNTA, hasTrnaNTA);

        var featureMapped = featureLocations.GroupByName();
        featureMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
        featureMapped.ForEach(m => m.CombineLocations());

        if (options.NoCategory)
        {
            featureGroups = featureMapped.GroupByIdenticalQuery();
        }
        else
        {
            var mirnaGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupBySequence();
            if (mirnaGroups.Count > 0)
            {
                OrderFeatureItemGroup(mirnaGroups);

                Progress.SetMessage("writing miRNA count ...");

                var mirnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.miRNA + ".count");
                new SmallRNACountMicroRNAWriter(options.Offsets).WriteToFile(mirnaCountFile, mirnaGroups);
                result.Add(mirnaCountFile);
                featureGroups.AddRange(mirnaGroups);

                var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".position");
                SmallRNAMappedPositionBuilder.Build(mirnaGroups, Path.GetFileNameWithoutExtension(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
            }
            mirnaGroups.Clear();

            var trnaCodeGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.tRNA)).GroupByFunction(SmallRNAUtils.GetTrnaAnticodon, false);
            if (trnaCodeGroups.Count > 0)
            {
                OrderFeatureItemGroup(trnaCodeGroups);

                Progress.SetMessage("writing tRNA code count ...");

                var trnaCodeCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.tRNA + ".count");
                new FeatureItemGroupCountWriter(m => m.DisplayNameWithoutCategory).WriteToFile(trnaCodeCountFile, trnaCodeGroups);
                result.Add(trnaCodeCountFile);
                featureGroups.AddRange(trnaCodeGroups);

                var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.tRNA + ".position");
                SmallRNAMappedPositionBuilder.Build(trnaCodeGroups, Path.GetFileName(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
            }
            trnaCodeGroups.Clear();

            var otherFeatures = featureMapped.Where(m => !m.Name.StartsWith(SmallRNAConsts.miRNA) && !m.Name.StartsWith(SmallRNAConsts.tRNA)).ToList();
            var exportBiotypes = SmallRNAUtils.GetOutputBiotypes(options);
            foreach (var biotype in exportBiotypes)
            {
                WriteGroups(result, resultFilename, featureGroups, otherFeatures, biotype);
            }

            // Features whose name starts with none of the exported biotypes.
            var leftFeatures = otherFeatures.Where(l => !exportBiotypes.Any(b => l.Name.StartsWith(b))).ToList();
            WriteGroups(result, resultFilename, featureGroups, leftFeatures, null);
        }

        // resultFilename is already in the result list, so it is not added again here.
        Progress.SetMessage("writing all smallRNA count ...");
        new FeatureItemGroupCountWriter().WriteToFile(resultFilename, featureGroups);

        Progress.SetMessage("writing mapping details...");
        new FeatureItemGroupXmlFormatHand().WriteToFile(mappedfile, featureGroups);
    }

    var readSummary = GetReadSummary(featureGroups, excludeQueries, reads, totalQueries);
    WriteInfoFile(result, resultFilename, readSummary, featureGroups);

    result.Add(mappedfile);

    Progress.End();

    return result;
}
/// <summary>
/// Builds per-group category count tables from smallRNA (and optional miRNA)
/// mapped XML files.
/// </summary>
/// <remarks>
/// Each line of the input file is tab separated: group name, sample name,
/// smallRNA mapped file and, optionally, a miRNA mapped file. When no entry has
/// an existing miRNA file the work is delegated to
/// <c>SmallRNACategoryGroupPlusBuilder</c>. Otherwise, for every group, a
/// ".catcount" file and a ".catcount.tsv" pivot table are written and, when
/// the R template exists, a ".catcount.r" script is generated and executed.
/// </remarks>
/// <returns>The ".catcount" and ".tsv" file names written for each group.</returns>
public override IEnumerable<string> Process()
{
    // e.g. 2570-KCV-01-19.bam.count.mapped.xml => 2570-KCV-01-19.bam.info
    Func<string, string> getInfoFile = mappedFile => Path.Combine(
        Path.GetDirectoryName(mappedFile),
        Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(mappedFile))) + ".info");

    var entries = (from line in File.ReadAllLines(options.InputFile)
                   let parts = line.Split('\t')
                   where parts.Length >= 3
                   let mirna = parts.Length == 3 ? string.Empty : parts[3]
                   select new
                   {
                       GroupName = parts[0],
                       SampleName = parts[1],
                       SmallRNAFile = parts[2],
                       MiRNAFile = mirna
                   }).ToList();

    if (entries.All(m => !File.Exists(m.MiRNAFile)))
    {
        return new SmallRNACategoryGroupPlusBuilder(options) { Progress = this.Progress }.Process();
    }

    var groups = entries.GroupBy(m => m.GroupName).ToList();
    var result = new List<string>();

    foreach (var group in groups)
    {
        var catfile = Path.Combine(options.OutputDirectory, group.Key + ".catcount");
        result.Add(catfile);

        using (var sw = new StreamWriter(catfile))
        {
            sw.WriteLine("SampleName\tCategory\tLevel\tCount");

            foreach (var entry in group)
            {
                Progress.SetMessage("Reading smallRNA mapped file " + entry.SmallRNAFile + " ...");
                var others = new MappedItemGroupXmlFileFormat().ReadFromFile(entry.SmallRNAFile);

                // The item name is split on ':' into three parts for the record.
                var otherQueries = (from g in others
                                    from m in g
                                    from mr in m.MappedRegions
                                    from loc in mr.AlignedLocations
                                    select new QueryRecord(
                                        loc.Parent.Qname,
                                        m.Name.StringBefore(":"),
                                        m.Name.StringAfter(":").StringBefore(":"),
                                        m.Name.StringAfter(":").StringAfter(":"),
                                        loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                Progress.SetMessage("Reading smallRNA mapped file finished, {0} queries mapped.", otherQueries.Count);

                var infofile = getInfoFile(entry.SmallRNAFile);

                if (File.Exists(entry.MiRNAFile))
                {
                    // When a miRNA file exists its .info file takes precedence.
                    infofile = getInfoFile(entry.MiRNAFile);

                    Progress.SetMessage("Reading miRNA mapped file " + entry.MiRNAFile + " ...");
                    var mirnas = new MappedMirnaGroupXmlFileFormat().ReadFromFile(entry.MiRNAFile);
                    var mirnaQueries = (from g in mirnas
                                        from m in g
                                        from mr in m.MappedRegions
                                        from mapped in mr.Mapped.Values
                                        from loc in mapped.AlignedLocations
                                        select new QueryRecord(
                                            loc.Parent.Qname.StringBefore(":CLIP_"),
                                            "miRNA",
                                            "miRNA",
                                            m.Name,
                                            loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                    Progress.SetMessage("Reading miRNA mapped file finished, {0} queries mapped.", mirnaQueries.Count);

                    // Merge the miRNA records into the smallRNA records, per query.
                    foreach (var q in mirnaQueries)
                    {
                        List<QueryRecord> existing;
                        if (otherQueries.TryGetValue(q.Key, out existing))
                        {
                            existing.AddRange(q.Value);
                        }
                        else
                        {
                            otherQueries[q.Key] = q.Value;
                        }
                    }

                    Progress.SetMessage("Total {0} queries mapped.", otherQueries.Count);
                }

                var counts = new List<CategoryCount>();
                FillCounts(counts, options.Categories, otherQueries);

                var othercategories = (from v in otherQueries.Values
                                       from item in v
                                       select item.Biotype).Distinct().OrderBy(m => m).ToList();
                FillCounts(counts, othercategories, otherQueries);

                if (File.Exists(infofile))
                {
                    var lines = File.ReadAllLines(infofile);
                    Progress.SetMessage("reading mapping information from " + infofile + " ...");

                    int totalReads = 0;
                    int mappedReads = 0;
                    foreach (var line in lines)
                    {
                        if (line.StartsWith("TotalReads"))
                        {
                            totalReads = int.Parse(line.StringAfter("\t"));
                        }
                        else if (line.StartsWith("MappedReads"))
                        {
                            mappedReads = int.Parse(line.StringAfter("\t"));
                        }
                    }

                    var smallRNAReads = counts.Sum(m => m.Count);

                    // Level 0 rows are totals, level 1 rows split the total reads.
                    sw.WriteLine("{0}\tTotal Reads\t0\t{1}", entry.SampleName, totalReads);
                    sw.WriteLine("{0}\tMapped Reads\t0\t{1}", entry.SampleName, mappedReads);
                    sw.WriteLine("{0}\tsmall RNA\t0\t{1}", entry.SampleName, smallRNAReads);
                    sw.WriteLine("{0}\tUnmapped\t1\t{1}", entry.SampleName, totalReads - mappedReads);
                    sw.WriteLine("{0}\tOther Mapped\t1\t{1}", entry.SampleName, mappedReads - smallRNAReads);
                    sw.WriteLine("{0}\tsmall RNA\t1\t{1}", entry.SampleName, smallRNAReads);
                }

                // Level 2 rows: one per category count.
                foreach (var rec in counts)
                {
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}", entry.SampleName, rec.Biotype, 2, rec.Count);
                }
            }
        }

        // Re-read the file just written; the level-1 "small RNA" row duplicates
        // the level-0 one and is skipped.
        var data = (from line in File.ReadAllLines(catfile).Skip(1)
                    where !string.IsNullOrWhiteSpace(line)
                    let parts = line.Split('\t')
                    let level = double.Parse(parts[2])
                    where !(parts[1].Equals("small RNA") && level == 1)
                    select new
                    {
                        SampleName = parts[0],
                        Category = parts[1],
                        Level = level,
                        Count = int.Parse(parts[3])
                    }).ToList();

        var tablefile = catfile + ".tsv";
        result.Add(tablefile);

        using (var sw = new StreamWriter(tablefile))
        {
            var samples = (from d in data select d.SampleName).Distinct().OrderBy(m => m).ToList();
            sw.WriteLine("Category\t{0}", samples.Merge("\t"));

            var categories = (from d in data where d.Level == 2 select d.Category).Distinct().OrderBy(m => m).ToList();

            // Summary rows come first, in this fixed order, before the sorted categories.
            categories.InsertRange(0, new[] { "Total Reads", "Mapped Reads", "Unmapped", "Other Mapped", "small RNA" });
            Console.WriteLine(categories.Merge("\n"));

            var map = data.ToDoubleDictionary(m => m.SampleName, m => m.Category);
            foreach (var cat in categories)
            {
                sw.WriteLine("{0}\t{1}", cat,
                    (from sample in samples
                     let dic = map[sample]
                     select dic.ContainsKey(cat) ? dic[cat].Count.ToString() : "").Merge("\t"));
            }
        }

        var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_category_group.r").FullName;
        if (File.Exists(rfile))
        {
            var targetrfile = catfile + ".r";
            using (var sw = new StreamWriter(targetrfile))
            {
                // Backslashes would be parsed as escape sequences inside an R
                // string literal, so paths are written with forward slashes.
                sw.WriteLine("catfile<-\"{0}\"", catfile.Replace("\\", "/"));
                sw.WriteLine("outputdir<-\"{0}\"", options.OutputDirectory.Replace("\\", "/"));
                sw.WriteLine("ispdf<-{0}", options.PdfGraph ? "1" : "0");

                // Copy the template, skipping everything up to and including the
                // first "#predefine_end" line; without the marker (index -1) the
                // whole template is copied.
                var templateLines = File.ReadAllLines(rfile);
                var markerIndex = Array.FindIndex(templateLines, l => l.Contains("#predefine_end"));
                foreach (var templateLine in templateLines.Skip(markerIndex + 1))
                {
                    sw.WriteLine(templateLine);
                }
            }

            SystemUtils.Execute("R", "--vanilla --slave -f \"" + targetrfile + "\"");
        }
    }

    return result;
}
/// <summary>
/// Reads the tRNA mapped-group XML fixture and checks the number of groups.
/// </summary>
public void Test()
{
    const string trnaFixture = "../../../data/2570-KCV-01-19.bam.count.mapped.trna.xml";

    var groups = new MappedItemGroupXmlFileFormat().ReadFromFile(trnaFixture);

    Assert.AreEqual(310, groups.Count);
}