public void TestXml()
    {
      // Load the fixture file; it is expected to hold two mapped item groups.
      var format = new MappedItemGroupXmlFileFormat();
      var groups = format.ReadFromFile("../../../data/mappedgroup.xml");
      Assert.AreEqual(2, groups.Count);

      // Exactly one query is expected across all groups.
      var queries = groups.GetQueries();
      Assert.AreEqual(1, queries.Count);

      var onlyQuery = queries[0];
      Assert.AreEqual(20, onlyQuery.Locations.Count);
      Assert.AreEqual(2, onlyQuery.QueryCount);

      // The first group holds two items with one and two mapped regions respectively.
      Assert.AreEqual(2, groups[0].Count);
      var firstItem = groups[0][0];
      var secondItem = groups[0][1];

      Assert.AreEqual(1, firstItem.MappedRegions.Count);
      Assert.AreEqual(2, secondItem.MappedRegions.Count);

      // Query counts and estimated counts per group.
      Assert.AreEqual(2, groups[0].QueryCount);
      Assert.AreEqual(1.5, groups[0].GetEstimatedCount());

      Assert.AreEqual(2, groups[1].QueryCount);
      Assert.AreEqual(0.5, groups[1].GetEstimatedCount());
    }
Exemple #2
0
    /// <summary>
    /// Copies the headers of the BAM input, plus every alignment line whose
    /// "sequence:reference:position" key also occurs among the aligned locations
    /// stored in the count file, to the output file.
    /// </summary>
    /// <returns>A single-element array holding the output file name.</returns>
    public override IEnumerable<string> Process()
    {
      var xmlFormat = new MappedItemGroupXmlFileFormat();

      Progress.SetMessage("reading mapped reads from " + _options.CountFile + " ...");
      var groups = xmlFormat.ReadFromFile(_options.CountFile);

      // Collect one key per aligned location found in the count file.
      var wantedLoci = new HashSet<string>();
      foreach (var itemGroup in groups)
      {
        foreach (var mappedItem in itemGroup)
        {
          foreach (var region in mappedItem.MappedRegions)
          {
            foreach (var location in region.AlignedLocations)
            {
              wantedLoci.Add(string.Format("{0}:{1}:{2}", location.Parent.Sequence, location.Seqname, location.Start));
            }
          }
        }
      }
      Progress.SetMessage("There are {0} unique sequence:locus", wantedLoci.Count);

      using (var writer = new StreamWriter(_options.OutputFile))
      using (var reader = SAMFactory.GetReader(_options.BamFile, false))
      {
        foreach (var header in reader.ReadHeaders())
        {
          writer.WriteLine(header);
        }

        var total = 0;
        var kept = 0;
        for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
          // Poll for cancellation every 1000 reads.
          if (total % 1000 == 0 && Progress.IsCancellationPending())
          {
            throw new UserTerminatedException();
          }

          // Report progress every 100000 reads (but not before the first read).
          if (total > 0 && total % 100000 == 0)
          {
            Progress.SetMessage("{0} candidates from {1} reads", kept, total);
          }

          total++;

          var fields = line.Split('\t');
          var key = string.Format("{0}:{1}:{2}", fields[SAMFormatConst.SEQ_INDEX], fields[SAMFormatConst.RNAME_INDEX], fields[SAMFormatConst.POS_INDEX]);
          if (wantedLoci.Contains(key))
          {
            writer.WriteLine(line);
            kept++;
          }
        }
      }

      return new[] { _options.OutputFile };
    }
    /// <summary>
    /// Drops groups without any query, sorts the groups with SortTRna when any group
    /// name contains ".tRNA", then writes the groups both through <paramref name="writer"/>
    /// (to <paramref name="outputFile"/>) and through <paramref name="format"/>
    /// (to <paramref name="outputFile"/> + ".xml").
    /// </summary>
    /// <param name="items1">Groups to save; modified in place.</param>
    /// <param name="outputFile">Target file of the sequence writer.</param>
    /// <param name="writer">Writer used for <paramref name="outputFile"/>.</param>
    /// <param name="format">XML format used for the ".xml" companion file.</param>
    /// <param name="result">Receives the names of both written files.</param>
    private static void SaveItems(List<MappedItemGroup> items1, string outputFile, MappedItemGroupSequenceWriter writer,
      MappedItemGroupXmlFileFormat format, List<string> result)
    {
      items1.RemoveAll(g => g.QueryCount == 0);

      var containsTrna = items1.Exists(g => g.Name.Contains(".tRNA"));
      if (containsTrna)
      {
        items1.SortTRna();
      }

      writer.WriteToFile(outputFile, items1);

      var xmlFile = outputFile + ".xml";
      format.WriteToFile(xmlFile, items1);

      result.AddRange(new[] { outputFile, xmlFile });
    }
    /// <summary>
    /// Reads two mapped-read XML files and resolves every query name that occurs in
    /// both of them by comparing the alignment scores with the engine format:
    /// a comparison result of 0 removes the read from both inputs, a negative result
    /// removes it from the second input and a positive result removes it from the first.
    /// The remaining groups of each input are then saved through SaveItems.
    /// </summary>
    /// <returns>The files written for both inputs.</returns>
    public override IEnumerable<string> Process()
    {
      var result = new List<string>();
      var samformat = _options.GetEngineFormat();

      var format = new MappedItemGroupXmlFileFormat();

      Progress.SetMessage("reading mapped reads from " + _options.InputFile1 + " ...");
      var items1 = format.ReadFromFile(_options.InputFile1);

      Progress.SetMessage("reading mapped reads from " + _options.InputFile2 + " ...");
      var items2 = format.ReadFromFile(_options.InputFile2);

      var reads1 = items1.GetQueries().ToDictionary(m => m.Qname);
      var reads2 = items2.GetQueries().ToDictionary(m => m.Qname);

      // Only reads present in both inputs need resolving, so filter the first
      // dictionary's keys directly instead of building the union of all names and
      // skipping the unshared ones. The list is a snapshot taken before any removal.
      var sharedQnames = reads1.Keys.Where(name => reads2.ContainsKey(name)).ToList();
      foreach (var qname in sharedQnames)
      {
        var res = samformat.CompareScore(reads1[qname].AlignmentScore, reads2[qname].AlignmentScore);
        if (res == 0)
        {
          items1.RemoveRead(qname);
          items2.RemoveRead(qname);
        }
        else if (res < 0)
        {
          items2.RemoveRead(qname);
        }
        else
        {
          items1.RemoveRead(qname);
        }
      }

      var writer = new MappedItemGroupSequenceWriter();

      SaveItems(items1, _options.OutputFile1, writer, format, result);
      SaveItems(items2, _options.OutputFile2, writer, format, result);

      return result;
    }
Exemple #5
0
        /// <summary>
        /// For every sample group listed in the input file, writes a category count
        /// file ("group.catcount"), a per-sample table ("group.catcount.tsv") and, when
        /// the R template exists, an R script ("group.catcount.r") that is executed.
        /// </summary>
        /// <remarks>
        /// Each input line is tab separated: group name, sample name, smallRNA mapped
        /// XML file and an optional miRNA mapped XML file. Lines with fewer than three
        /// fields are ignored. When none of the listed miRNA files exists, the work is
        /// delegated to SmallRNACategoryGroupPlusBuilder.
        /// </remarks>
        /// <returns>The category count and table files that were written.</returns>
        public override IEnumerable<string> Process()
        {
            var entries = (from line in File.ReadAllLines(options.InputFile)
                           let parts = line.Split('\t')
                           where parts.Length >= 3
                           let mirna = parts.Length == 3 ? string.Empty : parts[3]
                           select new { GroupName = parts[0], SampleName = parts[1], SmallRNAFile = parts[2], MiRNAFile = mirna }).ToList();

            if (entries.All(m => !File.Exists(m.MiRNAFile)))
            {
                return new SmallRNACategoryGroupPlusBuilder(options)
                {
                    Progress = this.Progress
                }.Process();
            }

            var groups = entries.GroupBy(m => m.GroupName).ToList();

            var result = new List<string>();

            foreach (var group in groups)
            {
                var catfile = Path.Combine(options.OutputDirectory, group.Key + ".catcount");
                result.Add(catfile);
                using (var sw = new StreamWriter(catfile))
                {
                    sw.WriteLine("SampleName\tCategory\tLevel\tCount");

                    foreach (var entry in group)
                    {
                        Progress.SetMessage("Reading smallRNA mapped file " + entry.SmallRNAFile + " ...");
                        var others = new MappedItemGroupXmlFileFormat().ReadFromFile(entry.SmallRNAFile);

                        // One record per aligned location; the item name is split on ':' into three parts.
                        var otherQueries = (from g in others
                                            from m in g
                                            from mr in m.MappedRegions
                                            from loc in mr.AlignedLocations
                                            select new QueryRecord(loc.Parent.Qname,
                                                                   m.Name.StringBefore(":"),
                                                                   m.Name.StringAfter(":").StringBefore(":"),
                                                                   m.Name.StringAfter(":").StringAfter(":"),
                                                                   loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                        Progress.SetMessage("Reading smallRNA mapped file finished, {0} queries mapped.", otherQueries.Count);

                        //2570-KCV-01-19.bam.count.mapped.xml => 2570-KCV-01-19.bam.info
                        var infofile = Path.Combine(Path.GetDirectoryName(entry.SmallRNAFile), Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(entry.SmallRNAFile))) + ".info");
                        if (File.Exists(entry.MiRNAFile))
                        {
                            // When a miRNA file is present, its .info file takes precedence.
                            infofile = Path.Combine(Path.GetDirectoryName(entry.MiRNAFile), Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(entry.MiRNAFile))) + ".info");

                            Progress.SetMessage("Reading miRNA mapped file " + entry.MiRNAFile + " ...");
                            var mirnas = new MappedMirnaGroupXmlFileFormat().ReadFromFile(entry.MiRNAFile);
                            var mirnaQueries = (from g in mirnas
                                                from m in g
                                                from mr in m.MappedRegions
                                                from mapped in mr.Mapped.Values
                                                from loc in mapped.AlignedLocations
                                                select new QueryRecord(loc.Parent.Qname.StringBefore(":CLIP_"),
                                                                       "miRNA",
                                                                       "miRNA",
                                                                       m.Name,
                                                                       loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
                            Progress.SetMessage("Reading miRNA mapped file finished, {0} queries mapped.", mirnaQueries.Count);

                            // Merge the miRNA records into the smallRNA records, query by query.
                            foreach (var q in mirnaQueries)
                            {
                                List<QueryRecord> rec;
                                if (otherQueries.TryGetValue(q.Key, out rec))
                                {
                                    rec.AddRange(q.Value);
                                }
                                else
                                {
                                    otherQueries[q.Key] = q.Value;
                                }
                            }
                            Progress.SetMessage("Total {0} queries mapped.", otherQueries.Count);
                        }

                        // Count the configured categories first, then every biotype found in the data.
                        var counts = new List<CategoryCount>();
                        FillCounts(counts, options.Categories, otherQueries);

                        var othercategories = (from v in otherQueries.Values
                                               from item in v
                                               select item.Biotype).Distinct().OrderBy(m => m).ToList();

                        FillCounts(counts, othercategories, otherQueries);

                        if (File.Exists(infofile))
                        {
                            var lines = File.ReadAllLines(infofile);

                            Progress.SetMessage("reading mapping information from " + infofile + " ...");

                            int totalReads = 0;
                            int mappedReads = 0;
                            foreach (var line in lines)
                            {
                                if (line.StartsWith("TotalReads"))
                                {
                                    totalReads = int.Parse(line.StringAfter("\t"));
                                }
                                else if (line.StartsWith("MappedReads"))
                                {
                                    mappedReads = int.Parse(line.StringAfter("\t"));
                                }
                            }

                            var smallRNAReads = counts.Sum(m => m.Count);

                            // Level 0: overall totals.
                            sw.WriteLine("{0}\tTotal Reads\t0\t{1}", entry.SampleName, totalReads);
                            sw.WriteLine("{0}\tMapped Reads\t0\t{1}", entry.SampleName, mappedReads);
                            sw.WriteLine("{0}\tsmall RNA\t0\t{1}", entry.SampleName, smallRNAReads);

                            // Level 1: unmapped / other mapped / small RNA breakdown.
                            sw.WriteLine("{0}\tUnmapped\t1\t{1}", entry.SampleName, totalReads - mappedReads);
                            sw.WriteLine("{0}\tOther Mapped\t1\t{1}", entry.SampleName, mappedReads - smallRNAReads);
                            sw.WriteLine("{0}\tsmall RNA\t1\t{1}", entry.SampleName, smallRNAReads);
                        }

                        // Level 2: one row per category.
                        foreach (var rec in counts)
                        {
                            sw.WriteLine("{0}\t{1}\t{2}\t{3}", entry.SampleName, rec.Biotype, 2, rec.Count);
                        }
                    }
                }

                // Read the file just written back in; the level-1 "small RNA" row duplicates
                // the level-0 one and is dropped so each sample/category pair is unique.
                var data = (from line in File.ReadAllLines(catfile).Skip(1)
                            where !string.IsNullOrWhiteSpace(line)
                            let parts = line.Split('\t')
                            let level = double.Parse(parts[2])
                            where !(parts[1].Equals("small RNA") && level == 1)
                            select new
                            {
                                SampleName = parts[0],
                                Category = parts[1],
                                Level = level,
                                Count = int.Parse(parts[3])
                            }).ToList();

                var tablefile = catfile + ".tsv";
                result.Add(tablefile);
                using (var sw = new StreamWriter(tablefile))
                {
                    var samples = (from d in data
                                   select d.SampleName).Distinct().OrderBy(m => m).ToList();
                    sw.WriteLine("Category\t{0}", samples.Merge("\t"));

                    var categories = (from d in data
                                      where d.Level == 2
                                      select d.Category).Distinct().OrderBy(m => m).ToList();
                    // Each Insert(0, ...) pushes the previous ones down, so the final order is
                    // Total Reads, Mapped Reads, Unmapped, Other Mapped, small RNA, then the categories.
                    categories.Insert(0, "small RNA");
                    categories.Insert(0, "Other Mapped");
                    categories.Insert(0, "Unmapped");
                    categories.Insert(0, "Mapped Reads");
                    categories.Insert(0, "Total Reads");

                    Console.WriteLine(categories.Merge("\n"));

                    var map = data.ToDoubleDictionary(m => m.SampleName, m => m.Category);
                    foreach (var cat in categories)
                    {
                        sw.WriteLine("{0}\t{1}", cat,
                                     (from sample in samples
                                      let dic = map[sample]
                                      select dic.ContainsKey(cat) ? dic[cat].Count.ToString() : "").Merge("\t"));
                    }
                }

                var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_category_group.r").FullName;
                if (File.Exists(rfile))
                {
                    var targetrfile = catfile + ".r";
                    using (var sw = new StreamWriter(targetrfile))
                    {
                        // R treats '\' inside a string literal as an escape character, so
                        // paths are written with forward slashes.
                        sw.WriteLine("catfile<-\"{0}\"", catfile.Replace("\\", "/"));
                        sw.WriteLine("outputdir<-\"{0}\"", options.OutputDirectory.Replace("\\", "/"));
                        sw.WriteLine("ispdf<-{0}", options.PdfGraph ? "1" : "0");

                        // Copy the template once: when it contains a "#predefine_end" marker,
                        // everything up to and including the first marker line is skipped
                        // (FindIndex yields -1 without a marker, so copying starts at line 0).
                        var templateLines = File.ReadAllLines(rfile);
                        var bodyStart = Array.FindIndex(templateLines, text => text.Contains("#predefine_end")) + 1;
                        foreach (var templateLine in templateLines.Skip(bodyStart))
                        {
                            sw.WriteLine(templateLine);
                        }
                    }
                    SystemUtils.Execute("R", "--vanilla --slave -f \"" + targetrfile + "\"");
                }
            }
            return result;
        }
    /// <summary>
    /// Writes a tab separated position table for the first item of every mapped group
    /// of every count file: for each offset inside the item's regions, the accumulated
    /// estimated count of the reads covering that offset divided by the item's
    /// estimated count. When the R template exists, an R script is generated next to
    /// the output file and executed.
    /// </summary>
    /// <returns>The full path of the position file.</returns>
    public override IEnumerable<string> Process()
    {
      var format = new MappedItemGroupXmlFileFormat();

      using (StreamWriter sw = new StreamWriter(options.OutputFile))
      {
        sw.WriteLine("File\tFeature\tStrand\tCount\tPosition\tPercentage");
        foreach (var file in options.GetCountFiles())
        {
          // Count files are accompanied by a "<file>.mapped.xml"; an .xml entry is used as is.
          var xmlfile = file.File.EndsWith(".xml") ? file.File : file.File + ".mapped.xml";

          var count = format.ReadFromFile(xmlfile).OrderByDescending(m => m.GetEstimatedCount()).ToList();

          foreach (var group in count)
          {
            var item = group[0];
            Dictionary<long, double> positionCount = new Dictionary<long, double>();
            foreach (var region in item.MappedRegions)
            {
              foreach (var loc in region.AlignedLocations)
              {
                for (long p = loc.Start; p <= loc.End; p++)
                {
                  // 1-based offset inside the region, counted from the region end on the '-' strand.
                  var offset = region.Region.Strand == '+' ? p - region.Region.Start + 1 : region.Region.End - p + 1;
                  double v;
                  if (!positionCount.TryGetValue(offset, out v))
                  {
                    v = 0;
                  }
                  positionCount[offset] = v + loc.Parent.GetEstimatedCount();
                }
              }
            }

            // Computed once per item and reused for both the Count column and the ratio.
            var allcount = item.GetEstimatedCount();
            var keys = positionCount.Keys.ToList();
            keys.Sort();
            foreach (var key in keys)
            {
              sw.WriteLine("{0}\t{1}\t{2}\t{3:0.##}\t{4}\t{5:0.00}",
                file.Name,
                item.Name,
                item.MappedRegions.First().Region.Strand,
                allcount,
                key,
                positionCount[key] / allcount);
            }
          }
        }
      }

      var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_position.r").FullName;
      if (File.Exists(rfile))
      {
        // Paths handed to R use forward slashes.
        var targetr = Path.ChangeExtension(options.OutputFile, ".r").Replace("\\", "/");
        var content = File.ReadAllText(rfile).Replace("$$workspace", Path.GetDirectoryName(Path.GetFullPath(options.OutputFile)).Replace("\\", "/"))
          .Replace("$$positionfile", Path.GetFileName(options.OutputFile).Replace("\\", "/"));
        File.WriteAllText(targetr, content);

        if (File.Exists(targetr))
        {
          // Quote the script path so that paths containing spaces stay a single argument.
          SystemUtils.Execute("R", "--vanilla -f \"" + targetr + "\"");
        }
      }

      return new string[] { Path.GetFullPath(options.OutputFile) };
    }
        /// <summary>
        /// Maps the candidate reads parsed from the input files onto the configured
        /// sequence regions, writes the resulting count files, the mapping details
        /// ("output.mapped.xml") and the info file, and returns the list of files.
        /// </summary>
        /// <returns>The names of the result files collected along the way.</returns>
        /// <exception cref="Exception">No coordinate entry was found.</exception>
        /// <exception cref="ArgumentException">No read was found in the input files.</exception>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            //read regions
            var featureLocations = options.GetSequenceRegions();

            Progress.SetMessage("There are {0} coordinate entries", featureLocations.Count);
            if (featureLocations.Count == 0)
            {
                throw new Exception(string.Format("No coordinate found in file {0}", options.CoordinateFile));
            }

            // Print the number of coordinate entries per category, largest category first.
            var fGroups = featureLocations.GroupBy(l => l.Category).OrderByDescending(l => l.Count()).ToList();

            foreach (var fg in fGroups)
            {
                Console.WriteLine("{0} = {1}", fg.Key, fg.Count());
            }

            // NOTE(review): featureChroms is not used again inside this method.
            var featureChroms = new HashSet <string>(from feature in featureLocations
                                                     select feature.Seqname);

            var resultFilename = options.OutputFile;

            result.Add(resultFilename);

            // Lines of the CCA file, when it exists; otherwise an empty set.
            HashSet <string> cca = new HashSet <string>();

            if (File.Exists(options.CCAFile))
            {
                cca = new HashSet <string>(File.ReadAllLines(options.CCAFile));
            }

            //parsing reads
            List <QueryInfo> totalQueries;
            var reads = ParseCandidates(options.InputFiles, resultFilename, out totalQueries);

            if (reads.Count == 0)
            {
                throw new ArgumentException("No read found in file " + options.InputFiles.Merge(","));
            }

            HashSet <string> excludeQueries = new HashSet <string>();

            // Optionally drop reads whose query name is listed in the exclude XML.
            // Both sides are compared after StringBefore(SmallRNAConsts.NTA_TAG)
            // (presumably this strips the NTA suffix from the query name — confirm).
            if (!string.IsNullOrEmpty(options.ExcludeXml))
            {
                Progress.SetMessage("Excluding queries in {0} ...", options.ExcludeXml);
                excludeQueries = new HashSet <string>(from q in MappedItemGroupXmlFileFormat.ReadQueries(options.ExcludeXml)
                                                      select q.StringBefore(SmallRNAConsts.NTA_TAG));
                reads.RemoveAll(m => excludeQueries.Contains(m.Locations.First().Parent.Qname.StringBefore(SmallRNAConsts.NTA_TAG)));
                Progress.SetMessage("Total candidate {0} for mapping ...", reads.Count);
            }

            // True when at least one read carries a non-empty NTA value.
            var hasMicroRnaNTA = reads.Any(l => l.NTA.Length > 0);

            var hasTrnaNTA = hasMicroRnaNTA || File.Exists(options.CCAFile);

            if (!options.NoCategory)
            {
                //First of all, draw candidate mapping position graph
                var miRNAPositionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".candidates.position");
                // Redraw unless NotOverwrite is set and the position file already exists.
                if (!options.NotOverwrite || !File.Exists(miRNAPositionFile))
                {
                    Progress.SetMessage("Drawing microRNA candidates position pictures...");
                    // Reads with a non-empty NTA are left out of the picture.
                    var notNTAreads = hasMicroRnaNTA ? reads.Where(m => m.NTA.Length == 0).ToList() : reads;
                    DrawPositionImage(notNTAreads, featureLocations.Where(m => m.Category.Equals(SmallRNAConsts.miRNA)).ToList(), SmallRNABiotype.miRNA.ToString(), miRNAPositionFile);
                }
            }

            var featureGroups = new List <FeatureItemGroup>();
            var mappedfile    = resultFilename + ".mapped.xml";

            // Reuse an existing mapping result only when NotOverwrite is set.
            if (File.Exists(mappedfile) && options.NotOverwrite)
            {
                Progress.SetMessage("Reading mapped feature items...");
                featureGroups = new FeatureItemGroupXmlFormat().ReadFromFile(mappedfile);
            }
            else
            {
                Progress.SetMessage("Mapping feature items...");

                //mapping reads to features based on miRNA, tRNA, mt_tRNA and other smallRNA priority
                MapReadToSequenceRegion(featureLocations, reads, cca, hasMicroRnaNTA, hasTrnaNTA);

                // Keep only features with a non-zero estimated count.
                var featureMapped = featureLocations.GroupByName();
                featureMapped.RemoveAll(m => m.GetEstimatedCount() == 0);
                featureMapped.ForEach(m => m.CombineLocations());

                if (options.NoCategory)
                {
                    featureGroups = featureMapped.GroupByIdenticalQuery();
                }
                else
                {
                    // miRNA features: grouped by sequence, with their own count and position files.
                    var mirnaGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.miRNA)).GroupBySequence();
                    if (mirnaGroups.Count > 0)
                    {
                        OrderFeatureItemGroup(mirnaGroups);

                        Progress.SetMessage("writing miRNA count ...");

                        var mirnaCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.miRNA + ".count");
                        new SmallRNACountMicroRNAWriter(options.Offsets).WriteToFile(mirnaCountFile, mirnaGroups);
                        result.Add(mirnaCountFile);
                        featureGroups.AddRange(mirnaGroups);

                        // NOTE(review): the miRNA branch passes GetFileNameWithoutExtension(OutputFile)
                        // while the tRNA branch below passes GetFileName(OutputFile) — confirm this is intended.
                        var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.miRNA + ".position");
                        SmallRNAMappedPositionBuilder.Build(mirnaGroups, Path.GetFileNameWithoutExtension(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
                    }
                    // The groups were already added to featureGroups above; only the temporary container is emptied.
                    mirnaGroups.Clear();

                    // tRNA features: grouped through SmallRNAUtils.GetTrnaAnticodon.
                    var trnaCodeGroups = featureMapped.Where(m => m.Name.StartsWith(SmallRNAConsts.tRNA)).GroupByFunction(SmallRNAUtils.GetTrnaAnticodon, false);
                    if (trnaCodeGroups.Count > 0)
                    {
                        OrderFeatureItemGroup(trnaCodeGroups);

                        Progress.SetMessage("writing tRNA code count ...");
                        var trnaCodeCountFile = Path.ChangeExtension(resultFilename, "." + SmallRNAConsts.tRNA + ".count");

                        new FeatureItemGroupCountWriter(m => m.DisplayNameWithoutCategory).WriteToFile(trnaCodeCountFile, trnaCodeGroups);
                        result.Add(trnaCodeCountFile);

                        featureGroups.AddRange(trnaCodeGroups);

                        var positionFile = Path.ChangeExtension(options.OutputFile, SmallRNAConsts.tRNA + ".position");
                        SmallRNAMappedPositionBuilder.Build(trnaCodeGroups, Path.GetFileName(options.OutputFile), positionFile, m => m[0].Name.StringAfter(":"));
                    }
                    trnaCodeGroups.Clear();

                    // Everything that is neither miRNA nor tRNA: first the configured output
                    // biotypes one by one, then the features matching none of them.
                    var otherFeatures  = featureMapped.Where(m => !m.Name.StartsWith(SmallRNAConsts.miRNA) && !m.Name.StartsWith(SmallRNAConsts.tRNA)).ToList();
                    var exportBiotypes = SmallRNAUtils.GetOutputBiotypes(options);
                    foreach (var biotype in exportBiotypes)
                    {
                        WriteGroups(result, resultFilename, featureGroups, otherFeatures, biotype);
                    }

                    var leftFeatures = otherFeatures.Where(l => !exportBiotypes.Any(b => l.Name.StartsWith(b))).ToList();
                    WriteGroups(result, resultFilename, featureGroups, leftFeatures, null);
                }

                Progress.SetMessage("writing all smallRNA count ...");
                new FeatureItemGroupCountWriter().WriteToFile(resultFilename, featureGroups);
                // NOTE(review): resultFilename was already added to result near the top of the
                // method, so it appears twice in the returned list when this branch runs.
                result.Add(resultFilename);

                Progress.SetMessage("writing mapping details...");
                // NOTE(review): written with FeatureItemGroupXmlFormatHand but read back above with
                // FeatureItemGroupXmlFormat — presumably both produce/consume the same layout; confirm.
                new FeatureItemGroupXmlFormatHand().WriteToFile(mappedfile, featureGroups);
            }

            var readSummary = GetReadSummary(featureGroups, excludeQueries, reads, totalQueries);

            WriteInfoFile(result, resultFilename, readSummary, featureGroups);
            result.Add(mappedfile);
            Progress.End();

            return(result);
        }
    /// <summary>
    /// For every sample group listed in the input file, writes a category count
    /// file ("group.catcount"), a per-sample table ("group.catcount.tsv") and, when
    /// the R template exists, an R script ("group.catcount.r") that is executed.
    /// </summary>
    /// <remarks>
    /// Each input line is tab separated: group name, sample name, smallRNA mapped
    /// XML file and an optional miRNA mapped XML file. Lines with fewer than three
    /// fields are ignored. When none of the listed miRNA files exists, the work is
    /// delegated to SmallRNACategoryGroupPlusBuilder.
    /// </remarks>
    /// <returns>The category count and table files that were written.</returns>
    public override IEnumerable<string> Process()
    {
      var entries = (from line in File.ReadAllLines(options.InputFile)
                     let parts = line.Split('\t')
                     where parts.Length >= 3
                     let mirna = parts.Length == 3 ? string.Empty : parts[3]
                     select new { GroupName = parts[0], SampleName = parts[1], SmallRNAFile = parts[2], MiRNAFile = mirna }).ToList();

      if (entries.All(m => !File.Exists(m.MiRNAFile)))
      {
        return new SmallRNACategoryGroupPlusBuilder(options)
        {
          Progress = this.Progress
        }.Process();
      }

      var groups = entries.GroupBy(m => m.GroupName).ToList();

      var result = new List<string>();

      foreach (var group in groups)
      {
        var catfile = Path.Combine(options.OutputDirectory, group.Key + ".catcount");
        result.Add(catfile);
        using (var sw = new StreamWriter(catfile))
        {
          sw.WriteLine("SampleName\tCategory\tLevel\tCount");

          foreach (var entry in group)
          {
            Progress.SetMessage("Reading smallRNA mapped file " + entry.SmallRNAFile + " ...");
            var others = new MappedItemGroupXmlFileFormat().ReadFromFile(entry.SmallRNAFile);

            // One record per aligned location; the item name is split on ':' into three parts.
            var otherQueries = (from g in others
                                from m in g
                                from mr in m.MappedRegions
                                from loc in mr.AlignedLocations
                                select new QueryRecord(loc.Parent.Qname,
                                  m.Name.StringBefore(":"),
                                  m.Name.StringAfter(":").StringBefore(":"),
                                  m.Name.StringAfter(":").StringAfter(":"),
                                  loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
            Progress.SetMessage("Reading smallRNA mapped file finished, {0} queries mapped.", otherQueries.Count);

            //2570-KCV-01-19.bam.count.mapped.xml => 2570-KCV-01-19.bam.info
            var infofile = Path.Combine(Path.GetDirectoryName(entry.SmallRNAFile), Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(entry.SmallRNAFile))) + ".info");
            if (File.Exists(entry.MiRNAFile))
            {
              // When a miRNA file is present, its .info file takes precedence.
              infofile = Path.Combine(Path.GetDirectoryName(entry.MiRNAFile), Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(entry.MiRNAFile))) + ".info");

              Progress.SetMessage("Reading miRNA mapped file " + entry.MiRNAFile + " ...");
              var mirnas = new MappedMirnaGroupXmlFileFormat().ReadFromFile(entry.MiRNAFile);
              var mirnaQueries = (from g in mirnas
                                  from m in g
                                  from mr in m.MappedRegions
                                  from mapped in mr.Mapped.Values
                                  from loc in mapped.AlignedLocations
                                  select new QueryRecord(loc.Parent.Qname.StringBefore(":CLIP_"),
                                    "miRNA",
                                    "miRNA",
                                    m.Name,
                                    loc.Parent.QueryCount)).ToGroupDictionary(m => m.Query);
              Progress.SetMessage("Reading miRNA mapped file finished, {0} queries mapped.", mirnaQueries.Count);

              // Merge the miRNA records into the smallRNA records, query by query.
              foreach (var q in mirnaQueries)
              {
                List<QueryRecord> rec;
                if (otherQueries.TryGetValue(q.Key, out rec))
                {
                  rec.AddRange(q.Value);
                }
                else
                {
                  otherQueries[q.Key] = q.Value;
                }
              }
              Progress.SetMessage("Total {0} queries mapped.", otherQueries.Count);
            }

            // Count the configured categories first, then every biotype found in the data.
            var counts = new List<CategoryCount>();
            FillCounts(counts, options.Categories, otherQueries);

            var othercategories = (from v in otherQueries.Values
                                   from item in v
                                   select item.Biotype).Distinct().OrderBy(m => m).ToList();

            FillCounts(counts, othercategories, otherQueries);

            if (File.Exists(infofile))
            {
              var lines = File.ReadAllLines(infofile);

              Progress.SetMessage("reading mapping information from " + infofile + " ...");

              int totalReads = 0;
              int mappedReads = 0;
              foreach (var line in lines)
              {
                if (line.StartsWith("TotalReads"))
                {
                  totalReads = int.Parse(line.StringAfter("\t"));
                }
                else if (line.StartsWith("MappedReads"))
                {
                  mappedReads = int.Parse(line.StringAfter("\t"));
                }
              }

              var smallRNAReads = counts.Sum(m => m.Count);

              // Level 0: overall totals.
              sw.WriteLine("{0}\tTotal Reads\t0\t{1}", entry.SampleName, totalReads);
              sw.WriteLine("{0}\tMapped Reads\t0\t{1}", entry.SampleName, mappedReads);
              sw.WriteLine("{0}\tsmall RNA\t0\t{1}", entry.SampleName, smallRNAReads);

              // Level 1: unmapped / other mapped / small RNA breakdown.
              sw.WriteLine("{0}\tUnmapped\t1\t{1}", entry.SampleName, totalReads - mappedReads);
              sw.WriteLine("{0}\tOther Mapped\t1\t{1}", entry.SampleName, mappedReads - smallRNAReads);
              sw.WriteLine("{0}\tsmall RNA\t1\t{1}", entry.SampleName, smallRNAReads);
            }

            // Level 2: one row per category.
            foreach (var rec in counts)
            {
              sw.WriteLine("{0}\t{1}\t{2}\t{3}", entry.SampleName, rec.Biotype, 2, rec.Count);
            }
          }
        }

        // Read the file just written back in; the level-1 "small RNA" row duplicates
        // the level-0 one and is dropped so each sample/category pair is unique.
        var data = (from line in File.ReadAllLines(catfile).Skip(1)
                    where !string.IsNullOrWhiteSpace(line)
                    let parts = line.Split('\t')
                    let level = double.Parse(parts[2])
                    where !(parts[1].Equals("small RNA") && level == 1)
                    select new
                    {
                      SampleName = parts[0],
                      Category = parts[1],
                      Level = level,
                      Count = int.Parse(parts[3])
                    }).ToList();

        var tablefile = catfile + ".tsv";
        result.Add(tablefile);
        using (var sw = new StreamWriter(tablefile))
        {
          var samples = (from d in data
                         select d.SampleName).Distinct().OrderBy(m => m).ToList();
          sw.WriteLine("Category\t{0}", samples.Merge("\t"));

          var categories = (from d in data
                            where d.Level == 2
                            select d.Category).Distinct().OrderBy(m => m).ToList();
          // Each Insert(0, ...) pushes the previous ones down, so the final order is
          // Total Reads, Mapped Reads, Unmapped, Other Mapped, small RNA, then the categories.
          categories.Insert(0, "small RNA");
          categories.Insert(0, "Other Mapped");
          categories.Insert(0, "Unmapped");
          categories.Insert(0, "Mapped Reads");
          categories.Insert(0, "Total Reads");

          Console.WriteLine(categories.Merge("\n"));

          var map = data.ToDoubleDictionary(m => m.SampleName, m => m.Category);
          foreach (var cat in categories)
          {
            sw.WriteLine("{0}\t{1}", cat,
              (from sample in samples
               let dic = map[sample]
               select dic.ContainsKey(cat) ? dic[cat].Count.ToString() : "").Merge("\t"));
          }
        }

        var rfile = new FileInfo(FileUtils.GetTemplateDir() + "/smallrna_category_group.r").FullName;
        if (File.Exists(rfile))
        {
          var targetrfile = catfile + ".r";
          using (var sw = new StreamWriter(targetrfile))
          {
            // R treats '\' inside a string literal as an escape character, so
            // paths are written with forward slashes.
            sw.WriteLine("catfile<-\"{0}\"", catfile.Replace("\\", "/"));
            sw.WriteLine("outputdir<-\"{0}\"", options.OutputDirectory.Replace("\\", "/"));
            sw.WriteLine("ispdf<-{0}", options.PdfGraph ? "1" : "0");

            // Copy the template once: when it contains a "#predefine_end" marker,
            // everything up to and including the first marker line is skipped
            // (FindIndex yields -1 without a marker, so copying starts at line 0).
            var templateLines = File.ReadAllLines(rfile);
            var bodyStart = Array.FindIndex(templateLines, text => text.Contains("#predefine_end")) + 1;
            foreach (var templateLine in templateLines.Skip(bodyStart))
            {
              sw.WriteLine(templateLine);
            }
          }
          SystemUtils.Execute("R", "--vanilla --slave -f \"" + targetrfile + "\"");
        }
      }
      return result;
    }
 public void Test()
 {
   // The tRNA fixture is expected to deserialize into exactly 310 groups.
   var format = new MappedItemGroupXmlFileFormat();
   var groups = format.ReadFromFile("../../../data/2570-KCV-01-19.bam.count.mapped.trna.xml");
   Assert.AreEqual(310, groups.Count);
 }