public void TestRead()
    {
      // Parse the fixture file and check the total record count before inspecting
      // individual records.
      var records = new BedItemFile<InsertionDeletionItem>().ReadFromFile(@"../../../data/tophat_deletions.bed");
      Assert.AreEqual(12, records.Count);

      // Record at index 0.
      var first = records[0];
      Assert.AreEqual("1", first.Seqname);
      Assert.AreEqual(13656, first.Start);
      Assert.AreEqual(13658, first.End);
      Assert.AreEqual("-", first.Name);
      Assert.AreEqual(1, first.Score);

      // Record at index 5.
      var sixth = records[5];
      Assert.AreEqual("Y", sixth.Seqname);
      Assert.AreEqual(59359640, sixth.Start);
      Assert.AreEqual(59359642, sixth.End);
      Assert.AreEqual("-", sixth.Name);
      Assert.AreEqual(10, sixth.Score);

      // Record at index 6: start and end are expected to be equal and the name holds a sequence.
      var seventh = records[6];
      Assert.AreEqual("1", seventh.Seqname);
      Assert.AreEqual(12663, seventh.Start);
      Assert.AreEqual(12663, seventh.End);
      Assert.AreEqual("GTC", seventh.Name);
      Assert.AreEqual(1, seventh.Score);

      // Final record of the file.
      var last = records.Last();
      Assert.AreEqual("Y", last.Seqname);
      Assert.AreEqual(59359023, last.Start);
      Assert.AreEqual(59359023, last.End);
      Assert.AreEqual("CCCC", last.Name);
      Assert.AreEqual(2, last.Score);
    }
Пример #2
0
        /// <summary>
        /// Reads the bed file named by <c>options.TargetFile</c> and builds one
        /// <see cref="CoverageRegion"/> per record, converting the zero-based bed start into a
        /// one-based (gff style) start.
        /// </summary>
        /// <param name="options">Supplies <c>TargetFile</c>, the path of the bed file to read.</param>
        /// <param name="progress">Receives a message with the number of records read.</param>
        /// <returns>
        /// The regions in file order. Each region's <c>Coverages</c> list is pre-filled with
        /// <c>End - Start</c> sites created from <c>DEFAULT_COVERAGE</c>.
        /// </returns>
        public static List <CoverageRegion> GetTargetCoverageRegionFromBed(ITargetBuilderOptions options, IProgressCallback progress)
        {
            var bedItems = new BedItemFile <BedItem>().ReadFromFile(options.TargetFile);

            progress.SetMessage("Total {0} potential target group read from file {1}", bedItems.Count, options.TargetFile);

            var regions = new List <CoverageRegion>();
            foreach (var bed in bedItems)
            {
                var region = new CoverageRegion()
                {
                    Name    = bed.Name,
                    // presumably strips a leading "chr" prefix; StringAfter is defined elsewhere
                    Seqname = bed.Seqname.StringAfter("chr"),
                    // bed start is zero-based, the region start is one-based
                    Start   = bed.Start + 1,
                    End     = bed.End,
                    Strand  = bed.Strand
                };

                // NOTE(review): this adds End - Start sites, while a one-based inclusive range
                // [Start, End] spans End - Start + 1 positions - confirm this is intended.
                var position = region.Start;
                while (position < region.End)
                {
                    region.Coverages.Add(new CoverageSite(DEFAULT_COVERAGE));
                    position++;
                }

                regions.Add(region);
            }

            return regions;
        }
        /// <summary>
        /// Reads a bed file and turns every record into a <see cref="JunctionItem"/>, taking
        /// the first two blocks of the record as the two sides of the junction.
        /// </summary>
        /// <param name="fileName">Path of the bed file to read.</param>
        /// <returns>One junction per bed record, in file order.</returns>
        public List <JunctionItem> ReadFromFile(string fileName)
        {
            var bedItems = new BedItemFile <BedItem>().ReadFromFile(fileName);

            var junctions = new List <JunctionItem>();
            foreach (var bed in bedItems)
            {
                // Blocks is indexed at 0 and 1, so a record with fewer than two blocks
                // makes the indexer fail here.
                var junction = new JunctionItem();
                junction.Chr    = bed.Seqname;
                junction.Start1 = bed.Blocks[0].ChromStart;
                junction.End1   = bed.Blocks[0].ChromEnd;
                junction.Start2 = bed.Blocks[1].ChromStart;
                junction.End2   = bed.Blocks[1].ChromEnd;
                junction.Name   = bed.Name;
                junctions.Add(junction);
            }

            return junctions;
        }
Пример #4
0
    public void TestReadSlimData()
    {
      // Parse the fixture referenced by the "filename" member of the test class.
      var records = new BedItemFile<BedItem>().ReadFromFile(filename);
      Assert.AreEqual(11, records.Count);

      // First record of the file.
      var head = records[0];
      Assert.AreEqual("21", head.Seqname);
      Assert.AreEqual(9827125, head.Start);
      Assert.AreEqual(9827151, head.End);
      Assert.AreEqual("21-1", head.Name);
      Assert.AreEqual(1182877.2, head.Score);
      Assert.AreEqual('+', head.Strand);

      // Record at index 10 (the last one, given the count asserted above).
      var tail = records[10];
      Assert.AreEqual("12", tail.Seqname);
      Assert.AreEqual(6647087, tail.Start);
      Assert.AreEqual(6647113, tail.End);
      Assert.AreEqual("12-4", tail.Name);
      Assert.AreEqual(49330.1, tail.Score);
      Assert.AreEqual('-', tail.Strand);
    }
        /// <summary>
        /// Annotates CNV items with the name of the best-overlapping bed region and writes a
        /// name-by-sample CSV summary to <c>options.OutputFile</c>.
        /// </summary>
        /// <remarks>
        /// Steps: (1) read the bed regions from <c>options.BedFile</c>, treating the first line
        /// as a header when it contains "start"; (2) read the CNV items from
        /// <c>options.InputFile</c> and require every chromosome they use to be present in the
        /// bed file; (3) set each item's <c>ItemName</c> from the region returned by
        /// <c>FindMaxOverlap</c> and drop the items that stay unnamed; (4) write one CSV row per
        /// name holding the percentage of samples with a deletion / duplication, followed by one
        /// column per sample.
        /// </remarks>
        /// <returns>An array holding only <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// A chromosome used by the CNV items does not occur in the bed file.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            // Peek at the first line to detect a header. The reader is disposed immediately so
            // the file handle is not leaked, and an empty file counts as "no header" instead of
            // failing with a NullReferenceException.
            bool hasheader;
            using (var headerReader = new StreamReader(options.BedFile))
            {
                var firstLine = headerReader.ReadLine();
                hasheader = firstLine != null && firstLine.Contains("start");
            }

            var beds = new BedItemFile <BedItem>()
            {
                HasHeader = hasheader
            }.ReadFromFile(options.BedFile);
            var items = new CNVItemReader <CNVItem>().ReadFromFile(options.InputFile);

            // Materialized once: the groups are walked twice below (validation, then annotation).
            var itemsgroup = items.GroupBy(m => m.Seqname.StringAfter("chr")).ToList();
            var bedgroups  = beds.GroupBy(m => m.Seqname).ToDictionary(m => m.Key);

            // Validate all chromosomes up front so nothing is annotated when one is missing.
            foreach (var ig in itemsgroup)
            {
                if (!bedgroups.ContainsKey(ig.Key))
                {
                    throw new Exception(string.Format("Cannot find chromosome {0} in bed file {1}", ig.Key, options.BedFile));
                }
            }

            foreach (var ig in itemsgroup)
            {
                var bg = bedgroups[ig.Key];
                foreach (var item in ig)
                {
                    var bgmax = FindMaxOverlap(bg, item);
                    if (bgmax != null)
                    {
                        item.ItemName = bgmax.Name;
                    }
                }
            }

            // Items for which no region was selected keep an empty name and are discarded.
            items.RemoveAll(m => string.IsNullOrWhiteSpace(m.ItemName));
            var itemmap = items.GroupBy(m => m.ItemName).ToDictionary(m => m.Key);
            var genes   = itemmap.Keys.OrderBy(m => m).ToList();
            var samples = items.ConvertAll(m => m.FileName).Distinct().OrderBy(m => m).ToList();

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("gene,DELETION,DUPLICATION," + samples.Merge(","));
                foreach (var gene in genes)
                {
                    // Items of this gene keyed by the sample (file name) they came from.
                    var sas = itemmap[gene].GroupBy(m => m.FileName).ToDictionary(m => m.Key);
                    sw.Write(gene);
                    var deletioncount    = samples.Count(m => sas.ContainsKey(m) && sas[m].Any(n => n.ItemType == CNVType.DELETION));
                    var duplicationcount = samples.Count(m => sas.ContainsKey(m) && sas[m].Any(n => n.ItemType == CNVType.DUPLICATION));
                    sw.Write(",{0:0.0}%,{1:0.0}%", deletioncount * 100.0 / samples.Count, duplicationcount * 100.0 / samples.Count);

                    foreach (var sample in samples)
                    {
                        sw.Write(",");
                        if (sas.ContainsKey(sample))
                        {
                            var sis = sas[sample];

                            // Report (but still write) samples whose items disagree on the CNV type.
                            var firstType = sis.First().ItemType;
                            if (sis.Any(m => !m.ItemType.Equals(firstType)))
                            {
                                Console.WriteLine("Different type of gene " + gene + " in sample " + sample);
                            }
                            sw.Write((from si in sis
                                      select string.Format("{0}:{1}:{2}-{3}", si.ItemType, si.Seqname, si.Start, si.End)).Merge("/"));
                        }
                    }
                    sw.WriteLine();
                }
            }

            return(new string[] { options.OutputFile });
        }
Пример #6
0
    /// <summary>
    /// Builds the smallRNA coordinate file <c>options.OutputFile</c> in bed format.
    /// </summary>
    /// <remarks>
    /// Entries are collected from up to four optional inputs, each skipped when its file does
    /// not exist: miRNA from <c>options.MiRBaseFile</c> (a ".bed" file or a file read through
    /// <c>GtfItemFile</c>), tRNA from <c>options.UcscTrnaFile</c>, mitochondrial tRNA and the
    /// remaining biotypes from <c>options.EnsemblGtfFile</c>, and rRNA from
    /// <c>options.RRNAFile</c>. After the bed file is written, <c>Bed2FastaProcessor</c> is run
    /// with <c>options.OutputFile + ".fa"</c> as its output and a per-biotype count is written
    /// to <c>options.OutputFile + ".info"</c>.
    /// </remarks>
    /// <returns>An array holding only <c>options.OutputFile</c>.</returns>
    public override IEnumerable<string> Process()
    {
      // Save the options next to the output unless they were loaded from that very file.
      var paramFile = options.OutputFile + ".param";
      if (string.IsNullOrEmpty(options.ParamFile) || !Path.GetFullPath(options.ParamFile).Equals(Path.GetFullPath(paramFile)))
      {
        options.SaveToFile(paramFile);
      }

      var bedfile = new BedItemFile<BedItem>(6);

      var mirnas = new List<BedItem>();
      if (File.Exists(options.MiRBaseFile))
      {
        Progress.SetMessage("Processing {0} ...", options.MiRBaseFile);

        if (options.MiRBaseFile.EndsWith(".bed"))
        {
          mirnas = bedfile.ReadFromFile(options.MiRBaseFile);
          mirnas.ForEach(m =>
          {
            m.Seqname = m.Seqname.StringAfter("chr");
            m.Name = options.MiRBaseKey + ":" + m.Name;
          });
        }
        else
        {
          using (var gf = new GtfItemFile(options.MiRBaseFile))
          {
            GtfItem item;
            while ((item = gf.Next(options.MiRBaseKey)) != null)
            {
              BedItem loc = new BedItem();
              loc.Seqname = item.Seqname.StringAfter("chr");
              // one-based start of the source record -> zero-based bed start
              loc.Start = item.Start - 1;
              loc.End = item.End;
              loc.Name = options.MiRBaseKey + ":" + item.Attributes.StringAfter("Name=").StringBefore(";");
              loc.Score = 1000;
              loc.Strand = item.Strand;
              mirnas.Add(loc);
            }
          }
        }

        Progress.SetMessage("{0} miRNA read.", mirnas.Count);
      }

      // Checked once: the same answer is needed again for every gene record of the Ensembl
      // file below, and querying the file system per record is wasted work.
      var hasUcscTrna = File.Exists(options.UcscTrnaFile);

      List<BedItem> trnas = new List<BedItem>();
      if (hasUcscTrna)
      {
        //reading tRNA from ucsc table without mitochondrial tRNA
        Progress.SetMessage("Processing {0} ...", options.UcscTrnaFile);
        trnas = bedfile.ReadFromFile(options.UcscTrnaFile);
        trnas.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));

        //remove the tRNA not from 1-22, X and Y
        trnas.RemoveAll(m => (m.Seqname.Length > 1) && !m.Seqname.All(n => char.IsDigit(n)));

        //mitochondrial tRNA will be extracted from ensembl gtf file
        trnas.RemoveAll(m => m.Seqname.Equals("M") || m.Seqname.Equals("MT"));

        trnas.ForEach(m => m.Name = SmallRNAConsts.tRNA + ":" + m.Name);

        Progress.SetMessage("{0} tRNA from ucsc read.", trnas.Count);
      }

      var others = new List<BedItem>();
      if (File.Exists(options.EnsemblGtfFile))
      {
        //reading smallRNA/tRNA from ensembl gtf file
        Progress.SetMessage("Processing {0} ...", options.EnsemblGtfFile);
        using (var gf = new GtfItemFile(options.EnsemblGtfFile))
        {
          // miRNA comes from the miRBase file only, so it is not accepted from this file.
          var biotypes = new HashSet<string>(SmallRNAConsts.Biotypes);
          biotypes.Remove(SmallRNAConsts.miRNA);

          GtfItem item;
          int count = 0;
          while ((item = gf.Next("gene")) != null)
          {
            // The biotype attribute is looked up under either of two keys; records with
            // neither are skipped.
            string biotype;
            if (item.Attributes.Contains("gene_biotype"))
            {
              biotype = item.Attributes.StringAfter("gene_biotype \"").StringBefore("\"");
            }
            else if (item.Attributes.Contains("gene_type"))
            {
              biotype = item.Attributes.StringAfter("gene_type \"").StringBefore("\"");
            }
            else
            {
              continue;
            }

            // tRNA entries of this file are ignored when the UCSC tRNA file supplied them.
            if (hasUcscTrna && biotype.Equals(SmallRNAConsts.tRNA))
            {
              continue;
            }

            if (biotype.Equals("Mt_tRNA"))
            {
              // Mitochondrial tRNA is appended to the tRNA list with a running number.
              count++;
              var gene_name = item.Attributes.Contains("gene_name") ? item.Attributes.StringAfter("gene_name \"").StringBefore("\"") : item.GeneId;
              BedItem loc = new BedItem();
              loc.Seqname = "MT";
              loc.Start = item.Start - 1;
              loc.End = item.End;
              loc.Name = string.Format(SmallRNAConsts.tRNA + ":chrMT.tRNA{0}-{1}", count, gene_name.StringAfter("-"));
              loc.Score = 1000;
              loc.Strand = item.Strand;
              trnas.Add(loc);
            }
            else if (biotypes.Contains(biotype))
            {
              string seqName;
              if (item.Seqname.ToLower().StartsWith("chr"))
              {
                seqName = item.Seqname.Substring(3);
              }
              else
              {
                seqName = item.Seqname;
              }
              if (seqName.Equals("M"))
              {
                seqName = "MT";
              }

              //ignore all smallRNA coordinates on scaffold or contig.
              if (seqName.Length > 5)
              {
                continue;
              }

              var gene_name = item.Attributes.StringAfter("gene_name \"").StringBefore("\"");

              BedItem loc = new BedItem();
              loc.Seqname = seqName;
              loc.Start = item.Start - 1;
              loc.End = item.End;
              loc.Name = biotype + ":" + gene_name + ":" + item.GeneId;
              loc.Score = 1000;
              loc.Strand = item.Strand;
              others.Add(loc);
            }
          }
        }
      }

      var all = new List<BedItem>();
      all.AddRange(mirnas);
      all.AddRange(trnas);
      all.AddRange(others);

      if (File.Exists(options.RRNAFile))
      {
        // Every rRNA sequence becomes an entry that spans the whole sequence on the '+' strand.
        var seqs = SequenceUtils.Read(options.RRNAFile);
        foreach(var seq in seqs)
        {
          all.Add(new BedItem()
          {
            Seqname = seq.Name,
            Start = 0,
            End = seq.SeqString.Length,
            Strand = '+',
            Name = "rRNA:" + seq.Name
          });
        }
      }

      Progress.SetMessage("Saving smallRNA coordinates to " + options.OutputFile + "...");
      using (var sw = new StreamWriter(options.OutputFile))
      {
        // Output is grouped by biotype (matched on the name prefix) in the order of
        // SmallRNAConsts.Biotypes, and sorted by chromosome and start within each group.
        foreach (var pir in SmallRNAConsts.Biotypes)
        {
          var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();

          GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

          foreach (var loc in locs)
          {
            sw.WriteLine(bedfile.GetValue(loc));
          }
        }
      }

      Progress.SetMessage("Extracting sequence from " + options.FastaFile + "...");
      new Bed2FastaProcessor(new Bed2FastaProcessorOptions()
      {
        GenomeFastaFile = options.FastaFile,
        InputFile = options.OutputFile,
        OutputFile = options.OutputFile + ".fa",
        KeepChrInName = false,
        AcceptName = m => m.StartsWith(SmallRNAConsts.miRNA) || m.StartsWith(SmallRNAConsts.tRNA),
      })
      {
        Progress = this.Progress
      }.Process();

      var summaryFile = options.OutputFile + ".info";
      Progress.SetMessage("Writing summary to " + summaryFile + "...");
      using (var sw = new StreamWriter(summaryFile))
      {
        sw.WriteLine("Biotype\tCount");

        // Distinct names are counted per biotype (the text before the first ':'), largest first.
        all.ConvertAll(m => m.Name).Distinct().GroupBy(m => m.StringBefore(":")).OrderByDescending(m => m.Count()).ToList().ForEach(m => sw.WriteLine("{0}\t{1}", m.Key, m.Count()));
      }

      return new string[] { options.OutputFile };
    }
Пример #7
0
    /// <summary>
    /// Reads the bed file named by <c>options.TargetFile</c> and builds one
    /// <see cref="CoverageRegion"/> per record, converting the zero-based bed start into a
    /// one-based (gff style) start.
    /// </summary>
    /// <param name="options">Supplies <c>TargetFile</c>, the path of the bed file to read.</param>
    /// <param name="progress">Receives a message with the number of records read.</param>
    /// <returns>
    /// The regions in file order. Each region's <c>Coverages</c> list is pre-filled with
    /// <c>End - Start</c> entries of value 1000.
    /// </returns>
    public static List<CoverageRegion> GetTargetCoverageRegionFromBed(ITargetBuilderOptions options, IProgressCallback progress)
    {
      var bedItems = new BedItemFile<BedItem>().ReadFromFile(options.TargetFile);
      progress.SetMessage("Total {0} potential target group read from file {1}", bedItems.Count, options.TargetFile);

      var regions = new List<CoverageRegion>();
      foreach (var bed in bedItems)
      {
        var region = new CoverageRegion()
        {
          Name = bed.Name,
          // presumably strips a leading "chr" prefix; StringAfter is defined elsewhere
          Seqname = bed.Seqname.StringAfter("chr"),
          // bed start is zero-based, the region start is one-based
          Start = bed.Start + 1,
          End = bed.End,
          Strand = bed.Strand
        };

        // NOTE(review): this adds End - Start entries, while a one-based inclusive range
        // [Start, End] spans End - Start + 1 positions - confirm this is intended.
        var position = region.Start;
        while (position < region.End)
        {
          region.Coverages.Add(1000);
          position++;
        }

        regions.Add(region);
      }

      return regions;
    }
Пример #8
0
        /// <summary>
        /// Builds the smallRNA coordinate database: the main bed file <c>options.OutputFile</c>
        /// plus several derived files.
        /// </summary>
        /// <remarks>
        /// <para>
        /// A chromosome name map is built first from <c>options.FastaFile + ".fai"</c> (or from
        /// the fasta file itself when the index does not exist) so that names with and without
        /// a "chr" prefix resolve to the name used by the genome.
        /// </para>
        /// <para>
        /// Entries are then collected from the optional inputs (each skipped when its file does
        /// not exist): miRNA from <c>options.MiRBaseFile</c>, tRNA from
        /// <c>options.UcscTrnaFile</c> and <c>options.UcscMatureTrnaFastaFile</c>,
        /// mitochondrial tRNA and the remaining biotypes from <c>options.EnsemblGtfFile</c>,
        /// and rRNA from <c>options.RRNAFile</c>.
        /// </para>
        /// <para>
        /// Files written: the options (".param"), the main bed file, a ".miRNA.bed" file,
        /// ".miss1" and ".miss0" bed files, an ".info" summary, optionally a combined ".fasta"
        /// file, and a ".fa" file produced by <c>Bed2FastaProcessor</c>.
        /// </para>
        /// </remarks>
        /// <returns>
        /// <c>options.OutputFile</c>, followed by the combined ".fasta" file when it was written.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var paramFile = options.OutputFile + ".param";

            options.SaveToFile(paramFile);

            var bedfile = new BedItemFile <BedItem>(6);

            Progress.SetMessage("building chromosome name map ...");

            // Collect the genome's sequence names, preferring the (much smaller) fasta index.
            var chrNames = new List <string>();
            var faiFile  = options.FastaFile + ".fai";

            if (File.Exists(faiFile))
            {
                using (StreamReader sr = new StreamReader(faiFile))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // the sequence name is the first tab-separated column
                        chrNames.Add(line.Split('\t')[0]);
                    }
                }
            }
            else
            {
                var ff = new FastaFormat(int.MaxValue);
                using (StreamReader sr = new StreamReader(options.FastaFile))
                {
                    Sequence seq;
                    while ((seq = ff.ReadSequence(sr)) != null)
                    {
                        chrNames.Add(seq.Name);
                    }
                }
            }

            // Map every name, and its variant with/without the "chr" prefix, to the genome's own
            // name, and remember how the genome calls the mitochondrial chromosome.
            var mitoName = "M";
            Dictionary <string, string> chrNameMap = new Dictionary <string, string>();
            foreach (var name in chrNames)
            {
                chrNameMap[name] = name;
                if (name.StartsWith("chr"))
                {
                    chrNameMap[name.StringAfter("chr")] = name;
                }
                else
                {
                    chrNameMap["chr" + name] = name;
                }

                if (name.Equals("chrMT") || name.Equals("MT"))
                {
                    mitoName = "MT";
                }
                if (name.Equals("chrM") || name.Equals("M"))
                {
                    mitoName = "M";
                }
            }

            // NOTE(review): the indexer throws KeyNotFoundException when the genome contains
            // neither an "M" nor an "MT" sequence - confirm that input is never expected.
            var longMitoName = chrNameMap[mitoName];

            Progress.SetMessage("mitochondral chromosome name = {0}", longMitoName);

            var mirnas = new List <BedItem>();

            if (File.Exists(options.MiRBaseFile))
            {
                Progress.SetMessage("Processing {0} ...", options.MiRBaseFile);

                if (options.MiRBaseFile.EndsWith(".bed"))
                {
                    mirnas = bedfile.ReadFromFile(options.MiRBaseFile);
                    mirnas.ForEach(m =>
                    {
                        m.Seqname = m.Seqname.StringAfter("chr");
                        m.Name    = options.MiRBaseKey + ":" + m.Name;
                    });
                }
                else
                {
                    using (var gf = new GtfItemFile(options.MiRBaseFile))
                    {
                        GtfItem item;
                        while ((item = gf.Next(options.MiRBaseKey)) != null)
                        {
                            BedItem loc = new BedItem();
                            loc.Seqname = item.Seqname.StringAfter("chr");
                            // one-based start of the source record -> zero-based bed start
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;
                            loc.Name    = options.MiRBaseKey + ":" + item.Attributes.StringAfter("Name=").StringBefore(";");
                            loc.Score   = 1000;
                            loc.Strand  = item.Strand;
                            mirnas.Add(loc);
                        }
                    }
                }

                Progress.SetMessage("{0} miRNA readed.", mirnas.Count);
            }

            // Checked once: the same answer is needed again for every gene record of the
            // Ensembl file below, and querying the file system per record is wasted work.
            var hasUcscTrna = File.Exists(options.UcscTrnaFile);

            List <BedItem> trnas = new List <BedItem>();

            if (hasUcscTrna)
            {
                //reading tRNA from ucsc table without mitochondrial tRNA
                Progress.SetMessage("Processing {0} ...", options.UcscTrnaFile);
                trnas = bedfile.ReadFromFile(options.UcscTrnaFile);
                trnas.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));

                // The filters are skipped when they would remove every entry of the file.
                var removed = trnas.Where(m => (m.Seqname.Length > 1) && !m.Seqname.All(n => char.IsDigit(n))).ToList();
                if (removed.Count != trnas.Count)
                {
                    //remove the tRNA not from 1-22, X and Y
                    trnas.RemoveAll(m => (m.Seqname.Length > 1) && !m.Seqname.All(n => char.IsDigit(n)));

                    //mitochondrial tRNA will be extracted from ensembl gtf file
                    trnas.RemoveAll(m => m.Seqname.Equals("M") || m.Seqname.Equals("MT"));
                }

                trnas.ForEach(m => m.Name = GetTRNAName(m.Name));

                Progress.SetMessage("{0} tRNA from ucsc readed.", trnas.Count);

                if (File.Exists(options.UcscMatureTrnaFastaFile))
                {
                    // Mature tRNA sequences become entries spanning their own sequence; the
                    // sequence text is kept so it can be appended to the ".fa" file at the end.
                    var seqs = SequenceUtils.Read(options.UcscMatureTrnaFastaFile);
                    foreach (var seq in seqs)
                    {
                        var tRNAName = GetTRNAName(seq.Name);
                        trnas.Add(new BedItem()
                        {
                            Seqname  = seq.Name,
                            Start    = 0,
                            End      = seq.SeqString.Length,
                            Strand   = '+',
                            Name     = tRNAName,
                            Sequence = seq.SeqString
                        });
                    }
                }
            }

            var others = new List <BedItem>();

            if (File.Exists(options.EnsemblGtfFile))
            {
                //reading smallRNA/tRNA from ensembl gtf file
                Progress.SetMessage("Processing {0} ...", options.EnsemblGtfFile);
                using (var gf = new GtfItemFile(options.EnsemblGtfFile))
                {
                    // miRNA comes from the miRBase file only, so it is not accepted from this file.
                    var biotypes = new HashSet <string>(SmallRNAConsts.Biotypes);
                    biotypes.Remove(SmallRNAConsts.miRNA);

                    GtfItem item;
                    int count = 0;
                    while ((item = gf.Next("gene")) != null)
                    {
                        // The biotype attribute is looked up under either of two keys; records
                        // with neither are skipped.
                        string biotype;
                        if (item.Attributes.Contains("gene_biotype"))
                        {
                            biotype = item.Attributes.StringAfter("gene_biotype \"").StringBefore("\"");
                        }
                        else if (item.Attributes.Contains("gene_type"))
                        {
                            biotype = item.Attributes.StringAfter("gene_type \"").StringBefore("\"");
                        }
                        else
                        {
                            continue;
                        }

                        // tRNA entries of this file are ignored when the UCSC file supplied them.
                        if (hasUcscTrna && biotype.Equals(SmallRNAConsts.tRNA))
                        {
                            continue;
                        }

                        if (biotype.Equals("Mt_tRNA"))
                        {
                            // Mitochondrial tRNA is appended to the tRNA list with a running number.
                            count++;
                            var gene_name = item.Attributes.Contains("gene_name") ? item.Attributes.StringAfter("gene_name \"").StringBefore("\"") : item.GeneId;
                            BedItem loc = new BedItem();
                            loc.Seqname = mitoName;
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;
                            loc.Name    = string.Format(SmallRNAConsts.mt_tRNA + ":" + longMitoName + ".tRNA{0}-{1}", count, gene_name.StringAfter("-"));
                            loc.Score   = 1000;
                            loc.Strand  = item.Strand;
                            trnas.Add(loc);
                        }
                        else if (biotypes.Contains(biotype))
                        {
                            string seqName;
                            if (item.Seqname.ToLower().StartsWith("chr"))
                            {
                                seqName = item.Seqname.Substring(3);
                            }
                            else
                            {
                                seqName = item.Seqname;
                            }
                            if (seqName.Equals("M") || seqName.Equals("MT"))
                            {
                                seqName = mitoName;
                            }

                            //ignore all smallRNA coordinates on scaffold or contig.
                            //if (seqName.Length > 5)
                            //{
                            //  continue;
                            //}

                            var gene_name   = item.Attributes.StringAfter("gene_name \"").StringBefore("\"");
                            var lowGeneName = gene_name.ToLower();

                            // Genes named rny* or y_rna are reported under the "yRNA" biotype.
                            if (lowGeneName.StartsWith("rny") || lowGeneName.Equals("y_rna"))
                            {
                                biotype = "yRNA";
                            }

                            BedItem loc = new BedItem();
                            loc.Seqname = seqName;
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;

                            //if (lowGeneName.EndsWith("_rrna") && loc.Length < 200)
                            //{
                            //  biotype = "rRNA";
                            //}

                            loc.Name   = biotype + ":" + gene_name + ":" + item.GeneId;
                            loc.Score  = 1000;
                            loc.Strand = item.Strand;

                            others.Add(loc);
                        }
                    }
                }
            }

            var all = new List <BedItem>();

            all.AddRange(mirnas);
            all.AddRange(trnas);
            all.AddRange(others);

            // Switch every entry to the genome's own chromosome name when one is known.
            foreach (var bi in all)
            {
                string genomeName;
                if (chrNameMap.TryGetValue(bi.Seqname, out genomeName))
                {
                    bi.Seqname = genomeName;
                }
            }

            if (File.Exists(options.RRNAFile))
            {
                // rRNA entries are added after the renaming above, so their sequence names are
                // kept as they appear in the rRNA file.
                var seqs = SequenceUtils.Read(options.RRNAFile);
                foreach (var seq in seqs)
                {
                    all.Add(new BedItem()
                    {
                        Seqname = seq.Name,
                        Start   = 0,
                        End     = seq.SeqString.Length,
                        Strand  = '+',
                        Name    = "rRNA:" + SmallRNAConsts.rRNADB_KEY + seq.Name
                    });
                }
            }

            Progress.SetMessage("Saving smallRNA coordinates to " + options.OutputFile + "...");
            using (var sw = new StreamWriter(options.OutputFile))
            {
                // Output is grouped by biotype (matched on the name prefix) in the order of
                // SmallRNAConsts.Biotypes, and sorted by chromosome and start within each group.
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    Progress.SetMessage("{0} : {1}", pir, locs.Count);

                    GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

                    foreach (var loc in locs)
                    {
                        sw.WriteLine(bedfile.GetValue(loc));
                    }
                }
            }

            var miRNA_bed = FileUtils.ChangeExtension(options.OutputFile, ".miRNA.bed");

            Progress.SetMessage("Saving miRNA coordinates to " + miRNA_bed + "...");
            using (var sw = new StreamWriter(miRNA_bed))
            {
                var pir  = SmallRNAConsts.miRNA;
                var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                Progress.SetMessage("{0} : {1}", pir, locs.Count);

                GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

                foreach (var loc in locs)
                {
                    sw.WriteLine(bedfile.GetValue(loc));
                }
            }

            // ".miss1": every biotype except lincRNA / lncRNA, without the rRNA database entries.
            Progress.SetMessage("Saving smallRNA miss1 coordinates to " + options.OutputFile + ".miss1 ...");
            using (var sw = new StreamWriter(options.OutputFile + ".miss1"))
            {
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    if (pir == SmallRNABiotype.lincRNA.ToString() || pir == SmallRNABiotype.lncRNA.ToString())
                    {
                        continue;
                    }
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    locs.RemoveAll(l => l.Name.Contains(SmallRNAConsts.rRNADB_KEY));

                    Progress.SetMessage("{0} : {1}", pir, locs.Count);

                    GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

                    foreach (var loc in locs)
                    {
                        sw.WriteLine(bedfile.GetValue(loc));
                    }
                }
            }

            // ".miss0": the complement - lincRNA, lncRNA and only the rRNA database entries.
            // (The message used to say "miss1" here although the ".miss0" file is written.)
            Progress.SetMessage("Saving smallRNA miss0 coordinates to " + options.OutputFile + ".miss0 ...");
            using (var sw = new StreamWriter(options.OutputFile + ".miss0"))
            {
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    if (pir != SmallRNABiotype.lincRNA.ToString() && pir != SmallRNABiotype.lncRNA.ToString() && pir != SmallRNABiotype.rRNA.ToString())
                    {
                        continue;
                    }
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    if (pir == SmallRNABiotype.rRNA.ToString())
                    {
                        locs.RemoveAll(l => !l.Name.Contains(SmallRNAConsts.rRNADB_KEY));
                    }

                    Progress.SetMessage("{0} : {1}", pir, locs.Count);

                    GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

                    foreach (var loc in locs)
                    {
                        sw.WriteLine(bedfile.GetValue(loc));
                    }
                }
            }

            var summaryFile = options.OutputFile + ".info";

            Progress.SetMessage("Writing summary to " + summaryFile + "...");
            using (var sw = new StreamWriter(summaryFile))
            {
                sw.WriteLine("Biotype\tCount");

                // Distinct names are counted per biotype (the text before the first ':'), largest first.
                all.ConvertAll(m => m.Name).Distinct().GroupBy(m => m.StringBefore(":")).OrderByDescending(m => m.Count()).ToList().ForEach(m => sw.WriteLine("{0}\t{1}", m.Key, m.Count()));
            }

            var result = new List <string>(new[] { options.OutputFile });

            var fasta = Path.ChangeExtension(options.OutputFile, ".fasta");

            // When extra sequences (mature tRNA and/or rRNA) exist, write a combined fasta file:
            // the genome followed by those extra sequence files.
            if ((File.Exists(options.UcscTrnaFile) && File.Exists(options.UcscMatureTrnaFastaFile)) || File.Exists(options.RRNAFile))
            {
                result.Add(fasta);
                using (var sw = new StreamWriter(fasta))
                {
                    string line;
                    using (var sr = new StreamReader(options.FastaFile))
                    {
                        while ((line = sr.ReadLine()) != null)
                        {
                            sw.WriteLine(line);
                        }
                    }

                    if (File.Exists(options.UcscTrnaFile) && File.Exists(options.UcscMatureTrnaFastaFile))
                    {
                        using (var sr = new StreamReader(options.UcscMatureTrnaFastaFile))
                        {
                            while ((line = sr.ReadLine()) != null)
                            {
                                sw.WriteLine(line);
                            }
                        }
                    }

                    if (File.Exists(options.RRNAFile))
                    {
                        using (var sr = new StreamReader(options.RRNAFile))
                        {
                            while ((line = sr.ReadLine()) != null)
                            {
                                sw.WriteLine(line);
                            }
                        }
                    }
                }
            }

            var faFile = options.OutputFile + ".fa";

            Progress.SetMessage("Extracting sequence from " + options.FastaFile + "...");
            var b2foptions = new Bed2FastaProcessorOptions()
            {
                GenomeFastaFile = options.FastaFile,
                InputFile       = options.OutputFile,
                OutputFile      = faFile,
                KeepChrInName   = false,
            };

            // tRNA sequences are extracted from the genome only when no mature tRNA fasta file
            // is available; otherwise they are appended from that file below.
            if (!File.Exists(options.UcscMatureTrnaFastaFile))
            {
                b2foptions.AcceptName = m => m.StartsWith(SmallRNAConsts.miRNA) || m.StartsWith(SmallRNAConsts.mt_tRNA) || m.StartsWith(SmallRNAConsts.tRNA);
            }
            else
            {
                b2foptions.AcceptName = m => m.StartsWith(SmallRNAConsts.miRNA) || m.StartsWith(SmallRNAConsts.mt_tRNA);
            }

            new Bed2FastaProcessor(b2foptions)
            {
                Progress = this.Progress
            }.Process();

            if (File.Exists(options.UcscMatureTrnaFastaFile))
            {
                Progress.SetMessage("Extracting sequence from " + options.UcscMatureTrnaFastaFile + " ...");

                // Append (not overwrite) the tRNA entries that carry their own sequence.
                using (var sw = new StreamWriter(faFile, true))
                {
                    foreach (var tRNA in trnas)
                    {
                        if (!string.IsNullOrEmpty(tRNA.Sequence))
                        {
                            sw.WriteLine(">{0}", tRNA.Name);
                            sw.WriteLine("{0}", tRNA.Sequence);
                        }
                    }
                }
            }

            return(result);
        }
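        /// <summary>
        /// Get 1-based coordinates from a file. BED format is translated automatically.
        /// </summary>
        /// <param name="coordinateFile">source file, in either BED or GTF format</param>
        /// <param name="gtfFeature">if the file is in GTF format, only items whose feature equals this value are kept; no filtering is done when it is null or empty</param>
        /// <param name="bedAsGtf">true if the BED file is already 1-based, in which case its start positions are not shifted</param>
        /// <returns>the regions read from the file, as GTF items</returns>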
        public static List <GtfItem> GetSequenceRegions(string coordinateFile, string gtfFeature = "", bool bedAsGtf = false)
        {
            if (IsBedFormat(coordinateFile))
            {
                // BED is zero-based and its end is not part of the region:
                //   https://genome.ucsc.edu/FAQ/FAQformat.html#format1
                // GTF is one-based and its end is part of the region:
                //   http://useast.ensembl.org/info/website/upload/gff.html
                // Positions in SAM format are one-based too, so every BED item is turned into a GTF item:
                //   http://samtools.sourceforge.net/SAMv1.pdf
                var bedRegions = new BedItemFile <BedItem>().ReadFromFile(coordinateFile);
                var shiftStart = !bedAsGtf;

                var converted = new List <GtfItem>(bedRegions.Count);
                foreach (var bed in bedRegions)
                {
                    // Only the start moves; the end is left untouched.
                    if (shiftStart)
                    {
                        bed.Start++;
                    }

                    converted.Add(new GtfItem(bed));
                }

                return converted;
            }

            List <GtfItem> regions = GtfItemFile.ReadFromFile(coordinateFile).ToList();

            // Keep only the requested feature type when one was given.
            if (!string.IsNullOrEmpty(gtfFeature))
            {
                regions.RemoveAll(item => !item.Feature.Equals(gtfFeature));
            }

            foreach (var item in regions)
            {
                // Gene id: the quoted gene_id "..." form wins over the ID=...; form.
                if (item.Attributes.Contains("gene_id \""))
                {
                    item.GeneId = item.Attributes.StringAfter("gene_id \"").StringBefore("\"");
                }
                else if (item.Attributes.Contains("ID="))
                {
                    item.GeneId = item.Attributes.StringAfter("ID=").StringBefore(";");
                }

                // Name: the quoted gene_name "..." form wins over the Name=...; form.
                if (item.Attributes.Contains("gene_name \""))
                {
                    item.Name = item.Attributes.StringAfter("gene_name \"").StringBefore("\"");
                }
                else if (item.Attributes.Contains("Name="))
                {
                    item.Name = item.Attributes.StringAfter("Name=").StringBefore(";");
                }

                // When only one of the two was found, copy it into the other.
                if (string.IsNullOrEmpty(item.GeneId) && !string.IsNullOrEmpty(item.Name))
                {
                    item.GeneId = item.Name;
                }

                if (!string.IsNullOrEmpty(item.GeneId) && string.IsNullOrEmpty(item.Name))
                {
                    item.Name = item.GeneId;
                }

                // Neither was found: fall back to the raw attribute text for both.
                if (string.IsNullOrEmpty(item.GeneId))
                {
                    item.GeneId = item.Attributes;
                    item.Name   = item.Attributes;
                }
            }

            return regions;
        }