Esempio n. 1
0
        /// <summary>
        /// Slides a window of <c>options.MinimumSeedLength</c> bases over every target coverage
        /// region and collects the seeds that <paramref name="acceptSeed"/> approves.
        /// </summary>
        /// <param name="options">Supplies the target definition plus the seed length and minimum coverage passed to <c>GetSeed</c>.</param>
        /// <param name="acceptSeed">Filter applied to every non-null seed; only seeds it returns true for are kept.</param>
        /// <param name="progress">Receives status messages and one progress tick per region.</param>
        /// <returns>The accepted seeds, ordered by region and then by increasing window offset.</returns>
        public static List<SeedItem> BuildTargetSeeds(ITargetBuilderOptions options, Func<SeedItem, bool> acceptSeed, IProgressCallback progress)
        {
            var accepted = new List<SeedItem>();
            var regions = GetTargetCoverageRegion(options, progress);

            progress.SetMessage("Building seeds ...");
            progress.SetRange(0, regions.Count);
            progress.SetPosition(0);

            foreach (var region in regions)
            {
                progress.Increment(1);

                // NOTE(review): the bound stops before offset Length - MinimumSeedLength, so the
                // window ending exactly at the end of the sequence is never tried - confirm
                // whether that is intended.
                for (int offset = 0; offset < region.Sequence.Length - options.MinimumSeedLength; offset++)
                {
                    SeedItem candidate = GetSeed(region, offset, options.MinimumSeedLength, options.MinimumCoverage);

                    // GetSeed may yield null; such windows are skipped without consulting the filter.
                    if (candidate == null || !acceptSeed(candidate))
                    {
                        continue;
                    }

                    accepted.Add(candidate);
                }
            }

            progress.End();
            progress.SetMessage("Total {0} {1}mers seeds were built.", accepted.Count, options.MinimumSeedLength);

            return accepted;
        }
Esempio n. 2
0
        /// <summary>
        /// Reads target regions from the BED file named by <c>options.TargetFile</c> and converts
        /// each record from BED coordinates (zero-based start) to one-based inclusive
        /// coordinates (GFF style).
        /// </summary>
        /// <remarks>
        /// Every base of a region is assigned <c>DEFAULT_COVERAGE</c>. One coverage entry is
        /// created per base of the inclusive range Start..End, which is the same layout that
        /// GetTargetCoverageRegionFromXml produces.
        /// </remarks>
        /// <param name="options">Supplies <c>TargetFile</c>, the BED file to read.</param>
        /// <param name="progress">Receives a message reporting how many records were read.</param>
        /// <returns>One coverage region per BED record, in file order.</returns>
        public static List<CoverageRegion> GetTargetCoverageRegionFromBed(ITargetBuilderOptions options, IProgressCallback progress)
        {
            var result = new List<CoverageRegion>();

            var groups = new BedItemFile<BedItem>().ReadFromFile(options.TargetFile);

            progress.SetMessage("Total {0} potential target group read from file {1}", groups.Count, options.TargetFile);

            foreach (var utr in groups)
            {
                var rg = new CoverageRegion();
                rg.Name = utr.Name;
                rg.Seqname = utr.Seqname.StringAfter("chr");
                // Zero-based BED start becomes a one-based start; the end is kept as is and is
                // therefore the inclusive last base.
                rg.Start = utr.Start + 1;
                rg.End = utr.End;
                rg.Strand = utr.Strand;

                // Start and End are both inclusive here, so the loop must include rg.End;
                // stopping at "i < rg.End" left the last base of the region without coverage.
                for (var i = rg.Start; i <= rg.End; i++)
                {
                    rg.Coverages.Add(new CoverageSite(DEFAULT_COVERAGE));
                }

                result.Add(rg);
            }

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the target coverage regions, attaches the genomic sequence of each region from
        /// the genome FASTA file and resolves a gene symbol for each region.
        /// </summary>
        /// <param name="options">Supplies <c>TargetFile</c>, <c>GenomeFastaFile</c> and <c>RefgeneFile</c>.</param>
        /// <param name="progress">Receives status messages while the files are processed.</param>
        /// <param name="removeRegionWithoutSequence">
        /// When true, regions whose sequence is still null or empty after the FASTA pass are removed.
        /// </param>
        /// <returns>The regions with <c>Sequence</c> and <c>GeneSymbol</c> filled in.</returns>
        public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
        {
            // A target file ending in ".xml" goes to the XML reader; anything else is read as BED.
            List<CoverageRegion> regions = options.TargetFile.EndsWith(".xml")
                ? GetTargetCoverageRegionFromXml(options, progress)
                : GetTargetCoverageRegionFromBed(options, progress);

            var regionsBySeqname = regions.ToGroupDictionary(m => m.Seqname);

            progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
            using (var reader = new StreamReader(options.GenomeFastaFile))
            {
                var fasta = new FastaFormat();
                for (Sequence chromosome = fasta.ReadSequence(reader); chromosome != null; chromosome = fasta.ReadSequence(reader))
                {
                    progress.SetMessage("Processing chromosome {0} ...", chromosome.Reference);

                    // Region names had any "chr" prefix handled the same way when they were read.
                    var chromosomeName = chromosome.Name.StringAfter("chr");
                    List<CoverageRegion> onChromosome;
                    if (!regionsBySeqname.TryGetValue(chromosomeName, out onChromosome))
                    {
                        continue;
                    }

                    foreach (var region in onChromosome)
                    {
                        // Region starts are one-based, Substring is zero-based.
                        region.Sequence = chromosome.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

                        // The reverse complement is only materialised for '+' strand regions.
                        if (region.Strand == '+')
                        {
                            region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
                        }
                    }
                }
            }

            if (removeRegionWithoutSequence)
            {
                regions.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
            }

            progress.SetMessage("Filling sequence finished.");

            // NOTE(review): the arguments 1 and 12 presumably select key/value columns of the
            // refgene file - confirm against MapReader.
            var symbolByGene = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);

            foreach (var region in regions)
            {
                var gene = region.Name.StringBefore("_utr3");
                if (symbolByGene.ContainsKey(gene))
                {
                    region.GeneSymbol = symbolByGene[gene];
                }
                else
                {
                    region.GeneSymbol = string.Empty;
                }
            }

            return regions;
        }
Esempio n. 4
0
        /// <summary>
        /// Builds all accepted target seeds and groups them by their upper-cased sequence.
        /// </summary>
        /// <param name="options">Target builder options; forwarded to <c>BuildTargetSeeds</c>.</param>
        /// <param name="acceptSeed">Filter deciding which seeds are kept; forwarded to <c>BuildTargetSeeds</c>.</param>
        /// <param name="progress">Receives status messages.</param>
        /// <returns>A map from upper-cased seed sequence to every seed having that sequence.</returns>
        public static Dictionary<string, List<SeedItem>> BuildTargetSeedMap(ITargetBuilderOptions options, Func<SeedItem, bool> acceptSeed, IProgressCallback progress)
        {
            // Read seeds of options.MinimumSeedLength bases from the targets.
            var targetSeeds = BuildTargetSeeds(options, acceptSeed, progress);

            progress.SetMessage("Grouping seeds by sequence ...");

            // The key is a machine identifier, not display text: use the invariant culture so
            // the grouping does not depend on the current thread culture (e.g. Turkish 'i').
            var result = targetSeeds.ToGroupDictionary(m => m.Sequence.ToUpperInvariant());

            progress.SetMessage("Total {0} unique {1}mers seeds were built.", result.Count, options.MinimumSeedLength);
            return result;
        }
Esempio n. 5
0
        /// <summary>
        /// Searches every target coverage region for each of the given seed sequences and
        /// builds a seed item for every occurrence, including overlapping ones.
        /// </summary>
        /// <remarks>
        /// For '+' strand regions the search runs over the reverse-complemented sequence and
        /// the hit position is mirrored back to an offset in <c>Sequence</c>; for other
        /// regions the search runs over <c>Sequence</c> directly. Null or empty seed strings are
        /// skipped, and occurrences for which <c>GetSeed</c> returns null are not added,
        /// matching the null check in the predicate-based overload.
        /// </remarks>
        /// <param name="options">Supplies the target definition plus the seed length and minimum coverage passed to <c>GetSeed</c>.</param>
        /// <param name="seeds">Seed sequences to look for.</param>
        /// <param name="progress">Receives status messages and one progress tick per region.</param>
        /// <returns>The seed items built for all occurrences; never contains null.</returns>
        public static List<SeedItem> BuildTargetSeeds(ITargetBuilderOptions options, List<string> seeds, IProgressCallback progress)
        {
            List<SeedItem> result = new List<SeedItem>();

            var mapped = GetTargetCoverageRegion(options, progress);

            progress.SetMessage("Building seeds ...");
            progress.SetRange(0, mapped.Count);
            progress.SetPosition(0);
            foreach (var l in mapped)
            {
                progress.Increment(1);

                // Both values depend only on the region, so compute them once per region
                // instead of once per seed.
                bool plusStrand = l.Strand == '+';
                var curseq = plusStrand ? l.ReverseComplementedSequence : l.Sequence;

                foreach (var seed in seeds)
                {
                    // An empty pattern matches at every index and would finally push the start
                    // index past the end of the string; a null pattern cannot be searched at all.
                    if (string.IsNullOrEmpty(seed))
                    {
                        continue;
                    }

                    // Ordinal search: sequences are plain symbols, not culture-sensitive text.
                    int pos = curseq.IndexOf(seed, StringComparison.Ordinal);
                    while (pos >= 0)
                    {
                        // On the reverse complement a hit at pos corresponds to the mirrored
                        // offset in the forward sequence.
                        int offset = plusStrand
                            ? curseq.Length - pos - options.MinimumSeedLength
                            : pos;

                        SeedItem si = GetSeed(l, offset, options.MinimumSeedLength, options.MinimumCoverage);
                        if (si != null)
                        {
                            result.Add(si);
                        }

                        // Advance by one so overlapping occurrences are found as well.
                        pos = curseq.IndexOf(seed, pos + 1, StringComparison.Ordinal);
                    }
                }
            }
            progress.End();
            progress.SetMessage("Total {0} {1}mers seeds were built.", result.Count, options.MinimumSeedLength);

            return result;
        }
Esempio n. 6
0
 /// <summary>
 /// Builds all accepted target seeds and groups them by their upper-cased sequence.
 /// </summary>
 /// <param name="options">Target builder options; forwarded to <c>BuildTargetSeeds</c>.</param>
 /// <param name="acceptSeed">Filter deciding which seeds are kept; forwarded to <c>BuildTargetSeeds</c>.</param>
 /// <param name="progress">Receives status messages.</param>
 /// <returns>A map from upper-cased seed sequence to every seed having that sequence.</returns>
 public static Dictionary<string, List<SeedItem>> BuildTargetSeedMap(ITargetBuilderOptions options, Func<SeedItem, bool> acceptSeed, IProgressCallback progress)
 {
   // Read seeds of options.MinimumSeedLength bases from the targets.
   var targetSeeds = BuildTargetSeeds(options, acceptSeed, progress);
   progress.SetMessage("Grouping seeds by sequence ...");

   // The key is a machine identifier, not display text: use the invariant culture so
   // the grouping does not depend on the current thread culture (e.g. Turkish 'i').
   var result = targetSeeds.ToGroupDictionary(m => m.Sequence.ToUpperInvariant());
   progress.SetMessage("Total {0} unique {1}mers seeds were built.", result.Count, options.MinimumSeedLength);
   return result;
 }
Esempio n. 7
0
    /// <summary>
    /// Slides a window of <c>options.MinimumSeedLength</c> bases over every target coverage
    /// region and collects the seeds that <paramref name="acceptSeed"/> approves.
    /// </summary>
    /// <param name="options">Supplies the target definition plus the seed length and minimum coverage passed to <c>GetSeed</c>.</param>
    /// <param name="acceptSeed">Filter applied to every non-null seed; only seeds it returns true for are kept.</param>
    /// <param name="progress">Receives status messages and one progress tick per region.</param>
    /// <returns>The accepted seeds, ordered by region and then by increasing window offset.</returns>
    public static List<SeedItem> BuildTargetSeeds(ITargetBuilderOptions options, Func<SeedItem, bool> acceptSeed, IProgressCallback progress)
    {
      var accepted = new List<SeedItem>();
      var regions = GetTargetCoverageRegion(options, progress);

      progress.SetMessage("Building seeds ...");
      progress.SetRange(0, regions.Count);
      progress.SetPosition(0);

      foreach (var region in regions)
      {
        progress.Increment(1);

        // NOTE(review): the bound stops before offset Length - MinimumSeedLength, so the
        // window ending exactly at the end of the sequence is never tried - confirm
        // whether that is intended.
        for (int offset = 0; offset < region.Sequence.Length - options.MinimumSeedLength; offset++)
        {
          SeedItem candidate = GetSeed(region, offset, options.MinimumSeedLength, options.MinimumCoverage);

          // GetSeed may yield null; such windows are skipped without consulting the filter.
          if (candidate == null || !acceptSeed(candidate))
          {
            continue;
          }

          accepted.Add(candidate);
        }
      }

      progress.End();
      progress.SetMessage("Total {0} {1}mers seeds were built.", accepted.Count, options.MinimumSeedLength);

      return accepted;
    }
Esempio n. 8
0
    /// <summary>
    /// Searches every target coverage region for each of the given seed sequences and
    /// builds a seed item for every occurrence, including overlapping ones.
    /// </summary>
    /// <remarks>
    /// For '+' strand regions the search runs over the reverse-complemented sequence and
    /// the hit position is mirrored back to an offset in <c>Sequence</c>; for other
    /// regions the search runs over <c>Sequence</c> directly. Null or empty seed strings are
    /// skipped, and occurrences for which <c>GetSeed</c> returns null are not added,
    /// matching the null check in the predicate-based overload.
    /// </remarks>
    /// <param name="options">Supplies the target definition plus the seed length and minimum coverage passed to <c>GetSeed</c>.</param>
    /// <param name="seeds">Seed sequences to look for.</param>
    /// <param name="progress">Receives status messages and one progress tick per region.</param>
    /// <returns>The seed items built for all occurrences; never contains null.</returns>
    public static List<SeedItem> BuildTargetSeeds(ITargetBuilderOptions options, List<string> seeds, IProgressCallback progress)
    {
      List<SeedItem> result = new List<SeedItem>();

      var mapped = GetTargetCoverageRegion(options, progress);

      progress.SetMessage("Building seeds ...");
      progress.SetRange(0, mapped.Count);
      progress.SetPosition(0);
      foreach (var l in mapped)
      {
        progress.Increment(1);

        // Both values depend only on the region, so compute them once per region
        // instead of once per seed.
        bool plusStrand = l.Strand == '+';
        var curseq = plusStrand ? l.ReverseComplementedSequence : l.Sequence;

        foreach (var seed in seeds)
        {
          // An empty pattern matches at every index and would finally push the start
          // index past the end of the string; a null pattern cannot be searched at all.
          if (string.IsNullOrEmpty(seed))
          {
            continue;
          }

          // Ordinal search: sequences are plain symbols, not culture-sensitive text.
          int pos = curseq.IndexOf(seed, StringComparison.Ordinal);
          while (pos >= 0)
          {
            // On the reverse complement a hit at pos corresponds to the mirrored
            // offset in the forward sequence.
            int offset = plusStrand
              ? curseq.Length - pos - options.MinimumSeedLength
              : pos;

            SeedItem si = GetSeed(l, offset, options.MinimumSeedLength, options.MinimumCoverage);
            if (si != null)
            {
              result.Add(si);
            }

            // Advance by one so overlapping occurrences are found as well.
            pos = curseq.IndexOf(seed, pos + 1, StringComparison.Ordinal);
          }
        }
      }
      progress.End();
      progress.SetMessage("Total {0} {1}mers seeds were built.", result.Count, options.MinimumSeedLength);

      return result;
    }
Esempio n. 9
0
    /// <summary>
    /// Reads target regions from the BED file named by <c>options.TargetFile</c> and converts
    /// each record from BED coordinates (zero-based start) to one-based inclusive
    /// coordinates (GFF style).
    /// </summary>
    /// <remarks>
    /// Every base of a region is assigned the same fixed coverage value. One coverage entry
    /// is created per base of the inclusive range Start..End, which is the same layout that
    /// GetTargetCoverageRegionFromXml produces.
    /// </remarks>
    /// <param name="options">Supplies <c>TargetFile</c>, the BED file to read.</param>
    /// <param name="progress">Receives a message reporting how many records were read.</param>
    /// <returns>One coverage region per BED record, in file order.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegionFromBed(ITargetBuilderOptions options, IProgressCallback progress)
    {
      // Coverage value given to every base of a BED region.
      const int DefaultCoverage = 1000;

      var result = new List<CoverageRegion>();

      var groups = new BedItemFile<BedItem>().ReadFromFile(options.TargetFile);
      progress.SetMessage("Total {0} potential target group read from file {1}", groups.Count, options.TargetFile);

      foreach (var utr in groups)
      {
        var rg = new CoverageRegion();
        rg.Name = utr.Name;
        rg.Seqname = utr.Seqname.StringAfter("chr");
        // Zero-based BED start becomes a one-based start; the end is kept as is and is
        // therefore the inclusive last base.
        rg.Start = utr.Start + 1;
        rg.End = utr.End;
        rg.Strand = utr.Strand;

        // Start and End are both inclusive here, so the loop must include rg.End;
        // stopping at "i < rg.End" left the last base of the region without coverage.
        for (var i = rg.Start; i <= rg.End; i++)
        {
          rg.Coverages.Add(DefaultCoverage);
        }
        result.Add(rg);
      }

      return result;
    }
Esempio n. 10
0
    /// <summary>
    /// Builds coverage regions from the feature-group XML file named by <c>options.TargetFile</c>.
    /// </summary>
    /// <remarks>
    /// Each group is collapsed onto its first item (names joined with "/"). For every location
    /// that still has mapped reads, the coverage of each position is the sum of the query
    /// counts of the reads spanning it, and every run of consecutive covered positions
    /// becomes one <c>CoverageRegion</c>.
    /// </remarks>
    /// <param name="options">Supplies <c>TargetFile</c>, the XML file to read.</param>
    /// <param name="progress">Receives a message reporting how many groups were read.</param>
    /// <returns>One region per contiguous covered stretch, with one coverage value per position.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegionFromXml(ITargetBuilderOptions options, IProgressCallback progress)
    {
      var regions = new List<CoverageRegion>();

      var groups = new FeatureItemGroupXmlFormat().ReadFromFile(options.TargetFile);
      progress.SetMessage("Total {0} potential target group read from file {1}", groups.Count, options.TargetFile);

      foreach (var group in groups)
      {
        // Items of one group share the same reads: fold the other names into the first
        // item, then keep only that item.
        for (int member = 1; member < group.Count; member++)
        {
          group[0].Name += "/" + group[member].Name;
        }
        group.RemoveRange(1, group.Count - 1);

        var utr = group[0];
        utr.Locations.RemoveAll(m => m.SamLocations.Count == 0);
        utr.CombineLocationByMappedReads();

        foreach (var loc in utr.Locations)
        {
          // Position -> summed query count of all reads covering that position.
          var depthByPosition = new Dictionary<long, int>();
          foreach (var sloc in loc.SamLocations)
          {
            for (long pos = sloc.SamLocation.Start; pos <= sloc.SamLocation.End; pos++)
            {
              // TryGetValue leaves 0 in "soFar" for a position not seen before.
              int soFar;
              depthByPosition.TryGetValue(pos, out soFar);
              depthByPosition[pos] = soFar + sloc.SamLocation.Parent.QueryCount;
            }
          }

          var positions = depthByPosition.Keys.OrderBy(k => k).ToList();

          // Walk the sorted positions; [runStart, next) is the current run of consecutive
          // positions. A run is closed at the end of the list or at the first gap.
          int runStart = 0;
          for (int next = 1; ; next++)
          {
            bool atEnd = next == positions.Count;
            if (!atEnd && positions[next] == positions[next - 1] + 1)
            {
              continue;
            }

            var rg = new CoverageRegion
            {
              Name = utr.Name,
              Seqname = loc.Seqname,
              Start = positions[runStart],
              End = positions[next - 1],
              Strand = loc.Strand
            };
            for (int k = runStart; k < next; k++)
            {
              rg.Coverages.Add(depthByPosition[positions[k]]);
            }
            regions.Add(rg);

            if (atEnd)
            {
              break;
            }

            runStart = next;
          }
        }
      }

      return regions;
    }
Esempio n. 11
0
    /// <summary>
    /// Loads the target coverage regions, attaches the genomic sequence of each region from
    /// the genome FASTA file and resolves a gene symbol for each region.
    /// </summary>
    /// <param name="options">Supplies <c>TargetFile</c>, <c>GenomeFastaFile</c> and <c>RefgeneFile</c>.</param>
    /// <param name="progress">Receives status messages while the files are processed.</param>
    /// <param name="removeRegionWithoutSequence">
    /// When true, regions whose sequence is still null or empty after the FASTA pass are removed.
    /// </param>
    /// <returns>The regions with <c>Sequence</c> and <c>GeneSymbol</c> filled in.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
    {
      // A target file ending in ".xml" goes to the XML reader; anything else is read as BED.
      List<CoverageRegion> regions = options.TargetFile.EndsWith(".xml")
        ? GetTargetCoverageRegionFromXml(options, progress)
        : GetTargetCoverageRegionFromBed(options, progress);

      var regionsBySeqname = regions.ToGroupDictionary(m => m.Seqname);

      progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
      using (var reader = new StreamReader(options.GenomeFastaFile))
      {
        var fasta = new FastaFormat();
        for (Sequence chromosome = fasta.ReadSequence(reader); chromosome != null; chromosome = fasta.ReadSequence(reader))
        {
          progress.SetMessage("Processing chromosome {0} ...", chromosome.Reference);

          // Region names had any "chr" prefix handled the same way when they were read.
          var chromosomeName = chromosome.Name.StringAfter("chr");
          List<CoverageRegion> onChromosome;
          if (!regionsBySeqname.TryGetValue(chromosomeName, out onChromosome))
          {
            continue;
          }

          foreach (var region in onChromosome)
          {
            // Region starts are one-based, Substring is zero-based.
            region.Sequence = chromosome.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

            // The reverse complement is only materialised for '+' strand regions.
            if (region.Strand == '+')
            {
              region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
            }
          }
        }
      }

      if (removeRegionWithoutSequence)
      {
        regions.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
      }

      progress.SetMessage("Filling sequence finished.");

      // NOTE(review): the arguments 1 and 12 presumably select key/value columns of the
      // refgene file - confirm against MapReader.
      var symbolByGene = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);

      foreach (var region in regions)
      {
        var gene = region.Name.StringBefore("_utr3");
        if (symbolByGene.ContainsKey(gene))
        {
          region.GeneSymbol = symbolByGene[gene];
        }
        else
        {
          region.GeneSymbol = string.Empty;
        }
      }

      return regions;
    }
Esempio n. 12
0
        /// <summary>
        /// Builds coverage regions from the feature-group XML file named by <c>options.TargetFile</c>.
        /// </summary>
        /// <remarks>
        /// Each group is collapsed onto its first item (names joined with "/"). For every location
        /// that still has mapped reads, one <c>CoverageSite</c> per position accumulates the query
        /// counts and read names of the reads spanning it, and every run of consecutive covered
        /// positions becomes one <c>CoverageRegion</c>.
        /// </remarks>
        /// <param name="options">Supplies <c>TargetFile</c>, the XML file to read.</param>
        /// <param name="progress">Receives a message reporting how many groups were read.</param>
        /// <returns>One region per contiguous covered stretch, with one coverage site per position.</returns>
        public static List<CoverageRegion> GetTargetCoverageRegionFromXml(ITargetBuilderOptions options, IProgressCallback progress)
        {
            var regions = new List<CoverageRegion>();

            var groups = new FeatureItemGroupXmlFormat().ReadFromFile(options.TargetFile);
            progress.SetMessage("Total {0} potential target group read from file {1}", groups.Count, options.TargetFile);

            foreach (var group in groups)
            {
                // Items of one group share the same reads: fold the other names into the first
                // item, then keep only that item.
                for (int member = 1; member < group.Count; member++)
                {
                    group[0].Name += "/" + group[member].Name;
                }
                group.RemoveRange(1, group.Count - 1);

                var utr = group[0];
                utr.Locations.RemoveAll(m => m.SamLocations.Count == 0);
                utr.CombineLocationByMappedReads();

                foreach (var loc in utr.Locations)
                {
                    // Position -> accumulated coverage site of all reads covering that position.
                    var siteByPosition = new Dictionary<long, CoverageSite>();
                    foreach (var sloc in loc.SamLocations)
                    {
                        for (long pos = sloc.SamLocation.Start; pos <= sloc.SamLocation.End; pos++)
                        {
                            CoverageSite site;
                            if (!siteByPosition.TryGetValue(pos, out site))
                            {
                                // First read seen at this position.
                                siteByPosition[pos] = new CoverageSite(sloc.SamLocation.Parent.QueryCount, sloc.SamLocation.Parent.Qname);
                            }
                            else
                            {
                                site.Coverage += sloc.SamLocation.Parent.QueryCount;
                                site.UniqueRead.Add(sloc.SamLocation.Parent.Qname);
                            }
                        }
                    }

                    var positions = siteByPosition.Keys.OrderBy(k => k).ToList();

                    // Walk the sorted positions; [runStart, next) is the current run of
                    // consecutive positions. A run is closed at the end of the list or at the
                    // first gap.
                    int runStart = 0;
                    for (int next = 1; ; next++)
                    {
                        bool atEnd = next == positions.Count;
                        if (!atEnd && positions[next] == positions[next - 1] + 1)
                        {
                            continue;
                        }

                        var rg = new CoverageRegion
                        {
                            Name = utr.Name,
                            Seqname = loc.Seqname,
                            Start = positions[runStart],
                            End = positions[next - 1],
                            Strand = loc.Strand
                        };
                        for (int k = runStart; k < next; k++)
                        {
                            rg.Coverages.Add(siteByPosition[positions[k]]);
                        }
                        regions.Add(rg);

                        if (atEnd)
                        {
                            break;
                        }

                        runStart = next;
                    }
                }
            }

            return regions;
        }