/// <summary>
/// Reads the mapped small RNAs from <c>options.InputFile</c>, looks up each candidate seed
/// in the target seed map and writes one tab-delimited row per matching target to
/// <c>options.OutputFile</c>.
/// </summary>
/// <returns>A single-element array holding the path of the output file.</returns>
public override IEnumerable<string> Process()
{
    Progress.SetMessage("Reading T2C smallRNA...");
    // NOTE(review): the third argument is presumably the list of tags to exclude - confirm against ParclipUtils.
    var mappedSmallRNA = ParclipUtils.GetSmallRNACoverageRegion(options.InputFile, null, new string[] { SmallRNAConsts.lincRNA });

    // Highest average coverage first.
    mappedSmallRNA.Sort((m1, m2) => m2.Coverages.Average().CompareTo(m1.Coverages.Average()));

    Progress.SetMessage("Build target {0} mers...", options.MinimumSeedLength);
    var targetSeedMap = ParclipUtils.BuildTargetSeedMap(options, m => true, progress: this.Progress);

    Progress.SetMessage("Finding target...");
    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("SmallRNA\tChr\tStart\tEnd\tStrand\tSeed\tSeedOffset\tSeedLength\tSeedCoverage\tTarget\tTargetCoverage\tTargetGeneSymbol\tTargetName");

        foreach (var t2c in mappedSmallRNA)
        {
            var seq = t2c.Sequence.ToUpper();
            int[] offsets = GetPossibleOffsets(t2c.Name);

            foreach (var offset in offsets)
            {
                // A seed that would run past the end of the sequence cannot be extracted:
                // Substring would throw ArgumentOutOfRangeException, so skip this offset.
                if (offset + options.MinimumSeedLength > seq.Length)
                {
                    continue;
                }

                var seed = seq.Substring(offset, options.MinimumSeedLength);

                // Mean coverage over the seed window only.
                var coverage = t2c.Coverages.Skip(offset).Take(options.MinimumSeedLength).Average();
                if (coverage < options.MinimumCoverage)
                {
                    continue;
                }

                List<SeedItem> target;
                if (!targetSeedMap.TryGetValue(seed, out target))
                {
                    continue;
                }

                // Sorts the list stored in the map in place, highest target coverage first.
                target.Sort((m1, m2) => m2.Coverage.CompareTo(m1.Coverage));

                // Entries whose name starts with the miRNA tag keep the plain seed matches;
                // everything else goes through FindLongestTarget.
                if (!t2c.Name.StartsWith(SmallRNAConsts.miRNA))
                {
                    target = ParclipUtils.FindLongestTarget(target, t2c, seq, offset, options.MinimumSeedLength, int.MaxValue, options.MinimumCoverage);
                }

                if (target.Count == 0)
                {
                    continue;
                }

                // The reported seed is derived from the first target only, so it is the same
                // for every row of this offset and is computed once.
                // NOTE(review): this assumes all targets share the length of target[0] - confirm.
                var finalSeed = seq.Substring(offset, target[0].Sequence.Length);

                foreach (var t in target)
                {
                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}", t2c.Name, t2c.Seqname, t2c.Start, t2c.End, t2c.Strand, finalSeed, offset, finalSeed.Length, Math.Round(coverage));
                    sw.WriteLine("\t{0}:{1}-{2}:{3}\t{4}\t{5}\t{6}", t.Seqname, t.Start, t.End, t.Strand, t.Coverage, t.GeneSymbol, t.Name);
                }
            }
        }
    }

    return new[] { options.OutputFile };
}
/// <summary>
/// Reads the candidate sequences via <c>options.ReadSeeds()</c>, looks up every seed window
/// in the target seed map, extends each hit with
/// <c>ParclipUtils.ExtendToLongestTarget</c> and writes one tab-delimited row per
/// resulting target to <c>options.OutputFile</c>.
/// </summary>
/// <returns>A single-element array holding the path of the output file.</returns>
public override IEnumerable<string> Process()
{
    var candidates = options.ReadSeeds();
    Progress.SetMessage("Total {0} seeds readed.", candidates.Length);

    var offsets = GetPossibleOffsets(string.Empty);

    Progress.SetMessage("Build target {0} mers...", options.MinimumSeedLength);

    // Only collect seed windows that fit inside the candidate; without the length filter
    // Substring throws ArgumentOutOfRangeException for short candidates before the
    // guarded lookup loop below is ever reached.
    var seeds = (from seq in candidates
                 from offset in offsets
                 where seq.Length >= offset + options.MinimumSeedLength
                 select seq.Substring(offset, options.MinimumSeedLength)).ToList();
    var targetSeedMap = ParclipUtils.BuildTargetSeedMap(options, seeds, this.Progress);

    Progress.SetMessage("Finding target...");
    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("Sequence\tSeed\tSeedOffset\tSeedLength\tTarget\tTargetCoverage\tTargetGeneSymbol\tTargetName");

        foreach (var seq in candidates)
        {
            foreach (var offset in offsets)
            {
                // Skip (rather than stop at) offsets whose window does not fit, so the
                // result does not depend on the offsets being in ascending order.
                if (seq.Length < offset + options.MinimumSeedLength)
                {
                    continue;
                }

                var seed = seq.Substring(offset, options.MinimumSeedLength);

                List<SeedItem> target;
                if (!targetSeedMap.TryGetValue(seed, out target))
                {
                    continue;
                }

                if (target.ConvertAll(l => l.Coverage).Distinct().Count() == 1)
                {
                    // All targets share one coverage value: order them by chromosome and start instead.
                    GenomeUtils.SortChromosome(target, m => m.Seqname, m => m.Start);
                }
                else
                {
                    // Highest target coverage first (sorts the list stored in the map in place).
                    target.Sort((m1, m2) => m2.Coverage.CompareTo(m1.Coverage));
                }

                var longest = ParclipUtils.ExtendToLongestTarget(target, null, seq, offset, options.MinimumSeedLength, int.MaxValue, options.MinimumCoverage);

                foreach (var t in longest)
                {
                    // The reported seed is as long as the individual extended target.
                    var finalSeed = seq.Substring(offset, (int)t.Length);
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}:{5}-{6}:{7}\t{8}\t{9}\t{10}", seq, finalSeed, offset, finalSeed.Length, t.Seqname, t.Start, t.End, t.Strand, t.Coverage, t.GeneSymbol, t.Name);
                }
            }
        }
    }

    return new[] { options.OutputFile };
}
/// <summary>
/// Reads the mapped small RNAs from <c>options.InputFile</c>, looks up each candidate seed
/// in the target seed map, passes the hits through <c>ParclipUtils.FindLongestTarget</c>
/// and writes one tab-delimited row per resulting target to <c>options.OutputFile</c>.
/// </summary>
/// <returns>A single-element array holding the path of the output file.</returns>
public override IEnumerable<string> Process()
{
    Progress.SetMessage("Reading smallRNA...");

    // SmallRNAConsts.lncRNA is passed as the exclude tag; no include filter is given.
    var mappedSmallRNA = ParclipUtils.GetSmallRNACoverageRegion(options.InputFile, includeSmallRNATags: null, excudeSmallRNATags: SmallRNAConsts.lncRNA);

    // Highest average coverage first.
    mappedSmallRNA.Sort((m1, m2) => m2.Coverages.Average(l => l.Coverage).CompareTo(m1.Coverages.Average(l => l.Coverage)));

    Progress.SetMessage("Build target {0} mers...", options.MinimumSeedLength);
    var targetSeedMap = ParclipUtils.BuildTargetSeedMap(options, m => true, progress: this.Progress);

    Progress.SetMessage("Finding target...");
    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("SmallRNA\tChr\tStart\tEnd\tStrand\tSeed\tSeedOffset\tSeedLength\tSeedCoverage\tTarget\tTargetCoverage\tTargetGeneSymbol\tTargetName\tTargetSeedOffset\tTargetSeedUniqueRead");

        foreach (var t2c in mappedSmallRNA)
        {
            var seq = t2c.Sequence.ToUpper();
            int[] offsets = GetPossibleOffsets(t2c.Name);

            foreach (var offset in offsets)
            {
                // Report and skip offsets whose seed window does not fit in the sequence.
                // NOTE(review): ">=" also skips a seed ending exactly at the last base, which
                // Substring itself would accept - confirm whether FindLongestTarget needs
                // at least one base after the seed, otherwise this should be ">".
                if (offset + options.MinimumSeedLength >= seq.Length)
                {
                    Console.Error.WriteLine("t2c={0}/{1}, seq={2}, offset={3}, minseed={4}", t2c.Name, t2c.GeneSymbol, seq, offset, options.MinimumSeedLength);
                    continue;
                }

                var seed = seq.Substring(offset, options.MinimumSeedLength);

                // Mean coverage over the seed window only.
                var coverage = t2c.Coverages.Skip(offset).Take(options.MinimumSeedLength).Average(l => l.Coverage);
                if (coverage < options.MinimumCoverage)
                {
                    continue;
                }

                List<SeedItem> target;
                if (!targetSeedMap.TryGetValue(seed, out target))
                {
                    continue;
                }

                // Highest target coverage first (sorts the list stored in the map in place).
                target.Sort((m1, m2) => m2.Coverage.CompareTo(m1.Coverage));

                var longest = ParclipUtils.FindLongestTarget(target, t2c, seq, offset, options.MinimumSeedLength, int.MaxValue, options.MinimumCoverage);

                for (int j = 0; j < longest.Count; j++)
                {
                    var t = longest[j];

                    // The reported seed is as long as the individual target.
                    var finalSeed = seq.Substring(offset, (int)t.Length);

                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}", t2c.Name, t2c.Seqname, t2c.Start, t2c.End, t2c.Strand, finalSeed, offset, finalSeed.Length, Math.Round(coverage));

                    // The unique-read column comes from SeedItem.GetSeedUniqueReadCount; the
                    // former local that recomputed a distinct-read count by hand was never
                    // used and has been dropped.
                    sw.WriteLine("\t{0}:{1}-{2}:{3}\t{4}\t{5}\t{6}\t{7}\t{8}", t.Seqname, t.Start, t.End, t.Strand, t.Coverage, t.GeneSymbol, t.Name, t.SourceOffset, t.GetSeedUniqueReadCount(finalSeed.Length));
                }
            }
        }
    }

    return new[] { options.OutputFile };
}