/// <summary>
/// Loads the target coverage regions named by <paramref name="options"/>, fills each region's
/// sequence from the genome FASTA file and assigns a gene symbol from the refgene file.
/// </summary>
/// <param name="options">Supplies <c>TargetFile</c>, <c>GenomeFastaFile</c> and <c>RefgeneFile</c>.</param>
/// <param name="progress">Receives progress messages while the files are processed.</param>
/// <param name="removeRegionWithoutSequence">
/// When true, regions whose <c>Sequence</c> is still null or empty after the FASTA scan are removed.
/// </param>
/// <returns>The loaded regions with <c>Sequence</c> and <c>GeneSymbol</c> populated.</returns>
public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
{
    // A target file ending in ".xml" goes to the XML loader, anything else to the BED loader.
    // The extension is an identifier, not linguistic text, so compare it ordinally instead of
    // relying on the current culture.
    List<CoverageRegion> result;
    if (options.TargetFile.EndsWith(".xml", System.StringComparison.Ordinal))
    {
        result = GetTargetCoverageRegionFromXml(options, progress);
    }
    else
    {
        result = GetTargetCoverageRegionFromBed(options, progress);
    }

    // Group the regions by sequence name so each FASTA record can find its regions directly.
    var regionsBySeqname = result.ToGroupDictionary(m => m.Seqname);

    progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
    using (var reader = new StreamReader(options.GenomeFastaFile))
    {
        var fasta = new FastaFormat();
        Sequence seq;
        while ((seq = fasta.ReadSequence(reader)) != null)
        {
            progress.SetMessage("Processing chromosome {0} ...", seq.Reference);

            // Presumably strips a leading "chr" so FASTA names match the region names;
            // confirm against StringAfter when the name contains no "chr".
            var seqname = seq.Name.StringAfter("chr");

            List<CoverageRegion> regions;
            if (!regionsBySeqname.TryGetValue(seqname, out regions))
            {
                continue;
            }

            foreach (var region in regions)
            {
                // Start is used as a 1-based coordinate here: shift by one for Substring.
                region.Sequence = seq.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

                // NOTE(review): the reverse complement is only filled for '+' strand regions;
                // confirm that this is the intended strand.
                if (region.Strand == '+')
                {
                    region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
                }
            }
        }
    }

    if (removeRegionWithoutSequence)
    {
        // Regions whose sequence name never appeared in the FASTA file have no sequence.
        result.RemoveAll(region => string.IsNullOrEmpty(region.Sequence));
    }

    progress.SetMessage("Filling sequence finished.");

    // NOTE(review): 1 and 12 are presumably the key and value column indices of the refgene
    // file; confirm against MapReader.
    var namemap = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
    foreach (var region in result)
    {
        // The lookup key is the part of the region name before "_utr3"; regions without a
        // mapping get an empty gene symbol.
        var gene = region.Name.StringBefore("_utr3");
        region.GeneSymbol = namemap.ContainsKey(gene) ? namemap[gene] : string.Empty;
    }

    return result;
}
/// <summary>
/// Builds a count table and an FPKM table from cuffdiff read_group_tracking files, restricted
/// to the genes listed in the cuffdiff significant files, and writes them next to
/// <c>options.OutputFilePrefix</c>.
/// </summary>
/// <returns>The paths of the written ".count" and ".fpkm" files, in that order.</returns>
public override IEnumerable<string> Process()
{
    this.Progress.SetMessage("Reading group sample map file ...");
    var groupSampleMap = new MapReader(0, 1).ReadFromFile(options.MapFile);

    this.Progress.SetMessage("Reading cuffdiff significant files ...");

    // Columns 1..3 (gene, gene name, location) are read from every data line, so a line needs
    // at least four columns; shorter lines are skipped instead of failing on parts[3].
    // Lines whose gene name is "-" are ignored. The first line of each file is skipped as header.
    var sigs = (from file in options.SignificantFiles
                from line in File.ReadAllLines(file).Skip(1)
                let parts = line.Split('\t')
                where parts.Length >= 4
                let gene = parts[1]
                let name = parts[2]
                where !name.Equals("-")
                let location = parts[3]
                select new SignificantItem()
                {
                    Gene = gene,
                    GeneName = name,
                    GeneLocation = location
                }).ToList();

    // Keep the first occurrence of each gene when it appears in several lines or files.
    var geneNameMap = new Dictionary<string, SignificantItem>();
    foreach (var sig in sigs)
    {
        if (!geneNameMap.ContainsKey(sig.Gene))
        {
            geneNameMap[sig.Gene] = sig;
        }
    }

    var countFile = options.OutputFilePrefix + ".count";
    var fpkmFile = options.OutputFilePrefix + ".fpkm";

    var items = new List<TrackingItem>();
    foreach (var trackingFile in options.InputFiles)
    {
        this.Progress.SetMessage("Reading cuffdiff read_group_tracking file " + trackingFile + "...");
        using (StreamReader sr = new StreamReader(trackingFile))
        {
            // The first line is a header and is discarded.
            sr.ReadLine();

            string line;
            while ((line = sr.ReadLine()) != null)
            {
                var parts = line.Split('\t');
                if (parts.Length <= 7)
                {
                    continue;
                }

                // Only genes present in the significant files are kept.
                var gene = parts[0];
                if (!geneNameMap.ContainsKey(gene))
                {
                    continue;
                }

                // Columns 1 and 2 joined by "_" form the lookup key into the group/sample map;
                // unmapped keys are used as the sample name themselves.
                var group_index = parts[1] + "_" + parts[2];
                var sample = groupSampleMap.ContainsKey(group_index) ? groupSampleMap[group_index] : group_index;

                items.Add(new TrackingItem()
                {
                    Gene = gene,
                    Sample = sample,
                    Count = parts[3],
                    FPKM = parts[6]
                });
            }
        }
    }

    this.Progress.SetMessage("Preparing result ...");

    var samples = new HashSet<string>(from item in items select item.Sample).OrderBy(m => m).ToList();
    this.Progress.SetMessage(string.Format("There are {0} samples", samples.Count));

    var genes = new HashSet<string>(from item in items select item.Gene).OrderBy(m => m).ToList();
    this.Progress.SetMessage(string.Format("There are {0} genes", genes.Count));

    var map = ToDoubleDirectory(items);

    this.Progress.SetMessage("Removing empty genes ...");

    // Iterate the separate gene list (not the map itself) so entries can be removed safely.
    // A gene is dropped when every one of its entries has the literal count "0".
    foreach (var gene in genes)
    {
        if (map[gene].All(m => m.Value.Count == "0"))
        {
            map.Remove(gene);
        }
    }

    var finalGenes = map.Keys.OrderBy(m => m).ToList();

    this.Progress.SetMessage("Outputing result ...");

    // The tracking files are machine-generated with '.' as decimal separator, so parse and
    // format the counts with the invariant culture rather than the current one.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    OutputFile(samples, finalGenes, map, geneNameMap, m => Math.Round(double.Parse(m.Count, invariant)).ToString(invariant), countFile);
    OutputFile(samples, finalGenes, map, geneNameMap, m => m.FPKM, fpkmFile);

    this.Progress.End();

    return new[] { countFile, fpkmFile };
}