/// <summary>
/// Reads <paramref name="countFile"/> through a <c>MapReader(0, 1)</c> and stores every
/// value, parsed as an integer, into <c>Counts</c> under its key.
/// </summary>
/// <param name="countFile">Path of the count file to load.</param>
protected virtual void ReadCountFile(string countFile)
{
    var reader = new MapReader(0, 1);
    Dictionary<string, string> rawCounts = reader.ReadFromFile(countFile);

    foreach (KeyValuePair<string, string> entry in rawCounts)
    {
        // int.Parse throws on a non-integer value, exactly as before.
        Counts[entry.Key] = int.Parse(entry.Value);
    }
}
/// <summary>
/// Reads the BRCA SNP6 sdrf file and checks the entry count plus the aliquot barcode
/// mapped to four derived array data files.
/// </summary>
public void TestRead()
{
    const string expectedBarcode = "TCGA-A8-A06R-01A-11D-A011-01";

    var reader = new MapReader("Derived Array Data File", "Comment [Aliquot Barcode]");
    var map = reader.ReadFromFile("../../../data/broad.mit.edu_BRCA.Genome_Wide_SNP_6.sdrf.txt");

    Assert.AreEqual(6789, map.Count);

    // All four derived files are expected to map to the same barcode.
    var derivedFiles = new[]
    {
        "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.hg18.seg.txt",
        "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.hg19.seg.txt",
        "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.nocnv_hg18.seg.txt",
        "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_A02_628278.nocnv_hg19.seg.txt"
    };

    foreach (var derivedFile in derivedFiles)
    {
        Assert.AreEqual(expectedBarcode, map[derivedFile]);
    }
}
/// <summary>
/// Builds the lookup table from the given sdrf file: each value of
/// <paramref name="fileColumn"/> (lower-cased) is mapped to the result of
/// <c>TCGAUtils.GetSampleBarCode</c> applied to the matching <paramref name="barCodeColumn"/> value.
/// </summary>
/// <param name="sdrfFile">Path of the sdrf file to read.</param>
/// <param name="fileColumn">Column whose values become the keys.</param>
/// <param name="barCodeColumn">Column whose values are converted to sample barcodes.</param>
public FindParticipantBySdrfFile(string sdrfFile, string fileColumn, string barCodeColumn)
{
    this.sdrfFile = sdrfFile;

    var reader = new MapReader(fileColumn, barCodeColumn);
    var annotations = reader.ReadFromFile(sdrfFile);

    maps = new Dictionary<string, string>();
    foreach (var entry in annotations)
    {
        // Entries keyed "->" are skipped.
        if (!entry.Key.Equals("->"))
        {
            var loweredKey = entry.Key.ToLower();
            maps[loweredKey] = TCGAUtils.GetSampleBarCode(entry.Value);
        }
    }
}
/// <summary>
/// Reads <c>GSE2990_suppl_info.txt</c> from <paramref name="datasetDirectory"/> and, for every
/// <c>geo_accn</c> entry that has a matching <c>.cel</c> file in that directory, records the
/// <c>er</c> value on the corresponding sample item (creating the item when it is missing).
/// </summary>
/// <param name="datasetDirectory">Directory holding the supplementary info file and the .cel files.</param>
/// <param name="sampleMap">Sample items keyed by the geo_accn value; updated in place.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var dirname = Path.GetFileName(datasetDirectory);

    // Path.Combine instead of hard-coded '\' so the paths also resolve on non-Windows platforms.
    var infoFile = Path.Combine(datasetDirectory, "GSE2990_suppl_info.txt");
    var map = new MapReader("geo_accn", "er").ReadFromFile(infoFile);

    foreach (var m in map)
    {
        var celFile = Path.Combine(datasetDirectory, m.Key + ".cel");
        if (!File.Exists(celFile))
        {
            // Only samples with a .cel file on disk are recorded.
            continue;
        }

        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(m.Key, out item))
        {
            item = new BreastCancerSampleItem(dirname, m.Key);
            sampleMap[m.Key] = item;
        }

        item.ER = m.Value;
    }
}
/// <summary>
/// After the chip type check, excludes every existing file whose chip type is listed in
/// <paramref name="removeChipTypes"/>.
/// </summary>
/// <param name="chipTypeTableFileName">Chip type table file; passed through <c>CheckFileName</c> before reading.</param>
/// <param name="removeChipTypes">Chip types whose files should be excluded.</param>
public void FilterChipType(string chipTypeTableFileName, string[] removeChipTypes)
{
    chipTypeTableFileName = CheckFileName(chipTypeTableFileName);

    var chipTypeByFile = new MapReader(0, 1).ReadFromFile(chipTypeTableFileName);
    foreach (var entry in chipTypeByFile)
    {
        var celfile = entry.Key;
        var chiptype = entry.Value;

        if (!removeChipTypes.Contains(chiptype))
        {
            continue;
        }

        // Files that are no longer on disk are left alone.
        if (!File.Exists(celfile))
        {
            continue;
        }

        Console.WriteLine("Removing {0} : {1}", chiptype, celfile);
        ExcludeFile(celfile);
    }
}
/// <summary>
/// Reads the GTF input file, groups its items by gene id (keeping only exon items for a gene
/// once any exon has been seen) and writes a tab-delimited table with gene_id, gene_name,
/// length and, when any item carries it, gene_biotype.
/// </summary>
/// <returns>An array containing the output file name.</returns>
/// <exception cref="Exception">
/// No item carries a gene_name attribute and the id/name map file does not exist.
/// </exception>
public override IEnumerable<string> Process()
{
    var itemsByGene = new Dictionary<string, List<GtfItem>>();

    // Optional id -> name table, used only for genes without a gene_name attribute.
    var idToName = new Dictionary<string, string>();
    if (File.Exists(options.MapFile))
    {
        idToName = new MapReader(0, 1, hasHeader: false).ReadFromFile(options.MapFile);
    }

    using (var reader = new GtfItemFile(options.InputFile))
    {
        int processed = 0;
        for (GtfItem current = reader.Next(); current != null; current = reader.Next())
        {
            processed++;
            if ((processed % 100000) == 0)
            {
                Progress.SetMessage("{0} gtf item processed", processed);
            }

            List<GtfItem> existing;
            if (!itemsByGene.TryGetValue(current.GeneId, out existing))
            {
                itemsByGene[current.GeneId] = new List<GtfItem> { current };
                continue;
            }

            if (IsExon(current))
            {
                // An exon supersedes any non-exon items collected so far for this gene.
                existing.RemoveAll(m => !IsExon(m));
                existing.Add(current);
            }
            else if (!existing.Any(m => IsExon(m)))
            {
                // Non-exon items are kept only while the gene has no exon yet.
                existing.Add(current);
            }
        }
    }

    var sortedGeneIds = itemsByGene.Keys.OrderBy(id => id).ToList();

    // The writer is opened before the gene_name check, so the output file is created even when the check throws.
    using (var writer = new StreamWriter(options.OutputFile))
    {
        bool anyGeneName = itemsByGene.Values.Any(l => l.Any(m => m.Attributes.Contains("gene_name")));
        if (!anyGeneName && !File.Exists(options.MapFile))
        {
            throw new Exception(string.Format("No gene_name found in {0} and no id/name map file defined.", options.InputFile));
        }

        bool anyGeneBiotype = itemsByGene.Values.Any(l => l.Any(m => m.Attributes.Contains("gene_biotype")));
        writer.WriteLine(anyGeneBiotype
            ? "gene_id\tgene_name\tlength\tgene_biotype"
            : "gene_id\tgene_name\tlength");

        foreach (var geneId in sortedGeneIds)
        {
            var geneItems = itemsByGene[geneId];

            // Pick the named item before CombineCoordinates runs on the list.
            var namedItem = geneItems.FirstOrDefault(m => m.Attributes.Contains("gene_name"));
            geneItems.CombineCoordinates();

            string geneName;
            string biotype;
            if (namedItem != null)
            {
                biotype = namedItem.GetBiotype();
                geneName = namedItem.Attributes.StringAfter("gene_name \"").StringBefore("\"");
            }
            else
            {
                biotype = string.Empty;
                if (!idToName.TryGetValue(geneId, out geneName))
                {
                    // Fall back to the gene id when no name is known.
                    geneName = geneId;
                }
            }

            var totalLength = geneItems.Sum(m => m.Length);
            if (anyGeneBiotype)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}", geneId, geneName, totalLength, biotype);
            }
            else
            {
                writer.WriteLine("{0}\t{1}\t{2}", geneId, geneName, totalLength);
            }
        }
    }

    return new string[] { options.OutputFile };
}
/// <summary>
/// Loads the target regions (from XML when the target file name ends with ".xml", otherwise
/// from BED), fills each region's sequence from the genome FASTA file, and assigns a gene
/// symbol looked up in the refgene file.
/// </summary>
/// <param name="options">Supplies the target, genome FASTA and refgene file names.</param>
/// <param name="progress">Receives progress messages.</param>
/// <param name="removeRegionWithoutSequence">
/// When true, regions whose sequence is still null or empty after the FASTA pass are removed.
/// </param>
/// <returns>The coverage regions with sequence and gene symbol filled in.</returns>
public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
{
    List<CoverageRegion> result;
    if (options.TargetFile.EndsWith(".xml"))
    {
        result = GetTargetCoverageRegionFromXml(options, progress);
    }
    else
    {
        result = GetTargetCoverageRegionFromBed(options, progress);
    }

    var regionsBySeqname = result.ToGroupDictionary(m => m.Seqname);

    progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
    using (var fasta = new StreamReader(options.GenomeFastaFile))
    {
        var format = new FastaFormat();
        for (Sequence chromosome = format.ReadSequence(fasta); chromosome != null; chromosome = format.ReadSequence(fasta))
        {
            progress.SetMessage("Processing chromosome {0} ...", chromosome.Reference);

            // Regions are looked up by the part of the sequence name after "chr".
            var seqname = chromosome.Name.StringAfter("chr");

            List<CoverageRegion> regions;
            if (!regionsBySeqname.TryGetValue(seqname, out regions))
            {
                continue;
            }

            foreach (var region in regions)
            {
                // Start is treated as 1-based here, hence the -1 for Substring.
                var offset = (int)(region.Start - 1);
                var length = (int)region.Length;
                region.Sequence = chromosome.SeqString.Substring(offset, length);

                // NOTE(review): the reverse complement is filled only for '+' strand regions — confirm this is intended.
                if (region.Strand == '+')
                {
                    region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
                }
            }
        }
    }

    if (removeRegionWithoutSequence)
    {
        result.RemoveAll(l => string.IsNullOrEmpty(l.Sequence));
    }

    progress.SetMessage("Filling sequence finished.");

    var symbolByGene = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
    foreach (var region in result)
    {
        var gene = region.Name.StringBefore("_utr3");

        string symbol;
        region.GeneSymbol = symbolByGene.TryGetValue(gene, out symbol) ? symbol : string.Empty;
    }

    return result;
}
/// <summary>
/// Combines cuffdiff read_group_tracking files into a count table and an FPKM table,
/// restricted to the genes listed in the significant files. Genes whose count is "0" in
/// every sample are dropped.
/// </summary>
/// <returns>The names of the written count and FPKM files, in that order.</returns>
public override IEnumerable<string> Process()
{
    this.Progress.SetMessage("Reading group sample map file ...");
    var groupSampleMap = new MapReader(0, 1).ReadFromFile(options.MapFile);

    this.Progress.SetMessage("Reading cuffdiff significant files ...");
    var sigs = (from file in options.SignificantFiles
                from line in File.ReadAllLines(file).Skip(1)
                let parts = line.Split('\t')
                // parts[3] is read below, so at least four columns are required
                // (the previous ">= 3" check let 3-column lines through and they threw).
                where parts.Length >= 4
                let gene = parts[1]
                let name = parts[2]
                where !name.Equals("-")
                let location = parts[3]
                select new SignificantItem()
                {
                    Gene = gene,
                    GeneName = name,
                    GeneLocation = location
                }).ToList();

    // The first occurrence of a gene wins when it appears in several significant files.
    var geneNameMap = new Dictionary<string, SignificantItem>();
    foreach (var sig in sigs)
    {
        if (!geneNameMap.ContainsKey(sig.Gene))
        {
            geneNameMap[sig.Gene] = sig;
        }
    }

    var countFile = options.OutputFilePrefix + ".count";
    var fpkmFile = options.OutputFilePrefix + ".fpkm";

    var items = new List<TrackingItem>();
    foreach (var trackingFile in options.InputFiles)
    {
        this.Progress.SetMessage("Reading cuffdiff read_group_tracking file " + trackingFile + "...");
        using (StreamReader sr = new StreamReader(trackingFile))
        {
            // Skip the header line.
            string line = sr.ReadLine();
            while ((line = sr.ReadLine()) != null)
            {
                var parts = line.Split('\t');
                if (parts.Length <= 7)
                {
                    continue;
                }

                var gene = parts[0];
                if (!geneNameMap.ContainsKey(gene))
                {
                    // Only genes listed in the significant files are kept.
                    continue;
                }

                // Translate "<column 1>_<column 2>" to a sample name; fall back to the raw key.
                var group_index = parts[1] + "_" + parts[2];
                string sample;
                if (!groupSampleMap.TryGetValue(group_index, out sample))
                {
                    sample = group_index;
                }

                items.Add(new TrackingItem()
                {
                    Gene = gene,
                    Sample = sample,
                    Count = parts[3],
                    FPKM = parts[6]
                });
            }
        }
    }

    this.Progress.SetMessage("Preparing result ...");

    var samples = new HashSet<string>(from item in items select item.Sample).OrderBy(m => m).ToList();
    this.Progress.SetMessage(string.Format("There are {0} samples", samples.Count));

    var genes = new HashSet<string>(from item in items select item.Gene).OrderBy(m => m).ToList();
    this.Progress.SetMessage(string.Format("There are {0} genes", genes.Count));

    var map = ToDoubleDirectory(items);

    this.Progress.SetMessage("Removing empty genes ...");
    foreach (var gene in genes)
    {
        if (map[gene].All(m => m.Value.Count == "0"))
        {
            map.Remove(gene);
        }
    }

    var finalGenes = map.Keys.OrderBy(m => m).ToList();

    this.Progress.SetMessage("Outputing result ...");

    // Counts are machine-readable text, so parse and format them with the invariant culture
    // instead of whatever culture the current thread happens to use.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    OutputFile(samples, finalGenes, map, geneNameMap, m => Math.Round(double.Parse(m.Count, invariant)).ToString(invariant), countFile);
    OutputFile(samples, finalGenes, map, geneNameMap, m => m.FPKM, fpkmFile);

    this.Progress.End();

    return new[] { countFile, fpkmFile };
}