/// <summary>
/// Builds the value matrix and the sample design table for the configured tumor types.
/// Barcode information is collected per tumor type, clinical annotations are loaded through
/// <c>ReadClinData</c>, values are read through <c>GetData</c>, and the results are written to
/// <c>_options.OutputFile</c> (one row per common gene, one column per sample) and
/// <c>_options.DesignFile</c> (one row per barcode entry, followed by the clinical headers).
/// When exactly one tumor type is configured, sample names are written through
/// <c>StringAfter("_")</c>, the tumor column is the first configured tumor type, and a
/// <c>TCGAClinicalInformationBuilder</c> is run afterwards with its outputs appended.
/// </summary>
/// <returns>
/// The produced file names when every sample has clinical data; otherwise a single message
/// listing the samples without patient information and the files that were saved.
/// </returns>
public override IEnumerable<string> Process()
{
    // Collect the barcode information of all tumor types into one map.
    var barMap = new Dictionary<string, BarInfo>();
    foreach (var tumorName in _options.TumorTypes)
    {
        var tumorMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory, _options.GetTechnology(), tumorName, _options.Platforms, _options.GetTCGASampleCodes().ToArray());
        foreach (var pair in tumorMap)
        {
            barMap[GetSampleKey(tumorName, pair.Key)] = pair.Value;
        }
    }

    // Load the clinical annotations; ReadClinData fills both the map and the header list.
    var headers = new List<string>();
    var clindata = new Dictionary<string, IAnnotation>();
    foreach (var tumorName in _options.TumorTypes)
    {
        ReadClinData(clindata, tumorName, headers);
    }
    Console.WriteLine("{0} patient clinical information readed", clindata.Count);

    // Record samples that have no clinical entry. A snapshot of the map is iterated so that
    // entries can be removed from barMap while looping.
    var noclinical = new List<string>();
    foreach (var pair in barMap.ToList())
    {
        var clinicalKey = GetSampleKey(GetTumorType(pair.Key), pair.Value.Paticipant);
        if (clindata.ContainsKey(clinicalKey))
        {
            continue;
        }

        noclinical.Add(pair.Key);
        Console.Error.WriteLine("Cannot find clinical data for patient {0}", pair.Value.Paticipant);
        if (_options.WithClinicalInformationOnly)
        {
            barMap.Remove(pair.Key);
        }
    }

    Progress.SetMessage("Reading data ...");
    Func<double, double> getValue;
    var valueMap = GetData(barMap, out getValue);
    var genes = GetCommonGenes(valueMap);
    var samples = valueMap.Keys.OrderBy(m => m).ToList();

    Progress.SetMessage("Saving data ...");
    var result = new List<string>();
    result.Add(_options.OutputFile);
    result.Add(_options.DesignFile);

    // The two output layouts differ only in how a sample key is displayed and where the
    // tumor type of an entry comes from, so those two choices are captured as delegates.
    bool multipleTumors = _options.TumorTypes.Count > 1;
    Func<string, string> sampleNameOf;
    Func<string, string> tumorOf;
    if (multipleTumors)
    {
        sampleNameOf = sampleKey => sampleKey;
        tumorOf = sampleKey => GetTumorType(sampleKey);
    }
    else
    {
        sampleNameOf = sampleKey => sampleKey.StringAfter("_");
        // Evaluated per entry (not hoisted) so an empty map never touches TumorTypes here.
        tumorOf = sampleKey => _options.TumorTypes.First();
    }

    // Value matrix: header row of sample names, then one row per gene.
    using (var sw = new StreamWriter(_options.OutputFile))
    {
        sw.WriteLine("Gene\t{0}", samples.Select(sampleNameOf).Merge("\t"));
        foreach (var gene in genes)
        {
            sw.Write(gene);
            foreach (var sample in samples)
            {
                sw.Write("\t{0}", getValue(valueMap[sample][gene]));
            }
            sw.WriteLine();
        }
    }

    // Design table: fixed sample columns followed by one column per clinical header.
    using (var sw = new StreamWriter(_options.DesignFile))
    {
        sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
            sw.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
            sw.WriteLine();
        }

        foreach (var entry in barMap)
        {
            var info = entry.Value;
            var tumorType = tumorOf(entry.Key);
            var sampleType = TCGASampleCode.Find(info.Sample);
            sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleNameOf(entry.Key), info.BarCode, info.Paticipant, tumorType, info.Platform, sampleType.ShortLetterCode, sampleType.Definition);

            // Samples without clinical data get an empty annotation, i.e. blank cells.
            IAnnotation annotation;
            if (!clindata.TryGetValue(GetSampleKey(tumorType, info.Paticipant), out annotation))
            {
                annotation = new Annotation();
            }

            foreach (var header in headers)
            {
                if (annotation.Annotations.ContainsKey(header))
                {
                    sw.Write("\t{0}", annotation.Annotations[header]);
                }
                else
                {
                    sw.Write("\t");
                }
            }
            sw.WriteLine();
        }
    }

    if (!multipleTumors)
    {
        // Single tumor type only: build the clinical information file from the data file.
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
            ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
            DataFile = _options.OutputFile,
            ThrowException = false,
        };
        result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress }.Process());
    }

    Progress.End();

    if (noclinical.Count > 0)
    {
        return new[] { string.Format("There are {0} samples without patient information:\n {1}\n\nResult have been saved to:\n {2}", noclinical.Count, noclinical.Merge("\n "), result.Merge("\n ")) };
    }

    return result.ToArray();
}
/// <summary>
/// Reads every "*.somatic.maf" file found in the "*Level_2*" directories of each configured
/// tumor type and platform, then writes two participant presence matrices (1 when the
/// participant has an entry, 0 otherwise): one row per locus to <c>_options.OutputFile</c>
/// and one row per gene name to a sibling file with the ".gene.tsv" extension.
/// Tumor, technology and platform directories that do not exist are skipped.
/// </summary>
/// <returns>The locus matrix file name followed by the gene matrix file name.</returns>
/// <exception cref="Exception">
/// Thrown with the merged parsing errors when <c>_options.PrepareOptions()</c> fails.
/// </exception>
/// <exception cref="InvalidDataException">
/// Thrown when a MAF file containing data rows lacks a required column, or when a data row
/// has fewer columns than the header requires.
/// </exception>
public override IEnumerable<string> Process()
{
    if (!_options.PrepareOptions())
    {
        throw new Exception(_options.ParsingErrors.Merge("\n"));
    }

    // NOTE(review): acceptBarcode is built from the configured sample codes but is never
    // applied to the rows read below, so no sample-code filtering takes place. It is kept
    // unchanged to preserve the current output; confirm whether rows should be filtered.
    HashSet<int> sampleCodes = new HashSet<int>(_options.GetTCGASampleCodes().ToList().ConvertAll(m => m.Code));
    Func<string, bool> acceptBarcode = m => sampleCodes.Contains(new BarInfo(m, null).Sample);

    var tec = _options.GetTechnology();
    var items = new List<MutationItem>();
    foreach (var tumor in _options.TumorTypes)
    {
        var dir = Path.Combine(_options.TCGADirectory, tumor);
        if (!Directory.Exists(dir))
        {
            continue;
        }

        var tecdir = tec.GetTechnologyDirectory(dir);
        if (!Directory.Exists(tecdir))
        {
            continue;
        }

        foreach (var platform in _options.Platforms)
        {
            var platdir = Path.Combine(tecdir, platform);
            // A platform may be absent for a given tumor type; skip it like the missing
            // tumor/technology directories above instead of letting GetDirectories throw.
            if (!Directory.Exists(platdir))
            {
                continue;
            }

            foreach (var datadir in Directory.GetDirectories(platdir, "*Level_2*"))
            {
                foreach (var maffile in Directory.GetFiles(datadir, "*.somatic.maf"))
                {
                    ReadMafFile(maffile, tumor, platform, items);
                }
            }
        }
    }

    // Both matrices share the same sorted, de-duplicated participant columns.
    var paticipants = items.Select(m => m.Paticipant).Distinct().OrderBy(m => m).ToList();

    using (var sw = new StreamWriter(_options.OutputFile))
    {
        var itemMap = items.ToDoubleDictionaryGroup(m => m.Locus, m => m.Paticipant);
        var locusList = itemMap.Keys.ToList();
        GenomeUtils.SortChromosome(locusList, m => m.StringBefore(":"), m => int.Parse(m.StringAfter(":").StringBefore("-")));

        sw.WriteLine("Hugo_Symbol\tNCBI_Build\tChromosome\tStart_position\tEnd_position\tStrand\tVariant_Classification\tVariant_Type\t{0}", paticipants.Merge("\t"));
        foreach (var locus in locusList)
        {
            var dic = itemMap[locus];
            // The first item of the locus supplies the descriptive columns of the row.
            var item = dic.Values.First().First();
            // The sixth column is Strand (it previously repeated item.Start by mistake).
            sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", item.Name, item.NcbiBuild, item.Chromosome, item.Start, item.End, item.Strand, item.VariantClassification, item.VariantType);
            foreach (var paticipant in paticipants)
            {
                sw.Write(dic.ContainsKey(paticipant) ? "\t1" : "\t0");
            }
            sw.WriteLine();
        }
    }

    var genefile = FileUtils.ChangeExtension(_options.OutputFile, ".gene.tsv");
    using (var sw = new StreamWriter(genefile))
    {
        var itemMap = items.ToDoubleDictionaryGroup(m => m.Name, m => m.Paticipant);
        var nameList = itemMap.Keys.OrderBy(m => m).ToList();

        sw.WriteLine("Hugo_Symbol\t{0}", paticipants.Merge("\t"));
        foreach (var name in nameList)
        {
            var dic = itemMap[name];
            var item = dic.Values.First().First();
            sw.Write("{0}", item.Name);
            foreach (var paticipant in paticipants)
            {
                sw.Write(dic.ContainsKey(paticipant) ? "\t1" : "\t0");
            }
            sw.WriteLine();
        }
    }

    return new[] { _options.OutputFile, genefile };
}

/// <summary>
/// Appends one <c>MutationItem</c> per data row of <paramref name="maffile"/> to
/// <paramref name="items"/>. Leading lines starting with "#" are skipped, the next line is
/// taken as the tab-separated header, and empty data lines are ignored. A file without a
/// header contributes nothing.
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown on the first data row when a required column is missing from the header, or when
/// a data row has fewer columns than the header requires.
/// </exception>
private static void ReadMafFile(string maffile, string tumor, string platform, List<MutationItem> items)
{
    using (var sr = new StreamReader(maffile))
    {
        string line;
        // Skip comments; the first non-comment line is the header.
        while ((line = sr.ReadLine()) != null && line.StartsWith("#", StringComparison.Ordinal))
        {
        }

        if (string.IsNullOrEmpty(line))
        {
            return;
        }

        var headers = line.Split('\t');
        var missing = new List<string>();
        var nameIndex = FindMafColumn(headers, "Hugo_Symbol", missing);
        var ncbiIndex = FindMafColumn(headers, "NCBI_Build", missing);
        var chromosomeIndex = FindMafColumn(headers, "Chromosome", missing);
        var startIndex = FindMafColumn(headers, "Start_position", missing);
        var endIndex = FindMafColumn(headers, "End_position", missing);
        var strandIndex = FindMafColumn(headers, "Strand", missing);
        var variantClassificationIndex = FindMafColumn(headers, "Variant_Classification", missing);
        var variantTypeIndex = FindMafColumn(headers, "Variant_Type", missing);
        var barcodeIndex = FindMafColumn(headers, "Tumor_Sample_Barcode", missing);
        var lastIndex = new[] { nameIndex, ncbiIndex, chromosomeIndex, startIndex, endIndex, strandIndex, variantClassificationIndex, variantTypeIndex, barcodeIndex }.Max();

        while ((line = sr.ReadLine()) != null)
        {
            // Blank lines (e.g. a trailing newline) carry no record.
            if (line.Length == 0)
            {
                continue;
            }

            // Reported only once a data row exists, so a header-only file still yields nothing.
            if (missing.Count > 0)
            {
                throw new InvalidDataException(string.Format("MAF file {0} is missing required column(s): {1}", maffile, string.Join(", ", missing.ToArray())));
            }

            var parts = line.Split('\t');
            if (parts.Length <= lastIndex)
            {
                throw new InvalidDataException(string.Format("MAF file {0} contains a row with {1} column(s) but at least {2} are required", maffile, parts.Length, lastIndex + 1));
            }

            var item = new MutationItem()
            {
                Tumor = tumor,
                Platform = platform,
                Name = parts[nameIndex],
                NcbiBuild = parts[ncbiIndex],
                Chromosome = parts[chromosomeIndex],
                Start = parts[startIndex],
                End = parts[endIndex],
                Strand = parts[strandIndex],
                VariantClassification = parts[variantClassificationIndex],
                VariantType = parts[variantTypeIndex],
                TumorBarcode = parts[barcodeIndex]
            };
            item.InitLocus();
            item.InitPaticipant();
            items.Add(item);
        }
    }
}

/// <summary>
/// Returns the position of <paramref name="column"/> in <paramref name="headers"/> (exact
/// match), or -1 after recording the name in <paramref name="missing"/> when it is absent.
/// </summary>
private static int FindMafColumn(string[] headers, string column, List<string> missing)
{
    var index = Array.IndexOf(headers, column);
    if (index < 0)
    {
        missing.Add(column);
    }
    return index;
}