/// <summary>
/// Writes the gene-by-sample value table (<c>_options.OutputFile</c>) and the sample design
/// table (<c>_options.DesignFile</c>) for every tumor type listed in <c>_options.TumorTypes</c>.
/// When at most one tumor type is configured, sample names are written without their
/// prefix (the text up to the first "_") and a <see cref="TCGAClinicalInformationBuilder"/>
/// is run against the written data file afterwards.
/// </summary>
/// <returns>
/// The output and design file names (followed by whatever the clinical information builder
/// returns in the single-tumor case) when clinical data was found for every sample;
/// otherwise a single message that lists the samples without clinical data together with
/// the result files.
/// </returns>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    // Gather the barcode information of every tumor type, keyed by GetSampleKey(tumor, key).
    var barMap = new Dictionary<string, BarInfo>();
    foreach (var tumor in _options.TumorTypes)
    {
        var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory, _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());
        foreach (var v in curMap)
        {
            barMap[GetSampleKey(tumor, v.Key)] = v.Value;
        }
    }

    // Load the clinical annotations; ReadClinData fills both the annotation map and the header list.
    var headers = new List<string>();
    var clindata = new Dictionary<string, IAnnotation>();
    foreach (var tumor in _options.TumorTypes)
    {
        ReadClinData(clindata, tumor, headers);
    }
    Console.WriteLine("{0} patient clinical information readed", clindata.Count);

    // Record samples that have no clinical data. The loop runs over a snapshot because
    // entries may be removed from barMap while it is being scanned.
    var noclinical = new List<string>();
    foreach (var bm in barMap.ToList())
    {
        if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
        {
            continue;
        }

        noclinical.Add(bm.Key);
        Console.Error.WriteLine("Cannot find clinical data for patient {0}", bm.Value.Paticipant);
        if (_options.WithClinicalInformationOnly)
        {
            barMap.Remove(bm.Key);
        }
    }

    Progress.SetMessage("Reading data ...");
    Func<double, double> getValue;
    var valueMap = GetData(barMap, out getValue);
    var genes = GetCommonGenes(valueMap);
    var samples = valueMap.Keys.OrderBy(m => m).ToList();

    Progress.SetMessage("Saving data ...");
    result.Add(_options.OutputFile);
    result.Add(_options.DesignFile);

    // The two modes differ only in how a sample is labelled, how its tumor type is
    // determined, and whether the clinical information builder runs afterwards.
    var multipleTumors = _options.TumorTypes.Count > 1;

    using (var sw = new StreamWriter(_options.OutputFile))
    {
        var sampleHeader = multipleTumors
            ? samples.Merge("\t")
            : samples.Select(s => s.StringAfter("_")).Merge("\t");
        sw.WriteLine("Gene\t{0}", sampleHeader);

        foreach (var gene in genes)
        {
            sw.Write(gene);
            foreach (var sample in samples)
            {
                // NOTE(review): the value is formatted with the writer's default format
                // provider (current culture) - confirm that is intended for a tab-delimited file.
                sw.Write("\t{0}", getValue(valueMap[sample][gene]));
            }
            sw.WriteLine();
        }
    }

    using (var sw = new StreamWriter(_options.DesignFile))
    {
        sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
            sw.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
            sw.WriteLine();
        }

        foreach (var entry in barMap)
        {
            var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
            var sampleName = multipleTumors ? entry.Key : entry.Key.StringAfter("_");
            var type = TCGASampleCode.Find(entry.Value.Sample);
            sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode, type.Definition);

            // Samples without clinical data get an empty annotation, i.e. blank clinical columns.
            IAnnotation vdata;
            if (!clindata.TryGetValue(GetSampleKey(tumor, entry.Value.Paticipant), out vdata))
            {
                vdata = new Annotation();
            }

            foreach (var header in headers)
            {
                if (vdata.Annotations.ContainsKey(header))
                {
                    sw.Write("\t{0}", vdata.Annotations[header]);
                }
                else
                {
                    sw.Write("\t");
                }
            }
            sw.WriteLine();
        }
    }

    if (!multipleTumors)
    {
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
            ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
            DataFile = _options.OutputFile,
            ThrowException = false,
        };
        result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress }.Process());
    }

    Progress.End();

    if (noclinical.Count == 0)
    {
        return result.ToArray();
    }

    return new[] { string.Format("There are {0} samples without patient information:\n {1}\n\nResult have been saved to:\n {2}", noclinical.Count, noclinical.Merge("\n "), result.Merge("\n ")) };
}
/// <summary>
/// Creates a builder for the given options and calls <c>PrepareOptions()</c> on them.
/// </summary>
/// <param name="options">Options used by this builder; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public TCGAClinicalInformationBuilder(TCGAClinicalInformationBuilderOptions options)
{
    // Fail with a descriptive exception instead of a NullReferenceException from PrepareOptions().
    if (options == null)
    {
        throw new ArgumentNullException("options");
    }

    this._options = options;
    this._options.PrepareOptions();
}
/// <summary>
/// Writes the gene-by-sample value table (<c>_options.OutputFile</c>) and the sample design
/// table (<c>_options.DesignFile</c>) for every tumor type listed in <c>_options.TumorTypes</c>.
/// When at most one tumor type is configured, sample names are written without their
/// prefix (the text up to the first "_") and a <see cref="TCGAClinicalInformationBuilder"/>
/// is run against the written data file afterwards.
/// </summary>
/// <returns>
/// The output and design file names (followed by whatever the clinical information builder
/// returns in the single-tumor case) when clinical data was found for every sample;
/// otherwise a single message that lists the samples without clinical data together with
/// the result files.
/// </returns>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    // Gather the barcode information of every tumor type, keyed by GetSampleKey(tumor, key).
    var barMap = new Dictionary<string, BarInfo>();
    foreach (var tumor in _options.TumorTypes)
    {
        var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory, _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());
        foreach (var v in curMap)
        {
            barMap[GetSampleKey(tumor, v.Key)] = v.Value;
        }
    }

    // Load the clinical annotations; ReadClinData fills both the annotation map and the header list.
    var headers = new List<string>();
    var clindata = new Dictionary<string, IAnnotation>();
    foreach (var tumor in _options.TumorTypes)
    {
        ReadClinData(clindata, tumor, headers);
    }
    Console.WriteLine("{0} patient clinical information readed", clindata.Count);

    // Record samples that have no clinical data. The loop runs over a snapshot because
    // entries may be removed from barMap while it is being scanned.
    var noclinical = new List<string>();
    foreach (var bm in barMap.ToList())
    {
        if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
        {
            continue;
        }

        noclinical.Add(bm.Key);
        Console.Error.WriteLine("Cannot find clinical data for patient {0}", bm.Value.Paticipant);
        if (_options.WithClinicalInformationOnly)
        {
            barMap.Remove(bm.Key);
        }
    }

    Progress.SetMessage("Reading data ...");
    Func<double, double> getValue;
    var valueMap = GetData(barMap, out getValue);
    var genes = GetCommonGenes(valueMap);
    var samples = valueMap.Keys.OrderBy(m => m).ToList();

    Progress.SetMessage("Saving data ...");
    result.Add(_options.OutputFile);
    result.Add(_options.DesignFile);

    // The two modes differ only in how a sample is labelled, how its tumor type is
    // determined, and whether the clinical information builder runs afterwards.
    var multipleTumors = _options.TumorTypes.Count > 1;

    using (var sw = new StreamWriter(_options.OutputFile))
    {
        var sampleHeader = multipleTumors
            ? samples.Merge("\t")
            : samples.Select(s => s.StringAfter("_")).Merge("\t");
        sw.WriteLine("Gene\t{0}", sampleHeader);

        foreach (var gene in genes)
        {
            sw.Write(gene);
            foreach (var sample in samples)
            {
                // NOTE(review): the value is formatted with the writer's default format
                // provider (current culture) - confirm that is intended for a tab-delimited file.
                sw.Write("\t{0}", getValue(valueMap[sample][gene]));
            }
            sw.WriteLine();
        }
    }

    using (var sw = new StreamWriter(_options.DesignFile))
    {
        sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
            sw.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
            sw.WriteLine();
        }

        foreach (var entry in barMap)
        {
            var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
            var sampleName = multipleTumors ? entry.Key : entry.Key.StringAfter("_");
            var type = TCGASampleCode.Find(entry.Value.Sample);
            sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode, type.Definition);

            // Samples without clinical data get an empty annotation, i.e. blank clinical columns.
            IAnnotation vdata;
            if (!clindata.TryGetValue(GetSampleKey(tumor, entry.Value.Paticipant), out vdata))
            {
                vdata = new Annotation();
            }

            foreach (var header in headers)
            {
                if (vdata.Annotations.ContainsKey(header))
                {
                    sw.Write("\t{0}", vdata.Annotations[header]);
                }
                else
                {
                    sw.Write("\t");
                }
            }
            sw.WriteLine();
        }
    }

    if (!multipleTumors)
    {
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
            ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
            DataFile = _options.OutputFile,
            ThrowException = false,
        };
        result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress }.Process());
    }

    Progress.End();

    if (noclinical.Count == 0)
    {
        return result.ToArray();
    }

    return new[] { string.Format("There are {0} samples without patient information:\n {1}\n\nResult have been saved to:\n {2}", noclinical.Count, noclinical.Merge("\n "), result.Merge("\n ")) };
}