/// <summary>
/// Initializes the form: wires the root-directory and target-file arguments,
/// fills the data-type and sample-type list boxes, and sets the window title.
/// </summary>
public TCGADatatableBuilderUI()
{
    InitializeComponent();

    rootDir.SetDirectoryArgument("TCGARoot", "TCGA Data Root");
    targetFile.FileArgument = new SaveFileArgument("Target Data", "tsv");

    // Data types: one entry per technology name.
    var technologyNames = TCGATechnology.GetTechnologyNames().ToArray();
    lbDataTypes.Items.AddRange(technologyNames);

    // Sample types: ordered by Code, displayed as "<short letter code>, <definition>".
    var sampleTypeLabels = TCGASampleCode.GetSampleCodes()
        .OrderBy(sampleCode => sampleCode.Code)
        .Select(sampleCode => string.Format("{0}, {1}", sampleCode.ShortLetterCode, sampleCode.Definition))
        .ToArray();
    lbSampleTypes.Items.AddRange(sampleTypeLabels);

    this.Text = Constants.GetSQHTitle(title, version);
}
/// <summary>
/// Resolves every entry of <c>TCGASampleCodeStrings</c> to a <c>TCGASampleCode</c>
/// using <c>TCGASampleCode.Find</c>.
/// </summary>
/// <returns>The resolved sample codes, in the same order as the configured strings.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <c>TCGASampleCode.Find</c> returns null for one of the configured strings.
/// </exception>
public IList<TCGASampleCode> GetTCGASampleCodes()
{
    var result = new List<TCGASampleCode>();

    foreach (var s in TCGASampleCodeStrings)
    {
        var code = TCGASampleCode.Find(s);
        if (code == null)
        {
            // ArgumentException(string, string) is (message, paramName), not a format overload;
            // the message has to be formatted explicitly so the offending code shows up in it.
            throw new ArgumentException(string.Format("Cannot find sample code for {0}", s));
        }

        result.Add(code);
    }

    return result;
}
/// <summary>
/// Validates the options and fills in defaults for sample codes and platforms.
/// Every detected problem is appended to <c>ParsingErrors</c>.
/// </summary>
/// <returns>
/// <c>true</c> when all checks pass; <c>false</c> as soon as one check fails
/// (the failure reason has been added to <c>ParsingErrors</c>).
/// </returns>
public override bool PrepareOptions()
{
    if (!Directory.Exists(this.TCGADirectory))
    {
        ParsingErrors.Add(string.Format("Directory not exists {0}.", this.TCGADirectory));
        return false;
    }

    // The data type must be parseable as a technology.
    try
    {
        TCGATechnology.Parse(this.DataType);
    }
    catch (Exception ex)
    {
        ParsingErrors.Add(ex.Message);
        return false;
    }

    // Each tumor type needs its own sub directory below the root.
    foreach (var tumor in this.TumorTypes)
    {
        var tumordir = this.TCGADirectory + "/" + tumor;
        if (!Directory.Exists(tumordir))
        {
            ParsingErrors.Add(string.Format("Directory not exists {0}.", tumordir));
            return false;
        }
    }

    if (TCGASampleCodeStrings == null || TCGASampleCodeStrings.Count == 0)
    {
        // No sample codes given: default to the short letter code of every known sample code.
        TCGASampleCodeStrings = (from v in TCGASampleCode.GetSampleCodes()
                                 select v.ShortLetterCode).ToList();
    }
    else
    {
        // Sample codes given: make sure each of them can be resolved.
        try
        {
            GetTCGASampleCodes();
        }
        catch (Exception ex)
        {
            ParsingErrors.Add(ex.Message);
            return false;
        }
    }

    try
    {
        GetTechnology();
    }
    catch (Exception ex)
    {
        ParsingErrors.Add(ex.Message);
        return false;
    }

    if (this.Platforms == null || this.Platforms.Count == 0)
    {
        // No platforms given: collect the distinct sub directory names found below the
        // technology directory of every tumor type. Directory enumeration throws when a
        // technology directory is missing, so it is reported like every other failure
        // in this method instead of escaping as an unhandled exception.
        try
        {
            var tec = GetTechnology();
            this.Platforms = (from tumor in TumorTypes
                              let dir = Path.Combine(this.TCGADirectory, tumor)
                              let tecdir = tec.GetTechnologyDirectory(dir)
                              from subdir in Directory.GetDirectories(tecdir)
                              select Path.GetFileName(subdir)).Distinct().OrderBy(m => m).ToList();
        }
        catch (Exception ex)
        {
            ParsingErrors.Add(ex.Message);
            return false;
        }
    }

    return true;
}
/// <summary>
/// Collects the barcode/file map and the clinical data of every configured tumor type,
/// then writes the gene-by-sample value table to <c>_options.OutputFile</c> and the
/// sample design table to <c>_options.DesignFile</c>.
/// When only one tumor type is configured, sample names are written as the text returned by
/// <c>StringAfter("_")</c> (presumably stripping the tumor prefix added by GetSampleKey - TODO confirm)
/// and a <c>TCGAClinicalInformationBuilder</c> is run on the output file afterwards.
/// </summary>
/// <returns>
/// The result file names when every sample has clinical data; otherwise a single message
/// listing the samples without clinical data together with the result file names.
/// </returns>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    // Sample key -> barcode information, over all tumor types.
    var barMap = new Dictionary<string, BarInfo>();
    foreach (var tumor in _options.TumorTypes)
    {
        var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory, _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());
        foreach (var v in curMap)
        {
            barMap[GetSampleKey(tumor, v.Key)] = v.Value;
        }
    }

    var headers = new List<string>();
    var clindata = new Dictionary<string, IAnnotation>();
    foreach (var tumor in _options.TumorTypes)
    {
        ReadClinData(clindata, tumor, headers);
    }
    Console.WriteLine("{0} patient clinical information readed", clindata.Count);

    // Record samples without clinical data; iterate over a snapshot because entries
    // may be removed from barMap inside the loop.
    var noclinical = new List<string>();
    var keyvalues = barMap.ToList();
    foreach (var bm in keyvalues)
    {
        if (!clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
        {
            noclinical.Add(bm.Key);
            Console.Error.WriteLine(string.Format("Cannot find clinical data for patient {0}", bm.Value.Paticipant));
            if (_options.WithClinicalInformationOnly)
            {
                barMap.Remove(bm.Key);
            }
        }
    }

    Progress.SetMessage("Reading data ...");
    Func<double, double> getValue;
    var valueMap = GetData(barMap, out getValue);
    var genes = GetCommonGenes(valueMap);
    var samples = valueMap.Keys.OrderBy(m => m).ToList();

    Progress.SetMessage("Saving data ...");
    result.Add(_options.OutputFile);
    result.Add(_options.DesignFile);

    // With several tumor types the full sample key is written; with a single one the
    // name is reduced via StringAfter("_") and the tumor comes from the option itself.
    bool multipleTumors = _options.TumorTypes.Count > 1;

    using (var sw = new StreamWriter(_options.OutputFile))
    {
        var sampleHeader = multipleTumors
            ? samples.Merge("\t")
            : (from s in samples select s.StringAfter("_")).Merge("\t");
        sw.WriteLine("Gene\t{0}", sampleHeader);

        foreach (var gene in genes)
        {
            sw.Write(gene);
            foreach (var sample in samples)
            {
                sw.Write("\t{0}", getValue(valueMap[sample][gene]));
            }
            sw.WriteLine();
        }
    }

    using (var sw = new StreamWriter(_options.DesignFile))
    {
        sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
            sw.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
            sw.WriteLine();
        }

        foreach (var entry in barMap)
        {
            var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
            var sampleName = multipleTumors ? entry.Key : entry.Key.StringAfter("_");
            var type = TCGASampleCode.Find(entry.Value.Sample);

            sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode, type.Definition);

            // Samples without clinical data get an empty annotation, i.e. empty cells.
            var key = GetSampleKey(tumor, entry.Value.Paticipant);
            IAnnotation vdata;
            if (!clindata.TryGetValue(key, out vdata))
            {
                vdata = new Annotation();
            }

            foreach (var header in headers)
            {
                if (vdata.Annotations.ContainsKey(header))
                {
                    sw.Write("\t{0}", vdata.Annotations[header]);
                }
                else
                {
                    sw.Write("\t");
                }
            }
            sw.WriteLine();
        }
    }

    if (!multipleTumors)
    {
        // Single tumor type only: additionally run the clinical information builder
        // on the data file that was just written.
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
            ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
            DataFile = _options.OutputFile,
            ThrowException = false,
        };
        result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress }.Process());
    }

    Progress.End();

    if (noclinical.Count == 0)
    {
        return result.ToArray();
    }

    return new[] { string.Format("There are {0} samples without patient information:\n {1}\n\nResult have been saved to:\n {2}", noclinical.Count, noclinical.Merge("\n "), result.Merge("\n ")) };
}