/// <summary>
/// Builds, for one tumor directory, a map from technology type to sample type to bar infos.
/// </summary>
/// <param name="tumordir">Tumor directory passed on to the TCGAUtils dataset lookups.</param>
/// <returns>
/// A dictionary holding one entry for each of the four datasets looked up here
/// (U133A microarray, Agilent microarray, RNA-seq V1, RNA-seq V2).
/// </returns>
public static Dictionary<TCGATechnologyType, Dictionary<TCGASampleType, List<BarInfo>>> GetTumorSampleMap(string tumordir)
{
    var tumorMap = new Dictionary<TCGATechnologyType, Dictionary<TCGASampleType, List<BarInfo>>>();

    List<DatasetInfo> arrays = TCGAUtils.GetMicroarrayDatasets(tumordir);

    // The "Agil2" dataset is only looked up when no "Agil3" dataset is present.
    var datasets = new[]
    {
        arrays.Find(ds => ds.Name.Equals("U133A")),
        arrays.Find(ds => ds.Name.Equals("Agil3")) ?? arrays.Find(ds => ds.Name.Equals("Agil2")),
        TCGAUtils.GetRnaSeqV1DataSet(tumordir),
        TCGAUtils.GetRnaSeqV2DataSet(tumordir)
    };

    // Datasets are paired with technology types purely by position.
    // NOTE(review): this relies on the declaration order of TCGATechnologyType matching
    // the order of the array above - confirm against the enum.
    var technologies = EnumUtils.EnumToArray<TCGATechnologyType>();
    int slot = 0;
    foreach (var dataset in datasets)
    {
        AddDataset(tumorMap, technologies[slot], dataset);
        slot++;
    }

    return tumorMap;
}
/// <summary>
/// Reads the clinical patient file of one tumor type and stores every entry in
/// <paramref name="clinicalData"/>, keyed by GetSampleKey(tumor type, bar code).
/// Also loads the header definition template belonging to that file and appends its
/// properties to <paramref name="queryHeaders"/>, skipping those already present.
/// Does nothing when the clinical patient file does not exist.
/// </summary>
/// <param name="clinicalData">Dictionary receiving the clinical entries.</param>
/// <param name="tumorType">Tumor type whose clinical patient file is read.</param>
/// <param name="queryHeaders">Collection receiving the header names.</param>
private void ReadClinData(IDictionary<string, IAnnotation> clinicalData, string tumorType, ICollection<string> queryHeaders)
{
    var patientFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, tumorType);
    if (!File.Exists(patientFile))
    {
        return;
    }

    var entries = new TCGAClinicalInformationFormat().ReadFromFile(patientFile);
    foreach (var patient in entries)
    {
        var sampleKey = GetSampleKey(tumorType, patient.BarCode());
        clinicalData[sampleKey] = patient;
    }

    // Prefer a header template named after the clinical file; otherwise use the generic one.
    var headerFile = TemplateDirectory + "/" + Path.GetFileNameWithoutExtension(patientFile) + ".header.xml";
    if (!File.Exists(headerFile))
    {
        headerFile = TemplateDirectory + "/clinical_patient_tcga.header.xml";
    }

    var definition = HeaderDefinition.LoadFromFile(headerFile);
    foreach (var property in definition.Properties)
    {
        if (queryHeaders.Contains(property))
        {
            continue;
        }

        queryHeaders.Add(property);
    }
}
/// <summary>
/// Removes every entry of BarInfoListMap whose key resolves, through
/// TCGAUtils.GetSampleType, to the given sample type.
/// </summary>
/// <param name="stype">Sample type whose entries are dropped.</param>
public void RemoveAll(TCGASampleType stype)
{
    // Iterate over a snapshot of the keys so the map can be modified inside the loop.
    foreach (var barcode in BarInfoListMap.Keys.ToArray())
    {
        if (TCGAUtils.GetSampleType(barcode) != stype)
        {
            continue;
        }

        BarInfoListMap.Remove(barcode);
    }
}
/// <summary>
/// Repopulates the tumor list box. When a data type is selected and the root directory
/// exists, only tumors that have a directory for that technology are listed; otherwise
/// all tumors are listed. Tumors with a known description are shown as "tumor, description".
/// Tumors that were selected before the refresh are selected again.
/// </summary>
private void FillTumor()
{
    var descriptions = TCGAUtils.GetTumorDescriptionMap();

    object[] visibleTumors;
    if (lbDataTypes.SelectedItem == null || !Directory.Exists(rootDir.FullName))
    {
        visibleTumors = this.tumors.ToArray();
    }
    else
    {
        var technology = TCGATechnology.Parse(lbDataTypes.SelectedItem as string);
        visibleTumors = this.tumors
            .Where(tumor => Directory.Exists(technology.GetTechnologyDirectory(rootDir.FullName + "/" + tumor)))
            .ToArray();
    }

    lbTumors.BeginUpdate();
    try
    {
        // Capture the current selection before the items are cleared.
        var previouslySelected = new HashSet<string>(GetSelectedTumors().ConvertAll(m => m as string));

        lbTumors.Items.Clear();
        foreach (string tumor in visibleTumors)
        {
            var lookupKey = tumor.ToUpper();
            string caption = tumor;
            if (descriptions.ContainsKey(lookupKey))
            {
                caption = tumor + ", " + descriptions[lookupKey];
            }

            lbTumors.Items.Add(caption);
        }

        if (previouslySelected.Count > 0)
        {
            // List items were added in the order of visibleTumors, so index i refers to the
            // same tumor in both; matching uses the raw tumor name, not the caption.
            for (int index = 0; index < lbTumors.Items.Count; index++)
            {
                var tumorName = visibleTumors[index] as string;
                if (previouslySelected.Contains(tumorName))
                {
                    lbTumors.SetSelected(index, true);
                }
            }
        }
    }
    finally
    {
        lbTumors.EndUpdate();
    }
}
/// <summary>
/// Registers one technology in <paramref name="tumormap"/>: creates an empty bar info list
/// for every sample type and then, when a dataset is given, adds the first bar info of
/// each dataset entry to the list of the sample type derived from the entry's key.
/// </summary>
/// <param name="tumormap">Map to fill; any existing entry for the technology is replaced.</param>
/// <param name="technolyType">Technology the dataset belongs to.</param>
/// <param name="datasetInfo">Dataset to register; may be null, leaving all lists empty.</param>
private static void AddDataset(Dictionary<TCGATechnologyType, Dictionary<TCGASampleType, List<BarInfo>>> tumormap, TCGATechnologyType technolyType, DatasetInfo datasetInfo)
{
    var bySampleType = new Dictionary<TCGASampleType, List<BarInfo>>();
    tumormap[technolyType] = bySampleType;

    // Every sample type gets a list up front so lookups below never miss.
    var sampleTypes = EnumUtils.EnumToArray<TCGASampleType>();
    foreach (var sampleType in sampleTypes)
    {
        bySampleType[sampleType] = new List<BarInfo>();
    }

    if (datasetInfo != null)
    {
        foreach (var entry in datasetInfo.BarInfoListMap)
        {
            var sampleType = TCGAUtils.GetSampleType(entry.Key);

            // Only the first bar info of each entry is kept.
            bySampleType[sampleType].Add(entry.Value.First());
        }
    }
}
/// <summary>
/// Lists the microarray datasets found below the "data/transcriptome" folder of a tumor
/// directory, one dataset per sub-directory.
/// </summary>
/// <param name="tumordir">Tumor directory to scan.</param>
/// <returns>
/// One <c>DatasetInfo</c> per sub-directory; an empty list when <paramref name="tumordir"/>
/// is null or empty or when the transcriptome folder does not exist.
/// </returns>
public static List<DatasetInfo> GetMicroarrayDatasets(string tumordir)
{
    var result = new List<DatasetInfo>();

    // Without a tumor directory there is nothing to scan (and Path.Combine rejects null).
    if (string.IsNullOrEmpty(tumordir))
    {
        return result;
    }

    // Path.Combine uses the platform's directory separator; the previous hard-coded
    // backslash path only resolved on Windows.
    var dir = Path.Combine(Path.Combine(tumordir, "data"), "transcriptome");
    if (!Directory.Exists(dir))
    {
        return result;
    }

    foreach (var mdir in Directory.GetDirectories(dir))
    {
        result.Add(new DatasetInfo()
        {
            Name = TCGAUtils.GetMicroarrayName(mdir),
            BarInfoListMap = TCGAUtils.GetMicroarrayFiles(mdir, false),
            Reader = new Level3MicroarrayDataTxtReader()
        });
    }

    return result;
}
/// <summary>
/// Exports the data of the configured tumor types: a gene-by-sample value table written to
/// the output file and a sample design table (with clinical columns) written to the design
/// file. When exactly one tumor type is configured (or none), sample names are written
/// without their prefix up to "_" and a TCGAClinicalInformationBuilder is run afterwards
/// on the output file.
/// </summary>
/// <returns>
/// The names of the generated files when every sample has clinical data; otherwise a single
/// message listing the samples without patient information together with the saved files.
/// </returns>
public override IEnumerable<string> Process()
{
    var result = new List<string>();

    // One BarInfo per sample over all tumor types, keyed by GetSampleKey(tumor, barcode).
    var barMap = new Dictionary<string, BarInfo>();
    foreach (var tumor in _options.TumorTypes)
    {
        var tumorBarcodes = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory, _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());
        foreach (var barcode in tumorBarcodes)
        {
            barMap[GetSampleKey(tumor, barcode.Key)] = barcode.Value;
        }
    }

    // Clinical annotations and the clinical column headers of every tumor type.
    var headers = new List<string>();
    var clindata = new Dictionary<string, IAnnotation>();
    foreach (var tumor in _options.TumorTypes)
    {
        ReadClinData(clindata, tumor, headers);
    }
    Console.WriteLine("{0} patient clinical information readed", clindata.Count);

    // Report samples without clinical data; drop them only when the options demand it.
    // The loop runs over a snapshot because barMap may shrink inside it.
    var noclinical = new List<string>();
    foreach (var sample in barMap.ToList())
    {
        var patientKey = GetSampleKey(GetTumorType(sample.Key), sample.Value.Paticipant);
        if (clindata.ContainsKey(patientKey))
        {
            continue;
        }

        noclinical.Add(sample.Key);
        Console.Error.WriteLine(string.Format("Cannot find clinical data for patient {0}", sample.Value.Paticipant));
        if (_options.WithClinicalInformationOnly)
        {
            barMap.Remove(sample.Key);
        }
    }

    Progress.SetMessage("Reading data ...");
    Func<double, double> getValue;
    var valueMap = GetData(barMap, out getValue);
    var genes = GetCommonGenes(valueMap);
    var samples = valueMap.Keys.OrderBy(m => m).ToList();

    Progress.SetMessage("Saving data ...");
    result.Add(_options.OutputFile);
    result.Add(_options.DesignFile);

    // With several tumor types the full sample key is written and the tumor type is derived
    // from it; otherwise the part after "_" is written and the single tumor type is used.
    bool multipleTumors = _options.TumorTypes.Count > 1;

    using (var dataWriter = new StreamWriter(_options.OutputFile))
    {
        var sampleColumns = multipleTumors
            ? samples.Merge("\t")
            : (from s in samples select s.StringAfter("_")).Merge("\t");
        dataWriter.WriteLine("Gene\t{0}", sampleColumns);

        foreach (var gene in genes)
        {
            dataWriter.Write(gene);
            foreach (var sample in samples)
            {
                dataWriter.Write("\t{0}", getValue(valueMap[sample][gene]));
            }
            dataWriter.WriteLine();
        }
    }

    using (var designWriter = new StreamWriter(_options.DesignFile))
    {
        designWriter.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
            designWriter.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
            designWriter.WriteLine();
        }

        foreach (var entry in barMap)
        {
            var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
            var sampleName = multipleTumors ? entry.Key : entry.Key.StringAfter("_");
            var sampleCode = TCGASampleCode.Find(entry.Value.Sample);

            designWriter.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, sampleCode.ShortLetterCode, sampleCode.Definition);

            // Samples without clinical data get an empty annotation, i.e. blank clinical cells.
            var patientKey = GetSampleKey(tumor, entry.Value.Paticipant);
            IAnnotation clinical;
            if (!clindata.TryGetValue(patientKey, out clinical))
            {
                clinical = new Annotation();
            }

            foreach (var header in headers)
            {
                if (clinical.Annotations.ContainsKey(header))
                {
                    designWriter.Write("\t{0}", clinical.Annotations[header]);
                }
                else
                {
                    designWriter.Write("\t");
                }
            }
            designWriter.WriteLine();
        }
    }

    if (!multipleTumors)
    {
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
            ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
            DataFile = _options.OutputFile,
            ThrowException = false,
        };
        var clinicalBuilder = new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress };
        result.AddRange(clinicalBuilder.Process());
    }

    Progress.End();

    if (noclinical.Count == 0)
    {
        return result.ToArray();
    }

    return new[] { string.Format("There are {0} samples without patient information:\n {1}\n\nResult have been saved to:\n {2}", noclinical.Count, noclinical.Merge("\n "), result.Merge("\n ")) };
}
/// <summary>
/// Forwards to TCGAUtils.ExtractData, combining this object's TCGARoot, TargetDirectory,
/// TargetFilePrefix, Tumors and SampleCodes members with the arguments given here.
/// </summary>
/// <param name="datatype">Data type passed through to TCGAUtils.ExtractData.</param>
/// <param name="platforms">Platforms passed through to TCGAUtils.ExtractData.</param>
/// <param name="outputCountDataOnly">Flag passed through unchanged; defaults to false.</param>
public void ExtractData(string datatype, string[] platforms, bool outputCountDataOnly = false) { TCGAUtils.ExtractData(TCGARoot, TargetDirectory, TargetFilePrefix, Tumors, datatype, platforms, SampleCodes, outputCountDataOnly); }
/// <summary>
/// Processes the child nodes of <paramref name="m"/>: keeps only level 3 nodes when any
/// exist (otherwise removes just the level 1 nodes), deletes local copies of nodes marked as
/// previous versions, and for the remaining nodes downloads the ".tar.gz" archive when it is
/// not on disk, checks it against the server's MD5 file and hands it to UncompressFile.
/// </summary>
/// <param name="m">Tree node whose children are processed; its node list is pruned in place.</param>
/// <param name="currDir">Local directory that receives the node folders and archives.</param>
/// <exception cref="UserTerminatedException">Cancellation is pending on the progress object.</exception>
/// <exception cref="Exception">The archive download failed or its MD5 does not match.</exception>
private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
{
    bool hasLevel3 = m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name));
    if (hasLevel3)
    {
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
    }
    else
    {
        // No level 3 data available: download level 2 data.
        m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
    }

    m.MarkHighestVersionNodes();

    foreach (var node in m.Nodes)
    {
        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        var targetDir = currDir + "/" + node.Name;
        var archive = targetDir + ".tar.gz";
        var archiveMd5 = targetDir + ".tar.gz.md5";

        // Anything stored under the same name one directory level up is moved into currDir.
        var parentDir = Path.GetDirectoryName(currDir);
        var parentTargetDir = parentDir + "/" + node.Name;
        var parentArchive = parentTargetDir + ".tar.gz";
        var parentArchiveMd5 = parentTargetDir + ".tar.gz.md5";

        if (Directory.Exists(parentTargetDir))
        {
            Directory.Move(parentTargetDir, targetDir);
        }

        if (File.Exists(parentArchive))
        {
            File.Move(parentArchive, archive);
        }

        if (File.Exists(parentArchiveMd5))
        {
            File.Move(parentArchiveMd5, archiveMd5);
        }

        if (node.IsPreviousVersion)
        {
            if (Directory.Exists(targetDir))
            {
                Progress.SetMessage("Deleting previous version : " + targetDir);
                foreach (var file in Directory.GetFiles(targetDir))
                {
                    File.Delete(file);
                }
                Directory.Delete(targetDir);
            }

            if (File.Exists(archive))
            {
                File.Delete(archive);
                File.Delete(archiveMd5);
            }

            continue;
        }

        Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

        var needDownload = !File.Exists(archive);
        var needExtract = needDownload || !Directory.Exists(targetDir);

        if (needDownload)
        {
            // The last character of the node URI is dropped before ".tar.gz" is appended.
            var archiveUri = node.Uri.Substring(0, node.Uri.Length - 1) + ".tar.gz";
            if (!WebUtils.DownloadFile(archiveUri, archive, this.Progress))
            {
                throw new Exception(string.Format("Downloading {0} failed", archiveUri));
            }

            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            WebUtils.DownloadFile(archiveUri + ".md5", archiveMd5);

            // The expected hash is the first tab/space separated token of the .md5 file.
            var actualMd5 = HashUtils.GetMD5Hash(archive, true, false);
            var expectedMd5 = File.ReadAllText(archiveMd5).Split(new[] { '\t', ' ' })[0];
            if (!actualMd5.Equals(expectedMd5))
            {
                throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", archive));
            }
        }

        UncompressFile(currDir, targetDir, archive, needExtract);
    }
}