예제 #1
0
        /// <summary>
        ///   Builds the sample map of one tumor directory: for each technology type, the bar infos
        ///   grouped by sample type. Four datasets are registered, in this order: the "U133A"
        ///   microarray, the "Agil3" microarray (or "Agil2" when "Agil3" is absent), RNA-Seq V1 and
        ///   RNA-Seq V2. The n-th dataset is stored under the n-th value returned by
        ///   EnumUtils.EnumToArray for TCGATechnologyType.
        /// </summary>
        /// <param name="tumordir">Tumor directory handed to the TCGAUtils dataset lookups.</param>
        /// <returns>The map filled by AddDataset, one entry per registered dataset.</returns>
        public static Dictionary <TCGATechnologyType, Dictionary <TCGASampleType, List <BarInfo> > > GetTumorSampleMap(string tumordir)
        {
            var sampleMap = new Dictionary <TCGATechnologyType, Dictionary <TCGASampleType, List <BarInfo> > >();

            var microarrayDatasets = TCGAUtils.GetMicroarrayDatasets(tumordir);
            var affymetrix         = microarrayDatasets.Find(ds => ds.Name.Equals("U133A"));

            // Prefer the "Agil3" dataset and fall back to "Agil2" only when it is missing.
            var agilentDataset = microarrayDatasets.Find(ds => ds.Name.Equals("Agil3"))
                                 ?? microarrayDatasets.Find(ds => ds.Name.Equals("Agil2"));

            // A dataset that was not found stays null; AddDataset still registers empty lists for it.
            var datasets = new List <DatasetInfo>
            {
                affymetrix,
                agilentDataset,
                TCGAUtils.GetRnaSeqV1DataSet(tumordir),
                TCGAUtils.GetRnaSeqV2DataSet(tumordir)
            };

            var technologyTypes = EnumUtils.EnumToArray <TCGATechnologyType>();
            var slot            = 0;

            foreach (var dataset in datasets)
            {
                AddDataset(sampleMap, technologyTypes[slot], dataset);
                slot++;
            }

            return sampleMap;
        }
        /// <summary>
        ///   Reads the clinical patient file of one tumor type and stores every entry in
        ///   <paramref name="clinicalData"/>, keyed by the sample key built from the tumor type and the
        ///   entry's bar code. Afterwards the properties of the matching header definition are appended
        ///   to <paramref name="queryHeaders"/>. Nothing happens when the clinical patient file is missing.
        /// </summary>
        /// <param name="clinicalData">Receives the clinical entries; an existing key is overwritten.</param>
        /// <param name="tumorType">Tumor type used to locate the clinical file and to build the keys.</param>
        /// <param name="queryHeaders">Receives the header properties that it does not contain yet.</param>
        private void ReadClinData(IDictionary <string, IAnnotation> clinicalData, string tumorType,
                                  ICollection <string> queryHeaders)
        {
            var clinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, tumorType);

            if (!File.Exists(clinicalFile))
            {
                return;
            }

            foreach (var patient in new TCGAClinicalInformationFormat().ReadFromFile(clinicalFile))
            {
                var sampleKey = GetSampleKey(tumorType, patient.BarCode());
                clinicalData[sampleKey] = patient;
            }

            // Use the header definition named after the clinical file when it exists,
            // otherwise the generic clinical patient header definition.
            var specificHeader = TemplateDirectory + "/" + Path.GetFileNameWithoutExtension(clinicalFile) + ".header.xml";
            var headerFile     = File.Exists(specificHeader)
                                 ? specificHeader
                                 : TemplateDirectory + "/clinical_patient_tcga.header.xml";

            foreach (var property in HeaderDefinition.LoadFromFile(headerFile).Properties)
            {
                if (queryHeaders.Contains(property))
                {
                    continue;
                }

                queryHeaders.Add(property);
            }
        }
예제 #3
0
        /// <summary>
        ///   Removes every entry of BarInfoListMap whose key resolves, through
        ///   TCGAUtils.GetSampleType, to the given sample type.
        /// </summary>
        /// <param name="stype">Sample type whose entries are removed.</param>
        public void RemoveAll(TCGASampleType stype)
        {
            // Work on a snapshot of the keys so the map can be changed while walking them.
            var snapshot = BarInfoListMap.Keys.ToArray();

            for (var index = 0; index < snapshot.Length; index++)
            {
                var candidate = snapshot[index];

                if (TCGAUtils.GetSampleType(candidate) != stype)
                {
                    continue;
                }

                BarInfoListMap.Remove(candidate);
            }
        }
예제 #4
0
        /// <summary>
        ///   Rebuilds the tumor list box. When a data type is selected and the root directory exists,
        ///   only tumors whose technology directory exists below the root directory are listed;
        ///   otherwise all tumors are listed. Each tumor is shown as "tumor, description" when the
        ///   description map has an entry for its upper-cased name. Rows whose tumor name is part of
        ///   the previous selection are selected again.
        /// </summary>
        private void FillTumor()
        {
            var descriptions = TCGAUtils.GetTumorDescriptionMap();

            object[] visibleTumors;
            if (lbDataTypes.SelectedItem == null || !Directory.Exists(rootDir.FullName))
            {
                visibleTumors = this.tumors.ToArray();
            }
            else
            {
                var technology = TCGATechnology.Parse(lbDataTypes.SelectedItem as string);

                visibleTumors = this.tumors
                                .Where(tumor => Directory.Exists(technology.GetTechnologyDirectory(rootDir.FullName + "/" + tumor)))
                                .ToArray();
            }

            lbTumors.BeginUpdate();
            try
            {
                // Capture the selection before the items are cleared.
                var previouslySelected = new HashSet <string>(GetSelectedTumors().ConvertAll(m => m as string));

                lbTumors.Items.Clear();
                foreach (string tumor in visibleTumors)
                {
                    var lookupKey = tumor.ToUpper();
                    var caption   = descriptions.ContainsKey(lookupKey)
                                    ? tumor + ", " + descriptions[lookupKey]
                                    : tumor;

                    lbTumors.Items.Add(caption);
                }

                if (previouslySelected.Count > 0)
                {
                    // Rows were added in the order of visibleTumors, so row i shows visibleTumors[i].
                    for (int row = 0; row < lbTumors.Items.Count; row++)
                    {
                        if (previouslySelected.Contains(visibleTumors[row] as string))
                        {
                            lbTumors.SetSelected(row, true);
                        }
                    }
                }
            }
            finally
            {
                lbTumors.EndUpdate();
            }
        }
예제 #5
0
        /// <summary>
        ///   Registers one dataset in the tumor map under the given technology type. An empty list is
        ///   created for every sample type; when a dataset is supplied, the first bar info of each of
        ///   its BarInfoListMap entries is appended to the list of the sample type that
        ///   TCGAUtils.GetSampleType reports for the entry's key.
        /// </summary>
        /// <param name="tumormap">Map that receives (or replaces) the entry for the technology type.</param>
        /// <param name="technolyType">Technology type the dataset is stored under.</param>
        /// <param name="datasetInfo">Dataset to register; null leaves all sample type lists empty.</param>
        private static void AddDataset(Dictionary <TCGATechnologyType, Dictionary <TCGASampleType, List <BarInfo> > > tumormap, TCGATechnologyType technolyType, DatasetInfo datasetInfo)
        {
            var bySampleType = new Dictionary <TCGASampleType, List <BarInfo> >();
            tumormap[technolyType] = bySampleType;

            foreach (var sampleType in EnumUtils.EnumToArray <TCGASampleType>())
            {
                bySampleType[sampleType] = new List <BarInfo>();
            }

            if (datasetInfo != null)
            {
                foreach (var pair in datasetInfo.BarInfoListMap)
                {
                    var bucket = bySampleType[TCGAUtils.GetSampleType(pair.Key)];
                    bucket.Add(pair.Value.First());
                }
            }
        }
예제 #6
0
        /// <summary>
        ///   Creates one DatasetInfo per sub-directory of the "data\transcriptome" folder below the
        ///   tumor directory. Each dataset gets its name from TCGAUtils.GetMicroarrayName, its bar
        ///   info map from TCGAUtils.GetMicroarrayFiles and a Level3MicroarrayDataTxtReader.
        /// </summary>
        /// <param name="tumordir">Tumor directory that contains the "data\transcriptome" folder.</param>
        /// <returns>The datasets found; an empty list when the folder does not exist.</returns>
        public static List <DatasetInfo> GetMicroarrayDatasets(string tumordir)
        {
            var datasets         = new List <DatasetInfo>();
            var transcriptomeDir = tumordir + @"\data\transcriptome";

            if (Directory.Exists(transcriptomeDir))
            {
                foreach (var platformDir in Directory.GetDirectories(transcriptomeDir))
                {
                    var dataset = new DatasetInfo();
                    dataset.Name           = TCGAUtils.GetMicroarrayName(platformDir);
                    dataset.BarInfoListMap = TCGAUtils.GetMicroarrayFiles(platformDir, false);
                    dataset.Reader         = new Level3MicroarrayDataTxtReader();

                    datasets.Add(dataset);
                }
            }

            return datasets;
        }
        /// <summary>
        ///   Collects the barcode files of every configured tumor type, joins them with the clinical
        ///   patient information and writes two tab-separated files: the data matrix
        ///   (<c>_options.OutputFile</c>, one row per common gene, one column per sample) and the
        ///   design table (<c>_options.DesignFile</c>, one row per sample followed by the clinical
        ///   header columns).
        /// </summary>
        /// <remarks>
        ///   When more than one tumor type is configured, samples are labelled with their full key and
        ///   the tumor type is derived from that key. Otherwise samples are labelled with the part of
        ///   the key after "_", the single configured tumor type is used, and
        ///   TCGAClinicalInformationBuilder is run on the output file; its results are added to the
        ///   returned file list.
        ///   Samples without clinical data are reported on the error stream and, when
        ///   <c>_options.WithClinicalInformationOnly</c> is set, excluded from the output.
        /// </remarks>
        /// <returns>
        ///   The names of the generated files, or - when at least one sample has no clinical data - a
        ///   single message that lists those samples together with the generated files.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            var barMap = new Dictionary <string, BarInfo>();
            foreach (var tumor in _options.TumorTypes)
            {
                var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory,
                                                         _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());

                foreach (var v in curMap)
                {
                    barMap[GetSampleKey(tumor, v.Key)] = v.Value;
                }
            }

            var headers  = new List <string>();
            var clindata = new Dictionary <string, IAnnotation>();

            foreach (var tumor in _options.TumorTypes)
            {
                ReadClinData(clindata, tumor, headers);
            }
            Console.WriteLine("{0} patient clinical information readed", clindata.Count);

            var noclinical = new List <string>();

            // Iterate over a snapshot because entries may be removed from barMap inside the loop.
            foreach (var bm in barMap.ToList())
            {
                if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
                {
                    continue;
                }

                noclinical.Add(bm.Key);

                Console.Error.WriteLine(string.Format("Cannot find clinical data for patient {0}", bm.Value.Paticipant));
                if (_options.WithClinicalInformationOnly)
                {
                    barMap.Remove(bm.Key);
                }
            }

            Progress.SetMessage("Reading data ...");
            Func <double, double> getValue;
            var valueMap = GetData(barMap, out getValue);

            var genes   = GetCommonGenes(valueMap);
            var samples = valueMap.Keys.OrderBy(m => m).ToList();

            Progress.SetMessage("Saving data ...");

            result.Add(_options.OutputFile);
            result.Add(_options.DesignFile);

            // The two output layouts differ only in how a sample is labelled and how its tumor type
            // is determined, so both are produced by the same writers below.
            var multipleTumors = _options.TumorTypes.Count > 1;

            using (var sw = new StreamWriter(_options.OutputFile))
            {
                var sampleColumns = multipleTumors
                                    ? samples.Merge("\t")
                                    : (from s in samples select s.StringAfter("_")).Merge("\t");

                sw.WriteLine("Gene\t{0}", sampleColumns);
                foreach (var gene in genes)
                {
                    sw.Write(gene);
                    foreach (var sample in samples)
                    {
                        sw.Write("\t{0}", getValue(valueMap[sample][gene]));
                    }
                    sw.WriteLine();
                }
            }

            using (var sw = new StreamWriter(_options.DesignFile))
            {
                sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
                if (headers.Count > 0)
                {
                    sw.WriteLine("\t{0}", headers.Merge("\t"));
                }
                else
                {
                    sw.WriteLine();
                }

                foreach (var entry in barMap)
                {
                    var tumor       = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
                    var type        = TCGASampleCode.Find(entry.Value.Sample);
                    var sampleLabel = multipleTumors ? entry.Key : entry.Key.StringAfter("_");

                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleLabel, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode,
                             type.Definition);

                    // Samples without clinical data get an empty annotation, i.e. empty clinical columns.
                    IAnnotation vdata;
                    if (!clindata.TryGetValue(GetSampleKey(tumor, entry.Value.Paticipant), out vdata))
                    {
                        vdata = new Annotation();
                    }

                    foreach (var header in headers)
                    {
                        if (vdata.Annotations.ContainsKey(header))
                        {
                            sw.Write("\t{0}", vdata.Annotations[header]);
                        }
                        else
                        {
                            sw.Write("\t");
                        }
                    }
                    sw.WriteLine();
                }
            }

            if (!multipleTumors)
            {
                var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
                {
                    ClinicalFile   = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
                    DataFile       = _options.OutputFile,
                    ThrowException = false,
                };
                result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions)
                {
                    Progress = this.Progress
                }.Process());
            }
            Progress.End();

            if (noclinical.Count == 0)
            {
                return(result.ToArray());
            }

            return(new[] { string.Format("There are {0} samples without patient information:\n  {1}\n\nResult have been saved to:\n  {2}", noclinical.Count, noclinical.Merge("\n  "), result.Merge("\n  ")) });
        }
예제 #8
0
 /// <summary>
 ///   Forwards an extraction request to TCGAUtils.ExtractData, supplying this instance's
 ///   TCGARoot, TargetDirectory, TargetFilePrefix, Tumors and SampleCodes.
 /// </summary>
 /// <param name="datatype">Data type, passed through unchanged.</param>
 /// <param name="platforms">Platforms, passed through unchanged.</param>
 /// <param name="outputCountDataOnly">Passed through unchanged; defaults to false.</param>
 public void ExtractData(string datatype, string[] platforms, bool outputCountDataOnly = false)
 {
     TCGAUtils.ExtractData(
         TCGARoot,
         TargetDirectory,
         TargetFilePrefix,
         Tumors,
         datatype,
         platforms,
         SampleCodes,
         outputCountDataOnly);
 }
        /// <summary>
        ///   Brings the archives of all child nodes of <paramref name="m"/> up to date inside
        ///   <paramref name="currDir"/>. Child nodes of lower levels are dropped first: level 1 and
        ///   level 2 when any level 3 node exists, otherwise level 1 only. For every remaining node:
        ///   a folder, archive or MD5 file found in the parent directory of
        ///   <paramref name="currDir"/> is moved into <paramref name="currDir"/>; nodes marked as
        ///   previous versions have their folder, archive and MD5 file deleted; all other nodes are
        ///   downloaded when the archive is missing, verified against the server-provided MD5 and
        ///   handed to UncompressFile.
        /// </summary>
        /// <param name="m">Tree node whose child nodes are processed; its node list is modified.</param>
        /// <param name="currDir">Directory that holds the extracted folders and the archives.</param>
        /// <exception cref="UserTerminatedException">Cancellation was requested through Progress.</exception>
        /// <exception cref="Exception">An archive could not be downloaded or failed the MD5 check.</exception>
        private void DownloadLevel3Data(SpiderTreeNode m, string currDir)
        {
            if (m.Nodes.Any(n => TCGAUtils.IsLevel3(n.Name)))
            {
                m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name) || TCGAUtils.IsLevel2(n.Name));
            }
            else //download level2 data
            {
                m.Nodes.RemoveAll(n => TCGAUtils.IsLevel1(n.Name));
            }

            m.MarkHighestVersionNodes();
            foreach (var node in m.Nodes)
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                var fDir          = currDir + "/" + node.Name;
                var compressed    = fDir + ".tar.gz";
                var compressedMd5 = fDir + ".tar.gz.md5";

                // Data that still sits one directory level up is moved into currDir before use.
                var parentDir             = Path.GetDirectoryName(currDir);
                var parentFDir            = parentDir + "/" + node.Name;
                var parentFComparessed    = parentFDir + ".tar.gz";
                var parentFComparessedMd5 = parentFDir + ".tar.gz.md5";

                if (Directory.Exists(parentFDir))
                {
                    Directory.Move(parentFDir, fDir);
                }
                if (File.Exists(parentFComparessed))
                {
                    File.Move(parentFComparessed, compressed);
                }
                if (File.Exists(parentFComparessedMd5))
                {
                    File.Move(parentFComparessedMd5, compressedMd5);
                }

                if (node.IsPreviousVersion)
                {
                    if (Directory.Exists(fDir))
                    {
                        Progress.SetMessage("Deleting previous version : " + fDir);

                        // Recursive delete so that an extracted folder with sub-directories is removed too.
                        Directory.Delete(fDir, true);
                    }

                    // Archive and checksum are removed independently, so a checksum file whose
                    // archive is already gone does not stay behind.
                    if (File.Exists(compressed))
                    {
                        File.Delete(compressed);
                    }
                    if (File.Exists(compressedMd5))
                    {
                        File.Delete(compressedMd5);
                    }

                    continue;
                }

                Progress.SetMessage("Processing {0}.{1} ...", m.Name, node.Name);

                // An archive that already exists is used as-is; its MD5 is only checked right after a download.
                var bDownload = !File.Exists(compressed);
                var bTar      = bDownload || !Directory.Exists(fDir);

                if (bDownload)
                {
                    // The node uri ends with one extra character that is cut off before the
                    // archive extension is appended.
                    var uri = node.Uri.Substring(0, node.Uri.Length - 1) + ".tar.gz";
                    if (!WebUtils.DownloadFile(uri, compressed, this.Progress))
                    {
                        throw new Exception(string.Format("Downloading {0} failed", uri));
                    }

                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    WebUtils.DownloadFile(uri + ".md5", compressedMd5);

                    var downloadedMD5 = HashUtils.GetMD5Hash(compressed, true, false);

                    // The checksum is the first whitespace-delimited token of the MD5 file. Line
                    // breaks count as delimiters so that a file containing only the hash followed
                    // by a newline is read correctly.
                    var md5Tokens = File.ReadAllText(compressedMd5).Split(new[] { '\t', ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                    var trueMD5   = md5Tokens.Length > 0 ? md5Tokens[0] : string.Empty;

                    // Hex digests are compared case-insensitively.
                    if (!string.Equals(downloadedMD5, trueMD5, StringComparison.OrdinalIgnoreCase))
                    {
                        throw new Exception(string.Format("MD5 of file {0} doesn't equal to server provided MD5, downloading failed!\nYou may consider to delete the file and try again, or you may download and de-compress it by youself.", compressed));
                    }
                }

                UncompressFile(currDir, fDir, compressed, bTar);
            }
        }