Ejemplo n.º 1
0
        /// <summary>
        /// Resolves every entry of <c>TCGASampleCodeStrings</c> to a <c>TCGASampleCode</c>
        /// via <c>TCGASampleCode.Find</c>.
        /// </summary>
        /// <returns>The resolved sample codes, in the same order as the source strings.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <c>TCGASampleCode.Find</c> returns null for one of the strings.
        /// </exception>
        public IList <TCGASampleCode> GetTCGASampleCodes()
        {
            List <TCGASampleCode> result = new List <TCGASampleCode>();

            foreach (var s in TCGASampleCodeStrings)
            {
                var code = TCGASampleCode.Find(s);
                if (code == null)
                {
                    // ArgumentException(string, string) treats its second argument as a
                    // parameter name and does not format the message, so the text has to
                    // be built explicitly for the offending value to appear in it.
                    throw new ArgumentException(string.Format("Cannot find sample code for {0}", s));
                }
                result.Add(code);
            }
            return(result);
        }
        /// <summary>
        /// Collects the barcode files of every configured tumor type, reads the matching
        /// clinical data, and writes a gene-by-sample data file (<c>_options.OutputFile</c>)
        /// and a sample design file (<c>_options.DesignFile</c>).
        /// When exactly one tumor type is configured (or none), the sample labels are written
        /// without their prefix up to the first "_" and a <c>TCGAClinicalInformationBuilder</c>
        /// is additionally run on the data file.
        /// </summary>
        /// <returns>
        /// The produced file names when every sample has clinical data; otherwise a single
        /// message string listing the samples without clinical data together with the
        /// produced file names.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            // The requested sample codes do not depend on the tumor type, so they are
            // resolved once instead of once per tumor.
            var sampleCodes = _options.GetTCGASampleCodes().ToArray();

            var barMap = new Dictionary <string, BarInfo>();
            foreach (var tumor in _options.TumorTypes)
            {
                var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory,
                                                         _options.GetTechnology(), tumor, _options.Platforms, sampleCodes);

                foreach (var v in curMap)
                {
                    barMap[GetSampleKey(tumor, v.Key)] = v.Value;
                }
            }

            var headers  = new List <string>();
            var clindata = new Dictionary <string, IAnnotation>();

            foreach (var tumor in _options.TumorTypes)
            {
                ReadClinData(clindata, tumor, headers);
            }
            Console.WriteLine("{0} patient clinical information readed", clindata.Count);

            var noclinical = new List <string>();

            // Iterate over a snapshot: entries may be removed from barMap inside the loop.
            foreach (var bm in barMap.ToList())
            {
                if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
                {
                    continue;
                }

                noclinical.Add(bm.Key);

                Console.Error.WriteLine("Cannot find clinical data for patient {0}", bm.Value.Paticipant);
                if (_options.WithClinicalInformationOnly)
                {
                    barMap.Remove(bm.Key);
                }
            }

            Progress.SetMessage("Reading data ...");
            Func <double, double> getValue;
            var valueMap = GetData(barMap, out getValue);

            var genes   = GetCommonGenes(valueMap);
            var samples = valueMap.Keys.OrderBy(m => m).ToList();

            Progress.SetMessage("Saving data ...");

            result.Add(_options.OutputFile);
            result.Add(_options.DesignFile);

            // With several tumor types the full sample key is kept as the label;
            // otherwise only the part after the first "_" is written.
            var multipleTumors = _options.TumorTypes.Count > 1;

            using (var sw = new StreamWriter(_options.OutputFile))
            {
                var sampleHeader = multipleTumors
                    ? samples.Merge("\t")
                    : (from s in samples select s.StringAfter("_")).Merge("\t");
                sw.WriteLine("Gene\t{0}", sampleHeader);

                foreach (var gene in genes)
                {
                    sw.Write(gene);
                    foreach (var sample in samples)
                    {
                        sw.Write("\t{0}", getValue(valueMap[sample][gene]));
                    }
                    sw.WriteLine();
                }
            }

            using (var sw = new StreamWriter(_options.DesignFile))
            {
                sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
                if (headers.Count > 0)
                {
                    sw.WriteLine("\t{0}", headers.Merge("\t"));
                }
                else
                {
                    sw.WriteLine();
                }

                foreach (var entry in barMap)
                {
                    var tumor      = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
                    var sampleName = multipleTumors ? entry.Key : entry.Key.StringAfter("_");

                    // NOTE(review): Find can return null for an unknown code (GetTCGASampleCodes
                    // checks for that); here the result is dereferenced directly — confirm that
                    // BarInfo.Sample always holds a known code.
                    var type = TCGASampleCode.Find(entry.Value.Sample);
                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName, entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode,
                             type.Definition);

                    // Samples without clinical data get an empty annotation, so every
                    // clinical column is written as an empty cell.
                    var         key = GetSampleKey(tumor, entry.Value.Paticipant);
                    IAnnotation vdata;
                    if (!clindata.TryGetValue(key, out vdata))
                    {
                        vdata = new Annotation();
                    }

                    foreach (var header in headers)
                    {
                        if (vdata.Annotations.ContainsKey(header))
                        {
                            sw.Write("\t{0}", vdata.Annotations[header]);
                        }
                        else
                        {
                            sw.Write("\t");
                        }
                    }
                    sw.WriteLine();
                }
            }

            if (!multipleTumors)
            {
                // Single tumor type: also run the clinical information builder on the data
                // file and report whatever files it produces.
                var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
                {
                    ClinicalFile   = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
                    DataFile       = _options.OutputFile,
                    ThrowException = false,
                };
                result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions)
                {
                    Progress = this.Progress
                }.Process());
            }
            Progress.End();

            if (noclinical.Count == 0)
            {
                return(result.ToArray());
            }

            return(new[] { string.Format("There are {0} samples without patient information:\n  {1}\n\nResult have been saved to:\n  {2}", noclinical.Count, noclinical.Merge("\n  "), result.Merge("\n  ")) });
        }