/// <summary>
/// Collects the barcode map of every configured tumor type, reads the clinical
/// data, loads the values through <c>GetData</c> and writes two tab-separated
/// files: the gene-by-sample data file (<c>_options.OutputFile</c>) and the
/// sample design file (<c>_options.DesignFile</c>).
/// </summary>
/// <returns>
/// When every sample has clinical data: the output file, the design file and,
/// if at most one tumor type is configured, whatever the
/// <c>TCGAClinicalInformationBuilder</c> run returns. Otherwise a single
/// message string listing the samples without patient information together
/// with the saved files.
/// </returns>
public override IEnumerable<string> Process()
        {
            var result = new List<string>();

            // Merge the per-tumor barcode maps into one map keyed by GetSampleKey(tumor, key).
            var barMap = new Dictionary<string, BarInfo>();
            foreach (var tumor in _options.TumorTypes)
            {
                var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory,
                                                         _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());

                foreach (var v in curMap)
                {
                    barMap[GetSampleKey(tumor, v.Key)] = v.Value;
                }
            }

            // ReadClinData receives the same dictionary and header list for every tumor type.
            var headers  = new List<string>();
            var clindata = new Dictionary<string, IAnnotation>();
            foreach (var tumor in _options.TumorTypes)
            {
                ReadClinData(clindata, tumor, headers);
            }
            Console.WriteLine("{0} patient clinical information readed", clindata.Count);

            // Iterate over a snapshot (ToList) because entries may be removed from barMap inside the loop.
            var noclinical = new List<string>();
            foreach (var bm in barMap.ToList())
            {
                if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
                {
                    continue;
                }

                noclinical.Add(bm.Key);
                Console.Error.WriteLine(string.Format("Cannot find clinical data for patient {0}", bm.Value.Paticipant));
                if (_options.WithClinicalInformationOnly)
                {
                    barMap.Remove(bm.Key);
                }
            }

            Progress.SetMessage("Reading data ...");
            Func<double, double> getValue;
            var valueMap = GetData(barMap, out getValue);

            var genes   = GetCommonGenes(valueMap);
            var samples = valueMap.Keys.OrderBy(m => m).ToList();

            Progress.SetMessage("Saving data ...");

            result.Add(_options.OutputFile);
            result.Add(_options.DesignFile);

            // With several tumor types the full sample key is written; otherwise the key
            // is reduced with StringAfter("_") (presumably dropping the tumor prefix added
            // by GetSampleKey - confirm against that helper).
            bool multipleTumors = _options.TumorTypes.Count > 1;
            Func<string, string> sampleName;
            if (multipleTumors)
            {
                sampleName = key => key;
            }
            else
            {
                sampleName = key => key.StringAfter("_");
            }

            // Data file: one header row of sample names, then one row per gene.
            using (var sw = new StreamWriter(_options.OutputFile))
            {
                sw.WriteLine("Gene\t{0}", samples.Select(sampleName).Merge("\t"));
                foreach (var gene in genes)
                {
                    sw.Write(gene);
                    foreach (var sample in samples)
                    {
                        sw.Write("\t{0}", getValue(valueMap[sample][gene]));
                    }
                    sw.WriteLine();
                }
            }

            // Design file: fixed sample columns followed by one column per clinical header.
            using (var sw = new StreamWriter(_options.DesignFile))
            {
                sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
                if (headers.Count > 0)
                {
                    sw.WriteLine("\t{0}", headers.Merge("\t"));
                }
                else
                {
                    sw.WriteLine();
                }

                foreach (var entry in barMap)
                {
                    var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
                    var type  = TCGASampleCode.Find(entry.Value.Sample);
                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName(entry.Key), entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode,
                             type.Definition);

                    // Samples without clinical data get an empty annotation, i.e. blank clinical columns.
                    IAnnotation vdata;
                    if (!clindata.TryGetValue(GetSampleKey(tumor, entry.Value.Paticipant), out vdata))
                    {
                        vdata = new Annotation();
                    }

                    foreach (var header in headers)
                    {
                        if (vdata.Annotations.ContainsKey(header))
                        {
                            sw.Write("\t{0}", vdata.Annotations[header]);
                        }
                        else
                        {
                            sw.Write("\t");
                        }
                    }
                    sw.WriteLine();
                }
            }

            // The clinical information builder is only run when there is a single tumor type.
            if (!multipleTumors)
            {
                var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
                {
                    ClinicalFile   = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
                    DataFile       = _options.OutputFile,
                    ThrowException = false,
                };
                result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions)
                {
                    Progress = this.Progress
                }.Process());
            }
            Progress.End();

            if (noclinical.Count == 0)
            {
                return(result.ToArray());
            }

            return(new[] { string.Format("There are {0} samples without patient information:\n  {1}\n\nResult have been saved to:\n  {2}", noclinical.Count, noclinical.Merge("\n  "), result.Merge("\n  ")) });
        }
 /// <summary>
 /// Creates a builder for the given options and immediately calls
 /// <c>PrepareOptions()</c> on them.
 /// </summary>
 /// <param name="options">Options used by this builder; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="options"/> is null.
 /// </exception>
 public TCGAClinicalInformationBuilder(TCGAClinicalInformationBuilderOptions options)
 {
   // Fail with a descriptive argument error instead of a NullReferenceException below.
   if (options == null)
   {
     throw new System.ArgumentNullException("options");
   }

   this._options = options;
   this._options.PrepareOptions();
 }
Esempio n. 3
0
    /// <summary>
    /// Collects the barcode map of every configured tumor type, reads the clinical
    /// data, loads the values through <c>GetData</c> and writes two tab-separated
    /// files: the gene-by-sample data file (<c>_options.OutputFile</c>) and the
    /// sample design file (<c>_options.DesignFile</c>).
    /// </summary>
    /// <returns>
    /// When every sample has clinical data: the output file, the design file and,
    /// if at most one tumor type is configured, whatever the
    /// <c>TCGAClinicalInformationBuilder</c> run returns. Otherwise a single
    /// message string listing the samples without patient information together
    /// with the saved files.
    /// </returns>
    public override IEnumerable<string> Process()
    {
      var result = new List<string>();

      // Merge the per-tumor barcode maps into one map keyed by GetSampleKey(tumor, key).
      var barMap = new Dictionary<string, BarInfo>();
      foreach (var tumor in _options.TumorTypes)
      {
        var curMap = TCGAUtils.GetBarcodeFileMap(_options.TCGADirectory,
          _options.GetTechnology(), tumor, _options.Platforms, _options.GetTCGASampleCodes().ToArray());

        foreach (var v in curMap)
        {
          barMap[GetSampleKey(tumor, v.Key)] = v.Value;
        }
      }

      // ReadClinData receives the same dictionary and header list for every tumor type.
      var headers = new List<string>();
      var clindata = new Dictionary<string, IAnnotation>();
      foreach (var tumor in _options.TumorTypes)
      {
        ReadClinData(clindata, tumor, headers);
      }
      Console.WriteLine("{0} patient clinical information readed", clindata.Count);

      // Iterate over a snapshot (ToList) because entries may be removed from barMap inside the loop.
      var noclinical = new List<string>();
      foreach (var bm in barMap.ToList())
      {
        if (clindata.ContainsKey(GetSampleKey(GetTumorType(bm.Key), bm.Value.Paticipant)))
        {
          continue;
        }

        noclinical.Add(bm.Key);
        Console.Error.WriteLine(string.Format("Cannot find clinical data for patient {0}", bm.Value.Paticipant));
        if (_options.WithClinicalInformationOnly)
        {
          barMap.Remove(bm.Key);
        }
      }

      Progress.SetMessage("Reading data ...");
      Func<double, double> getValue;
      var valueMap = GetData(barMap, out getValue);

      var genes = GetCommonGenes(valueMap);
      var samples = valueMap.Keys.OrderBy(m => m).ToList();

      Progress.SetMessage("Saving data ...");

      result.Add(_options.OutputFile);
      result.Add(_options.DesignFile);

      // With several tumor types the full sample key is written; otherwise the key
      // is reduced with StringAfter("_") (presumably dropping the tumor prefix added
      // by GetSampleKey - confirm against that helper).
      bool multipleTumors = _options.TumorTypes.Count > 1;
      Func<string, string> sampleName;
      if (multipleTumors)
      {
        sampleName = key => key;
      }
      else
      {
        sampleName = key => key.StringAfter("_");
      }

      // Data file: one header row of sample names, then one row per gene.
      using (var sw = new StreamWriter(_options.OutputFile))
      {
        sw.WriteLine("Gene\t{0}", samples.Select(sampleName).Merge("\t"));
        foreach (var gene in genes)
        {
          sw.Write(gene);
          foreach (var sample in samples)
          {
            sw.Write("\t{0}", getValue(valueMap[sample][gene]));
          }
          sw.WriteLine();
        }
      }

      // Design file: fixed sample columns followed by one column per clinical header.
      using (var sw = new StreamWriter(_options.DesignFile))
      {
        sw.Write("Sample\tBarcode\tPatient\tTumorType\tPlatform\tSampleType\tSampleTypeDescription");
        if (headers.Count > 0)
        {
          sw.WriteLine("\t{0}", headers.Merge("\t"));
        }
        else
        {
          sw.WriteLine();
        }

        foreach (var entry in barMap)
        {
          var tumor = multipleTumors ? GetTumorType(entry.Key) : _options.TumorTypes.First();
          var type = TCGASampleCode.Find(entry.Value.Sample);
          sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", sampleName(entry.Key), entry.Value.BarCode, entry.Value.Paticipant, tumor, entry.Value.Platform, type.ShortLetterCode,
            type.Definition);

          // Samples without clinical data get an empty annotation, i.e. blank clinical columns.
          IAnnotation vdata;
          if (!clindata.TryGetValue(GetSampleKey(tumor, entry.Value.Paticipant), out vdata))
          {
            vdata = new Annotation();
          }

          foreach (var header in headers)
          {
            if (vdata.Annotations.ContainsKey(header))
            {
              sw.Write("\t{0}", vdata.Annotations[header]);
            }
            else
            {
              sw.Write("\t");
            }
          }
          sw.WriteLine();
        }
      }

      // The clinical information builder is only run when there is a single tumor type.
      if (!multipleTumors)
      {
        var clinicalOptions = new TCGAClinicalInformationBuilderOptions()
        {
          ClinicalFile = TCGAUtils.GetClinicPatientFile(_options.TCGADirectory, _options.TumorTypes.First()),
          DataFile = _options.OutputFile,
          ThrowException = false,
        };
        result.AddRange(new TCGAClinicalInformationBuilder(clinicalOptions) { Progress = this.Progress }.Process());
      }
      Progress.End();

      if (noclinical.Count == 0)
      {
        return result.ToArray();
      }

      return new[] { string.Format("There are {0} samples without patient information:\n  {1}\n\nResult have been saved to:\n  {2}", noclinical.Count, noclinical.Merge("\n  "), result.Merge("\n  ")) };
    }
Esempio n. 4
0
 /// <summary>
 /// Creates a builder for the given options and immediately calls
 /// <c>PrepareOptions()</c> on them.
 /// </summary>
 /// <param name="options">Options used by this builder; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="options"/> is null.
 /// </exception>
 public TCGAClinicalInformationBuilder(TCGAClinicalInformationBuilderOptions options)
 {
     // Fail with a descriptive argument error instead of a NullReferenceException below.
     if (options == null)
     {
         throw new System.ArgumentNullException("options");
     }

     this._options = options;
     this._options.PrepareOptions();
 }