Esempio n. 1
0
    public override IEnumerable<string> Process()
    {
      var sql = string.Format(@"
select DISTINCT gse.gse, count(gsm.gsm) as gsm_count, gse.submission_date, gse.title, gse.summary, gse.type, gse.overall_design
from gse 
	JOIN gse_gsm ON gse.gse=gse_gsm.gse   
	JOIN gsm ON gse_gsm.gsm=gsm.gsm
	JOIN gse_gpl ON gse_gpl.gse=gse.gse   
	JOIN gpl ON gse_gpl.gpl=gpl.gpl 
{0} 
GROUP BY gse.gse
order by gse.ID", options.GetFilterSql());

      File.WriteAllText(Path.ChangeExtension(options.OutputFile, ".sql"), sql);

      SQLiteDBHelper sqlite = new SQLiteDBHelper(options.GeoMetaDatabase);

      var data = sqlite.ExecuteReader(sql, null);

      var xlApp = new Microsoft.Office.Interop.Excel.Application();
      try
      {
        Workbook workbook = xlApp.Workbooks.Add();
        try
        {
          Worksheet workSheet = workbook.Worksheets[1];
          for (int i = 0; i < data.FieldCount; i++)
          {
            workSheet.Cells[1, i + 1] = data.GetName(i);
          }

          var row = 1;
          while (data.Read())
          {
            var gsmCount = data.GetInt32(1);
            if (gsmCount >= options.MininumGsmPerGse)
            {
              row++;

              for (int i = 0; i < data.FieldCount; i++)
              {
                workSheet.Cells[row, i + 1] = data.GetValue(i);
              }

              var position = "A" + row.ToString();
              Range range = workSheet.Range[position];
              var link = string.Format(@"http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={0}", data.GetString(0));
              workSheet.Hyperlinks.Add(range, link);
            }
          }
          workSheet.Range["A:A"].EntireColumn.ColumnWidth = 10;
          workSheet.Range["A:A"].EntireColumn.VerticalAlignment = XlVAlign.xlVAlignTop;
          workSheet.Range["B:B"].EntireColumn.ColumnWidth = 10;
          workSheet.Range["B:B"].EntireColumn.VerticalAlignment = XlVAlign.xlVAlignTop;
          workSheet.Range["C:C"].EntireColumn.ColumnWidth = 15;
          workSheet.Range["C:C"].EntireColumn.VerticalAlignment = XlVAlign.xlVAlignTop;
          for (int i = 3; i < data.FieldCount; i++)
          {
            var col = (char)('A' + i);
            var range = string.Format("{0}:{0}", col);
            workSheet.Range[range].EntireColumn.ColumnWidth = 60;
            workSheet.Range[range].EntireColumn.WrapText = true;
            workSheet.Range[range].EntireColumn.VerticalAlignment = XlVAlign.xlVAlignTop;
          }
          xlApp.DisplayAlerts = false;
          workbook.SaveAs(options.OutputFile);
        }
        finally
        {
          workbook.Close();
        }
      }
      finally
      {
        xlApp.Quit();
      }

      return new string[] { options.OutputFile };
    }
Esempio n. 2
0
    public override IEnumerable<string> Process()
    {
      var gses = File.ReadAllLines(options.GseListFile).Where(m => !string.IsNullOrWhiteSpace(m)).ToList().ConvertAll(m => m.Split(new[] { '\t', ' ' })[0]).ToArray();

      var gseInfoMap = new Dictionary<string, Dictionary<string, Dictionary<string, List<string>>>>();
      for (int gseIndex = 0; gseIndex < gses.Length; gseIndex++)
      {
        var gse = gses[gseIndex];

        Func<GsmRecord, bool> accept;
        if (!options.AcceptMap.TryGetValue(gse, out accept))
        {
          accept = m => true;
        }

        Func<Dictionary<string, List<string>>, bool> acceptDescription;
        if (!options.AcceptDescriptionMap.TryGetValue(gse, out acceptDescription))
        {
          acceptDescription = null;
        }

        var sql = string.Format(@"select gse.gse, gsm.supplementary_file, gsm.title, gsm.gpl, gsm.source_name_ch1
from gse 
	JOIN gse_gsm ON gse.gse=gse_gsm.gse   
	JOIN gsm ON gse_gsm.gsm=gsm.gsm
where 
  gse.gse == '{0}'
  and (gsm.supplementary_file like '%CEL.gz%' or gsm.supplementary_file like '%cel.gz%')
group by gse.gse, gsm.supplementary_file
", gse);

        SQLiteDBHelper sqlite = new SQLiteDBHelper(options.GeoMetaDatabase);

        Progress.SetMessage("{0}/{1}: querying files ...", gseIndex + 1, gses.Length);
        var data = sqlite.ExecuteDataTable(sql, null);

        var records = new List<GsmRecord>();
        for (int row = 0; row < data.Rows.Count; row++)
        {
          var rec = new GsmRecord()
          {
            GSE = data.Rows[row].ItemArray[0].ToString(),
            Url = data.Rows[row].ItemArray[1].ToString(),
            Title = data.Rows[row].ItemArray[2].ToString(),
            GPL = data.Rows[row].ItemArray[3].ToString(),
            SourceName = data.Rows[row].ItemArray[4].ToString()
          };

          records.Add(rec);
        }

        var totalValids = records.Where(m => accept(m)).Count();

        int currentValid = 0;
        foreach (var rec in records)
        {
          var file = rec.Url.Split(';').Where(m => m.Trim().ToLower().EndsWith("cel.gz")).First();
          var dataDir = builder.DataDir + "/";
          var dir = dataDir + rec.GSE;
          var gzipped = dir + "/" + Path.GetFileName(file);
          var tmp = gzipped + "tmp";

          if (!accept(rec))
          {
            DeleteGzippedFile(gzipped);
            continue;
          }

          currentValid++;

          var prefix = string.Format("{0}/{1} ~ {2} : {3}/{4}", gseIndex + 1, gses.Length, rec.GSE, currentValid, totalValids);
          Progress.SetMessage(prefix + " ~ " + file);

          if (!Directory.Exists(dataDir))
          {
            Directory.CreateDirectory(dataDir);
          }

          if (!Directory.Exists(dir))
          {
            Directory.CreateDirectory(dir);
          }

          if (acceptDescription != null)
          {
            Dictionary<string, Dictionary<string, List<string>>> curInfoMap;
            if (!gseInfoMap.TryGetValue(rec.GSE, out curInfoMap))
            {
              if (!GseSeriesMatrixReader.HasMatrixFiles(dir))
              {
                new GseMatrixDownloader(new GseMatrixDownloaderOptions()
                {
                  InputDirectory = dir
                }).Process();
              }
              if (!GseSeriesMatrixReader.HasMatrixFiles(dir))
              {
                throw new Exception("Failed to download matrix file for " + gse);
              }
              curInfoMap = new GseSeriesMatrixReader().ReadDescriptionFromDirectory(dir);
              gseInfoMap[rec.GSE] = curInfoMap;
            }

            var gsmName = Path.GetFileName(file).StringBefore(".cel.gz").StringBefore(".CEL.gz");
            var sampleInfoMap = curInfoMap[gsmName];
            if (!acceptDescription(sampleInfoMap))
            {
              DeleteGzippedFile(gzipped);
              continue;
            }
          }

          if (File.Exists(gzipped) && new FileInfo(gzipped).Length == 0)
          {
            File.Delete(gzipped);
          }

          if (!File.Exists(gzipped))
          {
            Progress.SetMessage(prefix + " ~ downloading " + file + " ...");
            if (!WebUtils.DownloadFile(file, tmp))
            {
              Console.Error.WriteLine("Download {0} failed.", file);
              break;
            }

            File.Move(tmp, gzipped);
          }
        }
      }

      var nocels = (from gse in gses
                    let dir = builder.DataDir + "/" + gse
                    where !Directory.Exists(dir)
                    select gse).Merge("\n");

      if (!string.IsNullOrEmpty(nocels))
      {
        throw new Exception("No cel file found for\n" + nocels);
      }

      return new[] { options.RootDirectory };
    }