public void TestSetGeneString()
    {
      // One item is reused throughout: each GeneString assignment below is followed by
      // assertions on the gene list produced by that assignment alone.
      var summary = new AnnovarGenomeSummaryItem();

      // Scenario 1: a bare gene symbol.
      summary.GeneString = "TP53";
      var genes = summary.Genes;
      Assert.AreEqual(1, genes.Count);
      Assert.AreEqual("TP53", genes[0].Name);

      // Scenario 2: two comma-separated genes, each with a parenthesised annotation.
      summary.GeneString = "FLJ20518(dist=2106),LOC401074(dist=2958)";
      genes = summary.Genes;
      Assert.AreEqual(2, genes.Count);
      Assert.AreEqual("FLJ20518", genes[0].Name);
      Assert.AreEqual("dist=2106", genes[0].Annotation);
      Assert.AreEqual("LOC401074", genes[1].Name);
      Assert.AreEqual("dist=2958", genes[1].Annotation);

      // Scenario 3: the "NONE(dist=NONE)" entry must not be reported as a gene.
      summary.GeneString = "FLJ20518(dist=2106),NONE(dist=NONE)";
      genes = summary.Genes;
      Assert.AreEqual(1, genes.Count);
      Assert.AreEqual("FLJ20518", genes[0].Name);
      Assert.AreEqual("dist=2106", genes[0].Annotation);

      // Scenario 4: the annotation itself contains ':' , '.', '-' and '>' characters.
      summary.GeneString = "FRAS1(NM_025074:exon47:c.6584-2A>G)";
      genes = summary.Genes;
      Assert.AreEqual(1, genes.Count);
      Assert.AreEqual("FRAS1", genes[0].Name);
      Assert.AreEqual("NM_025074:exon47:c.6584-2A>G", genes[0].Annotation);
    }
    /// <summary>
    /// Converts a comma-separated ANNOVAR summary file into an Excel workbook that is
    /// written to <c>targetFile</c>. The single sheet "all" carries the input columns plus
    /// a "Description" column inserted right after the gene column (filled from the map
    /// loaded from <c>affyAnnotationFile</c> when that file exists) and a "Location"
    /// column (chr:start-end) inserted at the position of the "Otherinfo" column.
    /// When the first data record matches <c>mutectRegex</c>, the trailing fields are
    /// reported as "Normal" / "Tumor" counts with a Fisher exact test p-value, plus
    /// "LOD_FStar" when the last field parses as a number.
    /// </summary>
    /// <param name="fileName">Path of the input file; its first line is the header row, split on ','.</param>
    /// <returns>A one-element array holding <c>targetFile</c>.</returns>
    /// <exception cref="Exception">The input file contains no lines at all.</exception>
    public override IEnumerable<string> Process(string fileName)
    {
      //The target file is created (or truncated) up front; the workbook is written into it at the very end.
      using (FileStream file = new FileStream(this.targetFile, FileMode.Create))
      {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlink_style = book.CreateCellStyle();
        IFont hlink_font = book.CreateFont();
        hlink_font.Underline = (byte)FontUnderlineType.SINGLE;
        hlink_font.Color = HSSFColor.BLUE.index;
        hlink_style.SetFont(hlink_font);
        hlink_style.WrapText = true;

        var wrap_style = book.CreateCellStyle();
        wrap_style.WrapText = true;

        var numeric_style = book.CreateCellStyle();
        numeric_style.DataFormat = 0xb;

        //gene symbol -> description; empty map when the annotation file is absent
        Dictionary<string, string> genenames = File.Exists(this.affyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile) : new Dictionary<string, string>();
        ISheet all = book.CreateSheet("all");

        AnnovarGenomeSummaryItem item = new AnnovarGenomeSummaryItem();

        //The reader is owned by this using block so that it is released even when
        //header handling throws before the CsvReader below is constructed.
        using (var sr = new StreamReader(fileName))
        {
          var headerLine = sr.ReadLine();
          if (headerLine == null)
          {
            throw new Exception("No entries in file " + fileName);
          }

          var headers = headerLine.Split(',').ToList();

          var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
          var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
          var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
          var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
          var chrIndex = headers.IndexOf("Chr");
          var startIndex = headers.IndexOf("Start");
          var endIndex = headers.IndexOf("End");

          var otherInfoIndex = headers.IndexOf("Otherinfo");

          //handle the headers. The length of headers may less than the data.
          //Columns after the gene column are shifted right by one to make room for "Description".
          var firstrow = all.CreateRow(0);
          for (int i = 0; i <= geneIndex; i++)
          {
            firstrow.CreateCell(i).SetCellValue(headers[i]);
          }
          firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
          for (int i = geneIndex + 1; i < otherInfoIndex; i++)
          {
            firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
          }
          firstrow.CreateCell(otherInfoIndex + 1).SetCellValue("Location");

          bool? isMuTect = null; //stays null until the first data record has been inspected
          bool isTableVersion = false; //using table_annovar.pl or summarize_annovar.pl
          bool hasLOD = false;
          double lod = 0.0;
          //handle data
          using (CsvReader csv = new CsvReader(sr, false))
          {
            int nRow = 0;
            while (csv.ReadNextRecord())
            {
              if (!isMuTect.HasValue)
              {
                //The layout is decided once, from the first record, and the remaining
                //header cells are written accordingly.
                isTableVersion = csv.FieldCount == headers.Count;
                isMuTect = mutectRegex.Match(csv[csv.FieldCount - 2]).Success;
                hasLOD = double.TryParse(csv[csv.FieldCount - 1], out lod);
                if (isMuTect.Value)
                {
                  firstrow.CreateCell(otherInfoIndex + 2).SetCellValue("Normal");
                  firstrow.CreateCell(otherInfoIndex + 3).SetCellValue("Tumor");
                  firstrow.CreateCell(otherInfoIndex + 4).SetCellValue("FisherExactTest");
                  all.SetDefaultColumnStyle(otherInfoIndex + 4, numeric_style);
                  if (hasLOD)
                  {
                    firstrow.CreateCell(otherInfoIndex + 5).SetCellValue("LOD_FStar");
                    all.SetDefaultColumnStyle(otherInfoIndex + 5, numeric_style);
                  }
                }
                else
                {
                  //shifted by two: one for "Description", one for "Location"
                  for (int i = otherInfoIndex; i < headers.Count; i++)
                  {
                    firstrow.CreateCell(i + 2).SetCellValue(headers[i]);
                  }
                }
              }

              nRow++;
              var row = all.CreateRow(nRow);
              for (int i = 0; i < geneIndex; i++)
              {
                row.CreateCell(i).SetCellValue(csv[i]);
              }

              item.GeneString = csv[geneIndex];
              //A gene string may yield no genes at all; indexing Genes[0] would then throw,
              //so the gene and description cells are only filled when something was parsed.
              if (item.Genes.Count > 0)
              {
                //add link for gene symbol
                var cell = row.CreateCell(geneIndex);
                cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                {
                  Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                };
                cell.CellStyle = hlink_style;
                cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                //gene description
                var desCell = row.CreateCell(geneIndex + 1);
                desCell.CellStyle = wrap_style;
                desCell.SetCellValue((from gene in item.Genes
                                      let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                      select description).Merge("\n"));
              }

              //add location information
              for (int i = geneIndex + 1; i < otherInfoIndex; i++)
              {
                row.CreateCell(i + 1).SetCellValue(csv[i]);
              }
              var locationCell = row.CreateCell(otherInfoIndex + 1);
              locationCell.SetCellValue(string.Format("{0}:{1}-{2}", csv[chrIndex], csv[startIndex], csv[endIndex]));

              if (isMuTect.Value)
              {
                Match normal, tumor;
                if (isTableVersion)
                {
                  //table layout: the last CSV field holds tab-separated values, normal before tumor
                  var parts = csv[csv.FieldCount - 1].Split('\t');
                  if (hasLOD)
                  {
                    normal = mutectRegex.Match(parts[parts.Length - 3]);
                    tumor = mutectRegex.Match(parts[parts.Length - 2]);
                    lod = double.Parse(parts[parts.Length - 1]);
                  }
                  else
                  {
                    normal = mutectRegex.Match(parts[parts.Length - 2]);
                    tumor = mutectRegex.Match(parts[parts.Length - 1]);
                  }
                }
                else
                {
                  //otherwise the values are separate trailing CSV fields, tumor before normal
                  if (hasLOD)
                  {
                    tumor = mutectRegex.Match(csv[csv.FieldCount - 3]);
                    normal = mutectRegex.Match(csv[csv.FieldCount - 2]);
                    lod = double.Parse(csv[csv.FieldCount - 1]);
                  }
                  else
                  {
                    tumor = mutectRegex.Match(csv[csv.FieldCount - 2]);
                    normal = mutectRegex.Match(csv[csv.FieldCount - 1]);
                  }
                }
                FisherExactTestResult fetr = new FisherExactTestResult();
                fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
                fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

                row.CreateCell(otherInfoIndex + 2).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                row.CreateCell(otherInfoIndex + 3).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                row.CreateCell(otherInfoIndex + 4).SetCellValue(fetr.CalculateTwoTailPValue());
                if (hasLOD)
                {
                  row.CreateCell(otherInfoIndex + 5).SetCellValue(lod);
                }
              }
              else
              {
                for (int i = otherInfoIndex; i < csv.FieldCount; i++)
                {
                  row.CreateCell(i + 2).SetCellValue(csv[i]);
                }
              }

              if (dbsnpIndex > 0)
              {
                var dbsnpcell = row.GetCell(dbsnpIndex);
                var dbsnp = dbsnpcell == null ? null : dbsnpcell.StringCellValue;
                //Substring(2) throws on a one-character value, and an id with nothing
                //after its two-character prefix has nothing to link to.
                if (dbsnp != null && dbsnp.Length > 2)
                {
                  dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                  {
                    Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                  };
                  dbsnpcell.CellStyle = (hlink_style);
                }
              }
            }
          }

          all.SetColumnWidth(chrIndex, 5 * 256);
          all.SetColumnWidth(startIndex, 13 * 256);
          all.SetColumnWidth(endIndex, 13 * 256);
          all.SetColumnWidth(funcIndex, 15 * 256);
          all.SetColumnWidth(geneIndex, 13 * 256);
          all.SetColumnWidth(geneIndex + 1, 60 * 256);
          all.SetColumnWidth(exonicIndex, 20 * 256);
          all.SetColumnWidth(dbsnpIndex, 15 * 256);
          all.SetColumnWidth(otherInfoIndex + 1, 22 * 256);

          //isMuTect is still null when the file had a header but no data records;
          //reading .Value would throw in that case, so null is treated as "not MuTect".
          if (isMuTect.GetValueOrDefault())
          {
            all.SetColumnWidth(otherInfoIndex + 2, 10 * 256);
            all.SetColumnWidth(otherInfoIndex + 3, 10 * 256);
            all.SetColumnWidth(otherInfoIndex + 4, 10 * 256);
            if (hasLOD)
            {
              all.SetColumnWidth(otherInfoIndex + 5, 10 * 256);
            }
          }
        }

        book.Write(file);
      }

      return new string[] { targetFile };
    }
    /// <summary>
    /// Converts the tab-separated file at <c>options.InputFile</c> into an Excel workbook
    /// written to <c>options.OutputFile</c>. Leading lines starting with '#' are copied
    /// into the sheet and scanned for MuTect and LOD markers; the first non-'#' line is
    /// the header row. A "Description" column is inserted right after the gene column
    /// (filled from the map loaded from <c>options.AffyAnnotationFile</c> when that file
    /// exists). Depending on the markers found, "TLodFstar" and the MuTect columns
    /// ("NormalAlleles", "TumorAlleles", "FisherExactTest") are appended after the input columns.
    /// </summary>
    /// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
    /// <exception cref="Exception">The input holds nothing but '#' lines (or is empty).</exception>
    public override IEnumerable<string> Process()
    {
      //The output file is created (or truncated) up front; the workbook is written into it at the very end.
      using (FileStream file = new FileStream(options.OutputFile, FileMode.Create))
      {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlinkStyle = book.CreateCellStyle();
        var hlinkFont = book.CreateFont();
        hlinkFont.Underline = (byte)FontUnderlineType.SINGLE;
        hlinkFont.Color = HSSFColor.BLUE.index;
        hlinkStyle.SetFont(hlinkFont);
        hlinkStyle.WrapText = true;

        var wrapStyle = book.CreateCellStyle();
        wrapStyle.WrapText = true;

        var numericStyle = book.CreateCellStyle();
        numericStyle.DataFormat = 0xb;

        //gene symbol -> description; empty map when the annotation file is absent
        var genenames = File.Exists(options.AffyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(options.AffyAnnotationFile) : new Dictionary<string, string>();
        var all = book.CreateSheet("all");

        var item = new AnnovarGenomeSummaryItem();

        using (var sr = new StreamReader(options.InputFile))
        {
          int nRow = 0;

          bool isMutect = false;
          bool hasLod = false;
          string tumorSampleName = "", normalSampleName = "";
          string line;
          //Leading '#' lines are not skipped: each one is copied verbatim into column 0 of
          //its own row and inspected for the LOD and MuTect markers. The loop stops at the
          //first non-'#' line, which is the column header.
          while ((line = sr.ReadLine()) != null)
          {
            if (!line.StartsWith("#"))
            {
              break;
            }

            var row = all.CreateRow(nRow++);
            row.CreateCell(0).SetCellValue(line);

            if (line.StartsWith("##INFO=<ID=LOD"))
            {
              hasLod = true;
            }

            if (line.StartsWith("##MuTect"))
            {
              isMutect = true;
              tumorSampleName = line.StringAfter("tumor_sample_name=").StringBefore(" ");
              normalSampleName = line.StringAfter("normal_sample_name=").StringBefore(" ");
            }
          }

          if (line == null)
          {
            throw new Exception("No entries in file " + options.InputFile);
          }

          var headers = line.Split('\t').ToList();

          //original index
          var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
          var oldInfoIndex = headers.FindIndex(m => m.ToLower().StartsWith("info"));
          //NOTE(review): the argument order here is the reverse of the FindIndex(geneIndex, ...)
          //calls below - confirm against the FindIndex helper that this is intended.
          var newInfoIndex = FindIndex(oldInfoIndex, geneIndex);

          //relative index
          var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
          var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
          var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
          var chrIndex = headers.IndexOf("Chr");
          var startIndex = headers.IndexOf("Start");
          var endIndex = headers.IndexOf("End");
          var tumorIndex = headers.IndexOf(tumorSampleName);
          var normalIndex = headers.IndexOf(normalSampleName);

          //LOD can only be extracted when there is an info column to read it from
          hasLod = hasLod && oldInfoIndex != -1;
          //likewise the MuTect columns need both sample columns; with a missing one the
          //per-row lookup would index parts[-1]
          isMutect = isMutect && tumorIndex >= 0 && normalIndex >= 0;

          //handle the headers. The length of headers may less than the data.
          //Columns after the gene column are shifted right by one to make room for "Description".
          var firstrow = all.CreateRow(nRow++);
          for (int i = 0; i <= geneIndex; i++)
          {
            firstrow.CreateCell(i).SetCellValue(headers[i]);
          }
          firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
          for (int i = geneIndex + 1; i < headers.Count; i++)
          {
            if (isMutect)
            {
              if (i == tumorIndex)
              {
                firstrow.CreateCell(i + 1).SetCellValue("Tumor:" + tumorSampleName);
                continue;
              }

              if (i == normalIndex)
              {
                firstrow.CreateCell(i + 1).SetCellValue("Normal:" + normalSampleName);
                continue;
              }
            }
            firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
          }

          //extra columns are appended after the (shifted) input columns
          var lastcol = headers.Count + 1;
          if (hasLod)
          {
            firstrow.CreateCell(lastcol++).SetCellValue("TLodFstar");
          }

          if (isMutect)
          {
            firstrow.CreateCell(lastcol++).SetCellValue("NormalAlleles");
            firstrow.CreateCell(lastcol++).SetCellValue("TumorAlleles");
            firstrow.CreateCell(lastcol++).SetCellValue("FisherExactTest");
          }

          while ((line = sr.ReadLine()) != null)
          {
            var parts = line.Split('\t');
            //Stop at the first line too short to contain the gene column. The comparison
            //must include equality: parts[geneIndex] needs at least geneIndex + 1 fields.
            if (parts.Length <= geneIndex)
            {
              break;
            }

            var row = all.CreateRow(nRow++);
            for (int i = 0; i < geneIndex; i++)
            {
              row.CreateCell(i).SetCellValue(parts[i]);
            }

            item.GeneString = parts[geneIndex];
            if (item.Genes.Count > 0)
            {
              //add link for gene symbol
              var cell = row.CreateCell(geneIndex);
              cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
              {
                Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
              };
              cell.CellStyle = hlinkStyle;
              cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

              //gene description
              var desCell = row.CreateCell(geneIndex + 1);
              desCell.CellStyle = wrapStyle;
              desCell.SetCellValue((from gene in item.Genes
                                    let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                    select description).Merge("\n"));
            }

            //add other information
            for (int i = geneIndex + 1; i < headers.Count; i++)
            {
              row.CreateCell(i + 1).SetCellValue(parts[i]);
            }

            lastcol = headers.Count + 1;
            if (hasLod)
            {
              row.CreateCell(lastcol++).SetCellValue(parts[oldInfoIndex].StringAfter("LOD=").StringBefore(";"));
            }

            if (isMutect)
            {
              Match normal = SomaticMutationUtils.MutectPattern.Match(parts[normalIndex]);
              Match tumor = SomaticMutationUtils.MutectPattern.Match(parts[tumorIndex]);

              var fetr = new FisherExactTestResult();
              fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
              fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
              fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
              fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

              row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
              row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
              row.CreateCell(lastcol).SetCellValue(fetr.CalculateTwoTailPValue());
            }

            if (dbsnpIndex > 0)
            {
              var dbsnpcell = row.GetCell(dbsnpIndex);
              var dbsnp = dbsnpcell == null ? null : dbsnpcell.StringCellValue;
              //Substring(2) throws on a one-character value, and an id with nothing
              //after its two-character prefix has nothing to link to.
              if (dbsnp != null && dbsnp.Length > 2)
              {
                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                {
                  Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                };
                dbsnpcell.CellStyle = (hlinkStyle);
              }
            }
          }


          all.SetColumnWidth(chrIndex, 5 * 256);
          all.SetColumnWidth(startIndex, 11 * 256);
          all.SetColumnWidth(endIndex, 11 * 256);
          all.SetColumnWidth(funcIndex, 15 * 256);
          all.SetColumnWidth(geneIndex, 15 * 256);
          all.SetColumnWidth(geneIndex + 1, 60 * 256);
          all.SetColumnWidth(exonicIndex, 20 * 256);
          all.SetColumnWidth(dbsnpIndex, 15 * 256);

          //widths for the appended columns, walked in the same order they were created
          lastcol = headers.Count + 1;
          if (hasLod)
          {
            all.SetColumnWidth(newInfoIndex, 15 * 256);
            all.SetColumnWidth(lastcol++, 10 * 256);
          }

          if (isMutect)
          {
            all.SetColumnWidth(lastcol++, 10 * 256);
            all.SetColumnWidth(lastcol++, 10 * 256);
            all.SetColumnWidth(lastcol, 10 * 256);
          }
        }
        book.Write(file);
      }
      return new string[] { options.OutputFile };
    }