Example #1
0
        /// <summary>
        /// Builds a Fisher exact test table for the first two samples: labels the two rows with the
        /// sample names, labels the columns with the major/minor event, and fills in the counts
        /// through <c>PrepareCount</c>.
        /// </summary>
        /// <param name="events">The major (counted as succeed) and minor (counted as failed) events.</param>
        /// <returns>The populated table; no p-value is calculated here.</returns>
        public FisherExactTestResult InitializeTable(PairedEvent events)
        {
            var table = new FisherExactTestResult();

            // Names are read from Samples, counts from _samples, exactly as the two are indexed here.
            table.Sample1.Name = Samples[0].SampleName;
            table.Sample2.Name = Samples[1].SampleName;
            table.SucceedName  = events.MajorEvent;
            table.FailedName   = events.MinorEvent;

            PrepareCount(table.Sample1, events, _samples[0]);
            PrepareCount(table.Sample2, events, _samples[1]);

            return table;
        }
        /// <summary>
        /// Tests the observed T-to-C read count against a synthetic sample of the same size built
        /// from the expected T-to-C rate, using a two-tailed Fisher exact test.
        /// </summary>
        /// <param name="totalRead">Total number of reads observed.</param>
        /// <param name="t2cRead">Number of observed reads carrying the T-to-C event.</param>
        /// <param name="expectT2CRate">Expected fraction of reads carrying the T-to-C event.</param>
        /// <returns>
        /// 1 when fewer T-to-C reads were observed than expected; otherwise the two-tailed p-value.
        /// </returns>
        public static double CalculateT2CPvalue(int totalRead, int t2cRead, double expectT2CRate)
        {
            var fisher = new FisherExactTestResult();

            // Observed sample.
            fisher.Sample1.Succeed = totalRead - t2cRead;
            fisher.Sample1.Failed  = t2cRead;

            // Expected sample. Only the T-to-C count is derived from the rate; the remainder is
            // taken by subtraction so both cells always add up to totalRead. Truncating
            // totalRead * (1 - rate) separately could lose a read to floating-point error
            // (e.g. 100 * (1 - 0.9) truncates to 9, not 10).
            var expectedT2CRead = (int)(totalRead * expectT2CRate);
            fisher.Sample2.Succeed = totalRead - expectedT2CRead;
            fisher.Sample2.Failed  = expectedT2CRead;

            // Below-expectation observations are reported as not significant without running the test.
            if (fisher.Sample1.Failed < fisher.Sample2.Failed)
            {
                return(1);
            }

            return(fisher.CalculateTwoTailPValue());
        }
        /// <summary>
        /// Builds and evaluates a Fisher exact test table from this collection's key/count entries.
        /// Sample1 holds the sum of all counts as succeed with zero failed; Sample2 holds the
        /// reference count as succeed and the highest non-reference count as failed.
        /// </summary>
        /// <returns>
        /// The evaluated table, or an untouched new table when the collection is empty or contains
        /// only the reference entry.
        /// </returns>
        public FisherExactTestResult FisherExactTest()
        {
            var table = new FisherExactTestResult();

            // Nothing to compare: no entries at all, or the reference is the only entry.
            if (this.Count == 0 || (this.Count == 1 && this.ContainsKey(this.Reference)))
            {
                return table;
            }

            var ranked = this.ToList().OrderByDescending(entry => entry.Value).ToList();
            var top    = ranked[0];

            table.Sample1.Name    = this.Reference.ToString();
            table.Sample1.Succeed = ranked.Sum(entry => entry.Value);
            table.Sample1.Failed  = 0;

            if (top.Key == this.Reference)
            {
                // Reference is the most frequent entry, so the runner-up is the alternative.
                var runnerUp = ranked[1];
                table.Sample2.Name    = runnerUp.Key.ToString();
                table.Sample2.Succeed = top.Value;
                table.Sample2.Failed  = runnerUp.Value;
            }
            else
            {
                // The most frequent entry is the alternative; the reference may be absent.
                table.Sample2.Name    = top.Key.ToString();
                table.Sample2.Failed  = top.Value;
                table.Sample2.Succeed = this.ContainsKey(this.Reference) ? this[this.Reference] : 0;
            }

            table.CalculateTwoTailPValue();
            return table;
        }
    /// <summary>
    /// Checks that PileupItemTumorTest.Accept applies both its minimum-percentage and
    /// minimum-read thresholds.
    /// </summary>
    public void TestAccept()
    {
      // Only Sample2 takes part in the decision, so Sample1 stays at zero.
      var table = new FisherExactTestResult();
      table.Sample1.Failed = 0;
      table.Sample1.Succeed = 0;
      table.Sample2.Failed = 3;
      table.Sample2.Succeed = 30;

      // 3 failed against 30 succeed is rejected at a 0.1 percentage threshold.
      var filter = new PileupItemTumorTest(1, 0.1);
      Assert.IsFalse(filter.Accept(table));

      // One more failed read is enough to pass the minimum percentage.
      table.Sample2.Failed = 4;
      Assert.IsTrue(filter.Accept(table));

      // Raising the minimum read count to 5 rejects the same table again.
      filter = new PileupItemTumorTest(5, 0.1);
      Assert.IsFalse(filter.Accept(table));
    }
Example #5
0
        /// <summary>
        /// Converts an ANNOVAR comma-separated summary file into an Excel workbook written to
        /// <c>targetFile</c>. A gene "Description" column and a "Location" column are inserted,
        /// gene symbols and dbSNP ids become hyperlinks, and for MuTect input the normal/tumor
        /// counts are extracted and a two-tailed Fisher exact test p-value is added per row.
        /// </summary>
        /// <param name="fileName">Path of the comma-separated annotation file to read.</param>
        /// <returns>A one-element array containing <c>targetFile</c>.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            //Write the stream data of workbook to the root directory
            using (FileStream file = new FileStream(this.targetFile, FileMode.Create))
            {
                var book = new HSSFWorkbook();

                //cell style for hyperlinks
                //by default hyperlinks are blue and underlined
                var   hlink_style = book.CreateCellStyle();
                IFont hlink_font  = book.CreateFont();
                hlink_font.Underline = (byte)FontUnderlineType.SINGLE;
                hlink_font.Color     = HSSFColor.BLUE.index;
                hlink_style.SetFont(hlink_font);
                hlink_style.WrapText = true;

                var wrap_style = book.CreateCellStyle();
                wrap_style.WrapText = true;

                // NOTE(review): 0xb is presumably the built-in scientific format (0.00E+00) - confirm.
                var numeric_style = book.CreateCellStyle();
                numeric_style.DataFormat = 0xb;

                // Gene descriptions are optional: without an annotation file the map is empty.
                Dictionary <string, string> genenames = File.Exists(this.affyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile) : new Dictionary <string, string>();
                ISheet all = book.CreateSheet("all");

                AnnovarGenomeSummaryItem item = new AnnovarGenomeSummaryItem();

                var sr      = new StreamReader(fileName);
                var headers = sr.ReadLine().Split(',').ToList();

                // geneIndex/chr/start/end/otherInfo are indices into the input columns.
                // NOTE(review): FindIndex(geneIndex, x) presumably maps an input index to the sheet
                // column after the inserted Description column - confirm against the helper.
                var geneIndex   = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
                var funcIndex   = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
                var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
                var dbsnpIndex  = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
                var chrIndex    = headers.IndexOf("Chr");
                var startIndex  = headers.IndexOf("Start");
                var endIndex    = headers.IndexOf("End");

                var otherInfoIndex = headers.IndexOf("Otherinfo");

                //handle the headers. The header may have fewer columns than the data rows.
                var firstrow = all.CreateRow(0);
                for (int i = 0; i <= geneIndex; i++)
                {
                    firstrow.CreateCell(i).SetCellValue(headers[i]);
                }
                firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
                // Columns after the gene column shift right by one to make room for Description.
                for (int i = geneIndex + 1; i < otherInfoIndex; i++)
                {
                    firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
                }
                firstrow.CreateCell(otherInfoIndex + 1).SetCellValue("Location");

                bool?  isMuTect       = null;  // decided from the first data record
                bool   isTableVersion = false; //using table_annovar.pl or summarize_annovar.pl
                bool   hasLOD         = false;
                double lod            = 0.0;
                //handle data
                using (CsvReader csv = new CsvReader(sr, false))
                {
                    int nRow = 0;
                    while (csv.ReadNextRecord())
                    {
                        if (!isMuTect.HasValue)
                        {
                            // First record: detect the input flavour and finish the header row.
                            isTableVersion = csv.FieldCount == headers.Count;
                            isMuTect       = mutectRegex.Match(csv[csv.FieldCount - 2]).Success;
                            hasLOD         = double.TryParse(csv[csv.FieldCount - 1], out lod);
                            if (isMuTect.Value)
                            {
                                firstrow.CreateCell(otherInfoIndex + 2).SetCellValue("Normal");
                                firstrow.CreateCell(otherInfoIndex + 3).SetCellValue("Tumor");
                                firstrow.CreateCell(otherInfoIndex + 4).SetCellValue("FisherExactTest");
                                all.SetDefaultColumnStyle(otherInfoIndex + 4, numeric_style);
                                if (hasLOD)
                                {
                                    firstrow.CreateCell(otherInfoIndex + 5).SetCellValue("LOD_FStar");
                                    all.SetDefaultColumnStyle(otherInfoIndex + 5, numeric_style);
                                }
                            }
                            else
                            {
                                // Description and Location each add a column, hence the shift of two.
                                for (int i = otherInfoIndex; i < headers.Count; i++)
                                {
                                    firstrow.CreateCell(i + 2).SetCellValue(headers[i]);
                                }
                            }
                        }

                        nRow++;
                        var row = all.CreateRow(nRow);
                        for (int i = 0; i < geneIndex; i++)
                        {
                            row.CreateCell(i).SetCellValue(csv[i]);
                        }

                        //add link for gene symbol
                        item.GeneString = csv[geneIndex];
                        var cell = row.CreateCell(geneIndex);
                        cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                        {
                            Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                        };
                        cell.CellStyle = hlink_style;
                        cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                        //gene description
                        var desCell = row.CreateCell(geneIndex + 1);
                        desCell.CellStyle = wrap_style;
                        desCell.SetCellValue((from gene in item.Genes
                                              let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                              select description).Merge("\n"));

                        //add location information
                        for (int i = geneIndex + 1; i < otherInfoIndex; i++)
                        {
                            row.CreateCell(i + 1).SetCellValue(csv[i]);
                        }
                        var locationCell = row.CreateCell(otherInfoIndex + 1);
                        locationCell.SetCellValue(string.Format("{0}:{1}-{2}", csv[chrIndex], csv[startIndex], csv[endIndex]));

                        if (isMuTect.Value)
                        {
                            Match normal, tumor;
                            if (isTableVersion)
                            {
                                // The last field carries tab-separated values in this flavour.
                                var parts = csv[csv.FieldCount - 1].Split('\t');
                                if (hasLOD)
                                {
                                    normal = mutectRegex.Match(parts[parts.Length - 3]);
                                    tumor  = mutectRegex.Match(parts[parts.Length - 2]);
                                    lod    = double.Parse(parts[parts.Length - 1]);
                                }
                                else
                                {
                                    normal = mutectRegex.Match(parts[parts.Length - 2]);
                                    tumor  = mutectRegex.Match(parts[parts.Length - 1]);
                                }
                            }
                            else
                            {
                                // NOTE(review): this branch reads tumor before normal, the opposite
                                // of the branch above - presumably the column order differs between
                                // the two ANNOVAR scripts; confirm.
                                if (hasLOD)
                                {
                                    tumor  = mutectRegex.Match(csv[csv.FieldCount - 3]);
                                    normal = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                    lod    = double.Parse(csv[csv.FieldCount - 1]);
                                }
                                else
                                {
                                    tumor  = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                    normal = mutectRegex.Match(csv[csv.FieldCount - 1]);
                                }
                            }
                            FisherExactTestResult fetr = new FisherExactTestResult();
                            fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                            fetr.Sample1.Failed  = int.Parse(normal.Groups[2].Value);
                            fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                            fetr.Sample2.Failed  = int.Parse(tumor.Groups[2].Value);

                            row.CreateCell(otherInfoIndex + 2).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                            row.CreateCell(otherInfoIndex + 3).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                            row.CreateCell(otherInfoIndex + 4).SetCellValue(fetr.CalculateTwoTailPValue());
                            if (hasLOD)
                            {
                                row.CreateCell(otherInfoIndex + 5).SetCellValue(lod);
                            }
                        }
                        else
                        {
                            for (int i = otherInfoIndex; i < csv.FieldCount; i++)
                            {
                                row.CreateCell(i + 2).SetCellValue(csv[i]);
                            }
                        }

                        if (dbsnpIndex > 0)
                        {
                            // The cell may be missing if this row never created that column.
                            var dbsnpcell = row.GetCell(dbsnpIndex);
                            var dbsnp     = dbsnpcell == null ? null : dbsnpcell.StringCellValue;

                            // Substring(2) strips a two-character prefix from the id, so shorter
                            // placeholder values (such as ".") are left as plain text instead of
                            // throwing ArgumentOutOfRangeException.
                            if (!string.IsNullOrEmpty(dbsnp) && dbsnp.Length > 2)
                            {
                                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                                {
                                    Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                                };
                                dbsnpcell.CellStyle = (hlink_style);
                            }
                        }
                    }
                }

                all.SetColumnWidth(chrIndex, 5 * 256);
                all.SetColumnWidth(startIndex, 13 * 256);
                all.SetColumnWidth(endIndex, 13 * 256);
                all.SetColumnWidth(funcIndex, 15 * 256);
                all.SetColumnWidth(geneIndex, 13 * 256);
                all.SetColumnWidth(geneIndex + 1, 60 * 256);
                all.SetColumnWidth(exonicIndex, 20 * 256);
                all.SetColumnWidth(dbsnpIndex, 15 * 256);
                all.SetColumnWidth(otherInfoIndex + 1, 22 * 256);

                // isMuTect is still null when the input had no data rows; treat that as
                // "not MuTect" rather than throwing on .Value.
                if (isMuTect.GetValueOrDefault())
                {
                    all.SetColumnWidth(otherInfoIndex + 2, 10 * 256);
                    all.SetColumnWidth(otherInfoIndex + 3, 10 * 256);
                    all.SetColumnWidth(otherInfoIndex + 4, 10 * 256);
                    if (hasLOD)
                    {
                        all.SetColumnWidth(otherInfoIndex + 5, 10 * 256);
                    }
                }

                book.Write(file);
            }

            return(new string[] { targetFile });
        }
Example #6
0
 /// <summary>
 /// Tallies the bases into the sample: each base whose event equals the major event increments
 /// Succeed, each base whose event equals the minor event increments Failed. Bases matching
 /// neither event are ignored.
 /// </summary>
 /// <param name="sample">The table row whose counters are incremented.</param>
 /// <param name="events">The major/minor event pair to count.</param>
 /// <param name="bases">The bases to scan.</param>
 public static void PrepareCount(FisherExactTestResult.Sample sample, PairedEvent events, PileupBaseList bases)
 {
   foreach (var pileupBase in bases)
   {
     var observed = pileupBase.Event;

     if (observed.Equals(events.MajorEvent))
     {
       sample.Succeed++;
       continue;
     }

     if (observed.Equals(events.MinorEvent))
     {
       sample.Failed++;
     }
   }
 }
Example #7
0
    /// <summary>
    /// Builds a Fisher exact test table for the first two samples: labels the two rows with the
    /// sample names, labels the columns with the major/minor event, and fills in the counts
    /// through <c>PrepareCount</c>.
    /// </summary>
    /// <param name="events">The major (counted as succeed) and minor (counted as failed) events.</param>
    /// <returns>The populated table; no p-value is calculated here.</returns>
    public FisherExactTestResult InitializeTable(PairedEvent events)
    {
      var table = new FisherExactTestResult();

      // Names are read from Samples, counts from _samples, exactly as the two are indexed here.
      table.Sample1.Name = Samples[0].SampleName;
      table.Sample2.Name = Samples[1].SampleName;
      table.SucceedName = events.MajorEvent;
      table.FailedName = events.MinorEvent;

      PrepareCount(table.Sample1, events, _samples[0]);
      PrepareCount(table.Sample2, events, _samples[1]);

      return table;
    }
    /// <summary>
    /// Tests the observed T-to-C read count against a synthetic sample of the same size built
    /// from the expected T-to-C rate, using a two-tailed Fisher exact test.
    /// </summary>
    /// <param name="totalRead">Total number of reads observed.</param>
    /// <param name="t2cRead">Number of observed reads carrying the T-to-C event.</param>
    /// <param name="expectT2CRate">Expected fraction of reads carrying the T-to-C event.</param>
    /// <returns>
    /// 1 when fewer T-to-C reads were observed than expected; otherwise the two-tailed p-value.
    /// </returns>
    public static double CalculateT2CPvalue(int totalRead, int t2cRead, double expectT2CRate)
    {
      var fisher = new FisherExactTestResult();

      // Observed sample.
      fisher.Sample1.Succeed = totalRead - t2cRead;
      fisher.Sample1.Failed = t2cRead;

      // Expected sample. Only the T-to-C count is derived from the rate; the remainder is taken
      // by subtraction so both cells always add up to totalRead. Truncating
      // totalRead * (1 - rate) separately could lose a read to floating-point error
      // (e.g. 100 * (1 - 0.9) truncates to 9, not 10).
      var expectedT2CRead = (int)(totalRead * expectT2CRate);
      fisher.Sample2.Succeed = totalRead - expectedT2CRead;
      fisher.Sample2.Failed = expectedT2CRead;

      // Below-expectation observations are reported as not significant without running the test.
      if (fisher.Sample1.Failed < fisher.Sample2.Failed)
      {
        return 1;
      }

      return fisher.CalculateTwoTailPValue();
    }
    /// <summary>
    /// Converts an ANNOVAR comma-separated summary file into an Excel workbook written to
    /// <c>targetFile</c>. A gene "Description" column and a "Location" column are inserted,
    /// gene symbols and dbSNP ids become hyperlinks, and for MuTect input the normal/tumor
    /// counts are extracted and a two-tailed Fisher exact test p-value is added per row.
    /// </summary>
    /// <param name="fileName">Path of the comma-separated annotation file to read.</param>
    /// <returns>A one-element array containing <c>targetFile</c>.</returns>
    public override IEnumerable<string> Process(string fileName)
    {
      //Write the stream data of workbook to the root directory
      using (FileStream file = new FileStream(this.targetFile, FileMode.Create))
      {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlink_style = book.CreateCellStyle();
        IFont hlink_font = book.CreateFont();
        hlink_font.Underline = (byte)FontUnderlineType.SINGLE;
        hlink_font.Color = HSSFColor.BLUE.index;
        hlink_style.SetFont(hlink_font);
        hlink_style.WrapText = true;

        var wrap_style = book.CreateCellStyle();
        wrap_style.WrapText = true;

        // NOTE(review): 0xb is presumably the built-in scientific format (0.00E+00) - confirm.
        var numeric_style = book.CreateCellStyle();
        numeric_style.DataFormat = 0xb;

        // Gene descriptions are optional: without an annotation file the map is empty.
        Dictionary<string, string> genenames = File.Exists(this.affyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile) : new Dictionary<string, string>();
        ISheet all = book.CreateSheet("all");

        AnnovarGenomeSummaryItem item = new AnnovarGenomeSummaryItem();

        var sr = new StreamReader(fileName);
        var headers = sr.ReadLine().Split(',').ToList();

        // geneIndex/chr/start/end/otherInfo are indices into the input columns.
        // NOTE(review): FindIndex(geneIndex, x) presumably maps an input index to the sheet
        // column after the inserted Description column - confirm against the helper.
        var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
        var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
        var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
        var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
        var chrIndex = headers.IndexOf("Chr");
        var startIndex = headers.IndexOf("Start");
        var endIndex = headers.IndexOf("End");

        var otherInfoIndex = headers.IndexOf("Otherinfo");

        //handle the headers. The header may have fewer columns than the data rows.
        var firstrow = all.CreateRow(0);
        for (int i = 0; i <= geneIndex; i++)
        {
          firstrow.CreateCell(i).SetCellValue(headers[i]);
        }
        firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
        // Columns after the gene column shift right by one to make room for Description.
        for (int i = geneIndex + 1; i < otherInfoIndex; i++)
        {
          firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
        }
        firstrow.CreateCell(otherInfoIndex + 1).SetCellValue("Location");

        bool? isMuTect = null; // decided from the first data record
        bool isTableVersion = false; //using table_annovar.pl or summarize_annovar.pl
        bool hasLOD = false;
        double lod = 0.0;
        //handle data
        using (CsvReader csv = new CsvReader(sr, false))
        {
          int nRow = 0;
          while (csv.ReadNextRecord())
          {
            if (!isMuTect.HasValue)
            {
              // First record: detect the input flavour and finish the header row.
              isTableVersion = csv.FieldCount == headers.Count;
              isMuTect = mutectRegex.Match(csv[csv.FieldCount - 2]).Success;
              hasLOD = double.TryParse(csv[csv.FieldCount - 1], out lod);
              if (isMuTect.Value)
              {
                firstrow.CreateCell(otherInfoIndex + 2).SetCellValue("Normal");
                firstrow.CreateCell(otherInfoIndex + 3).SetCellValue("Tumor");
                firstrow.CreateCell(otherInfoIndex + 4).SetCellValue("FisherExactTest");
                all.SetDefaultColumnStyle(otherInfoIndex + 4, numeric_style);
                if (hasLOD)
                {
                  firstrow.CreateCell(otherInfoIndex + 5).SetCellValue("LOD_FStar");
                  all.SetDefaultColumnStyle(otherInfoIndex + 5, numeric_style);
                }
              }
              else
              {
                // Description and Location each add a column, hence the shift of two.
                for (int i = otherInfoIndex; i < headers.Count; i++)
                {
                  firstrow.CreateCell(i + 2).SetCellValue(headers[i]);
                }
              }
            }

            nRow++;
            var row = all.CreateRow(nRow);
            for (int i = 0; i < geneIndex; i++)
            {
              row.CreateCell(i).SetCellValue(csv[i]);
            }

            //add link for gene symbol
            item.GeneString = csv[geneIndex];
            var cell = row.CreateCell(geneIndex);
            cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
            {
              Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
            };
            cell.CellStyle = hlink_style;
            cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

            //gene description
            var desCell = row.CreateCell(geneIndex + 1);
            desCell.CellStyle = wrap_style;
            desCell.SetCellValue((from gene in item.Genes
                                  let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                  select description).Merge("\n"));

            //add location information
            for (int i = geneIndex + 1; i < otherInfoIndex; i++)
            {
              row.CreateCell(i + 1).SetCellValue(csv[i]);
            }
            var locationCell = row.CreateCell(otherInfoIndex + 1);
            locationCell.SetCellValue(string.Format("{0}:{1}-{2}", csv[chrIndex], csv[startIndex], csv[endIndex]));

            if (isMuTect.Value)
            {
              Match normal, tumor;
              if (isTableVersion)
              {
                // The last field carries tab-separated values in this flavour.
                var parts = csv[csv.FieldCount - 1].Split('\t');
                if (hasLOD)
                {
                  normal = mutectRegex.Match(parts[parts.Length - 3]);
                  tumor = mutectRegex.Match(parts[parts.Length - 2]);
                  lod = double.Parse(parts[parts.Length - 1]);
                }
                else
                {
                  normal = mutectRegex.Match(parts[parts.Length - 2]);
                  tumor = mutectRegex.Match(parts[parts.Length - 1]);
                }
              }
              else
              {
                // NOTE(review): this branch reads tumor before normal, the opposite of the
                // branch above - presumably the column order differs between the two ANNOVAR
                // scripts; confirm.
                if (hasLOD)
                {
                  tumor = mutectRegex.Match(csv[csv.FieldCount - 3]);
                  normal = mutectRegex.Match(csv[csv.FieldCount - 2]);
                  lod = double.Parse(csv[csv.FieldCount - 1]);
                }
                else
                {
                  tumor = mutectRegex.Match(csv[csv.FieldCount - 2]);
                  normal = mutectRegex.Match(csv[csv.FieldCount - 1]);
                }
              }
              FisherExactTestResult fetr = new FisherExactTestResult();
              fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
              fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
              fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
              fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

              row.CreateCell(otherInfoIndex + 2).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
              row.CreateCell(otherInfoIndex + 3).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
              row.CreateCell(otherInfoIndex + 4).SetCellValue(fetr.CalculateTwoTailPValue());
              if (hasLOD)
              {
                row.CreateCell(otherInfoIndex + 5).SetCellValue(lod);
              }
            }
            else
            {
              for (int i = otherInfoIndex; i < csv.FieldCount; i++)
              {
                row.CreateCell(i + 2).SetCellValue(csv[i]);
              }
            }

            if (dbsnpIndex > 0)
            {
              // The cell may be missing if this row never created that column.
              var dbsnpcell = row.GetCell(dbsnpIndex);
              var dbsnp = dbsnpcell == null ? null : dbsnpcell.StringCellValue;

              // Substring(2) strips a two-character prefix from the id, so shorter placeholder
              // values (such as ".") are left as plain text instead of throwing
              // ArgumentOutOfRangeException.
              if (!string.IsNullOrEmpty(dbsnp) && dbsnp.Length > 2)
              {
                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                {
                  Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                };
                dbsnpcell.CellStyle = (hlink_style);
              }
            }
          }
        }

        all.SetColumnWidth(chrIndex, 5 * 256);
        all.SetColumnWidth(startIndex, 13 * 256);
        all.SetColumnWidth(endIndex, 13 * 256);
        all.SetColumnWidth(funcIndex, 15 * 256);
        all.SetColumnWidth(geneIndex, 13 * 256);
        all.SetColumnWidth(geneIndex + 1, 60 * 256);
        all.SetColumnWidth(exonicIndex, 20 * 256);
        all.SetColumnWidth(dbsnpIndex, 15 * 256);
        all.SetColumnWidth(otherInfoIndex + 1, 22 * 256);

        // isMuTect is still null when the input had no data rows; treat that as "not MuTect"
        // rather than throwing on .Value.
        if (isMuTect.GetValueOrDefault())
        {
          all.SetColumnWidth(otherInfoIndex + 2, 10 * 256);
          all.SetColumnWidth(otherInfoIndex + 3, 10 * 256);
          all.SetColumnWidth(otherInfoIndex + 4, 10 * 256);
          if (hasLOD)
          {
            all.SetColumnWidth(otherInfoIndex + 5, 10 * 256);
          }
        }

        book.Write(file);
      }

      return new string[] { targetFile };
    }
 /// <summary>
 /// Accepts a result when Sample2 has at least <c>minReads</c> failed reads and its failed
 /// percentage is at least <c>minPrecentage</c>.
 /// </summary>
 /// <param name="result">The table to check; only Sample2 is read.</param>
 /// <returns>True when both thresholds are met.</returns>
 public bool Accept(FisherExactTestResult result)
 {
   var sample = result.Sample2;
   var hasEnoughReads = sample.Failed >= minReads;

   // The percentage is only evaluated once the read threshold has passed.
   return hasEnoughReads && sample.FailedPercentage >= minPrecentage;
 }
 /// <summary>
 /// Accepts a result when Sample1's failed percentage does not exceed <c>maxPrecentage</c>.
 /// </summary>
 /// <param name="result">The table to check; only Sample1 is read.</param>
 /// <returns>True when the failed percentage is at or below the maximum.</returns>
 public bool Accept(FisherExactTestResult result)
 {
   var failedPercentage = result.Sample1.FailedPercentage;
   return failedPercentage <= this.maxPrecentage;
 }
Example #12
0
 /// <summary>
 /// Accepts a result when Sample2 has at least <c>minReads</c> failed reads and its failed
 /// percentage is at least <c>minPrecentage</c>.
 /// </summary>
 /// <param name="result">The table to check; only Sample2 is read.</param>
 /// <returns>True when both thresholds are met.</returns>
 public bool Accept(FisherExactTestResult result)
 {
     var sample = result.Sample2;
     var hasEnoughReads = sample.Failed >= minReads;

     // The percentage is only evaluated once the read threshold has passed.
     return hasEnoughReads && sample.FailedPercentage >= minPrecentage;
 }
Example #13
0
 /// <summary>
 /// Accepts a result when Sample1's failed percentage does not exceed <c>maxPrecentage</c>.
 /// </summary>
 /// <param name="result">The table to check; only Sample1 is read.</param>
 /// <returns>True when the failed percentage is at or below the maximum.</returns>
 public bool Accept(FisherExactTestResult result)
 {
     var failedPercentage = result.Sample1.FailedPercentage;
     return failedPercentage <= this.maxPrecentage;
 }
    public override IEnumerable<string> Process()
    {
      //Write the stream data of workbook to the root directory
      using (FileStream file = new FileStream(options.OutputFile, FileMode.Create))
      {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlinkStyle = book.CreateCellStyle();
        var hlinkFont = book.CreateFont();
        hlinkFont.Underline = (byte)FontUnderlineType.SINGLE;
        hlinkFont.Color = HSSFColor.BLUE.index;
        hlinkStyle.SetFont(hlinkFont);
        hlinkStyle.WrapText = true;

        var wrapStyle = book.CreateCellStyle();
        wrapStyle.WrapText = true;

        var numericStyle = book.CreateCellStyle();
        numericStyle.DataFormat = 0xb;

        var genenames = File.Exists(options.AffyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(options.AffyAnnotationFile) : new Dictionary<string, string>();
        var all = book.CreateSheet("all");

        var item = new AnnovarGenomeSummaryItem();

        using (var sr = new StreamReader(options.InputFile))
        {
          int nRow = 0;

          bool isMutect = false;
          bool hasLod = false;
          string tumorSampleName = "", normalSampleName = "";
          string line;
          //ignore the comments
          while ((line = sr.ReadLine()) != null)
          {
            if (!line.StartsWith("#"))
            {
              break;
            }

            var row = all.CreateRow(nRow++);
            row.CreateCell(0).SetCellValue(line);

            if (line.StartsWith("##INFO=<ID=LOD"))
            {
              hasLod = true;
            }

            if (line.StartsWith("##MuTect"))
            {
              isMutect = true;
              tumorSampleName = line.StringAfter("tumor_sample_name=").StringBefore(" ");
              normalSampleName = line.StringAfter("normal_sample_name=").StringBefore(" ");
            }
          }

          if (line == null)
          {
            throw new Exception("No entries in file " + options.InputFile);
          }

          var headers = line.Split('\t').ToList();

          //original index
          var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
          var oldInfoIndex = headers.FindIndex(m => m.ToLower().StartsWith("info"));
          var newInfoIndex = FindIndex(oldInfoIndex, geneIndex);

          //relative index
          var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
          var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
          var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
          var chrIndex = headers.IndexOf("Chr");
          var startIndex = headers.IndexOf("Start");
          var endIndex = headers.IndexOf("End");
          var tumorIndex = headers.IndexOf(tumorSampleName);
          var normalIndex = headers.IndexOf(normalSampleName);

          hasLod = hasLod && oldInfoIndex != -1;

          //handle the headers. The length of headers may less than the data.
          var firstrow = all.CreateRow(nRow++);
          for (int i = 0; i <= geneIndex; i++)
          {
            firstrow.CreateCell(i).SetCellValue(headers[i]);
          }
          firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
          for (int i = geneIndex + 1; i < headers.Count; i++)
          {
            if (isMutect)
            {
              if (i == tumorIndex)
              {
                firstrow.CreateCell(i + 1).SetCellValue("Tumor:" + tumorSampleName);
                continue;
              }

              if (i == normalIndex)
              {
                firstrow.CreateCell(i + 1).SetCellValue("Normal:" + normalSampleName);
                continue;
              }
            }
            firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
          }

          var lastcol = headers.Count + 1;
          if (hasLod)
          {
            firstrow.CreateCell(lastcol++).SetCellValue("TLodFstar");
          }

          if (isMutect)
          {
            firstrow.CreateCell(lastcol++).SetCellValue("NormalAlleles");
            firstrow.CreateCell(lastcol++).SetCellValue("TumorAlleles");
            firstrow.CreateCell(lastcol++).SetCellValue("FisherExactTest");
          }

          while ((line = sr.ReadLine()) != null)
          {
            var parts = line.Split('\t');
            if (parts.Length < geneIndex)
            {
              break;
            }

            var row = all.CreateRow(nRow++);
            for (int i = 0; i < geneIndex; i++)
            {
              row.CreateCell(i).SetCellValue(parts[i]);
            }

            item.GeneString = parts[geneIndex];
            if (item.Genes.Count > 0)
            {
              //add link for gene symbol
              var cell = row.CreateCell(geneIndex);
              cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
              {
                Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
              };
              cell.CellStyle = hlinkStyle;
              cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

              //gene description
              var desCell = row.CreateCell(geneIndex + 1);
              desCell.CellStyle = wrapStyle;
              desCell.SetCellValue((from gene in item.Genes
                                    let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                    select description).Merge("\n"));
            }

            //add other information
            for (int i = geneIndex + 1; i < headers.Count; i++)
            {
              row.CreateCell(i + 1).SetCellValue(parts[i]);
            }

            lastcol = headers.Count + 1;
            if (hasLod)
            {
              row.CreateCell(lastcol++).SetCellValue(parts[oldInfoIndex].StringAfter("LOD=").StringBefore(";"));
            }

            if (isMutect)
            {
              Match normal = SomaticMutationUtils.MutectPattern.Match(parts[normalIndex]);
              Match tumor = SomaticMutationUtils.MutectPattern.Match(parts[tumorIndex]);

              var fetr = new FisherExactTestResult();
              fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
              fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
              fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
              fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

              row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
              row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
              row.CreateCell(lastcol).SetCellValue(fetr.CalculateTwoTailPValue());
            }

            if (dbsnpIndex > 0)
            {
              var dbsnpcell = row.GetCell(dbsnpIndex);
              var dbsnp = dbsnpcell.StringCellValue;
              if (!string.IsNullOrEmpty(dbsnp))
              {
                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                {
                  Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                };
                dbsnpcell.CellStyle = (hlinkStyle);
              }
            }
          }


          all.SetColumnWidth(chrIndex, 5 * 256);
          all.SetColumnWidth(startIndex, 11 * 256);
          all.SetColumnWidth(endIndex, 11 * 256);
          all.SetColumnWidth(funcIndex, 15 * 256);
          all.SetColumnWidth(geneIndex, 15 * 256);
          all.SetColumnWidth(geneIndex + 1, 60 * 256);
          all.SetColumnWidth(exonicIndex, 20 * 256);
          all.SetColumnWidth(dbsnpIndex, 15 * 256);

          lastcol = headers.Count + 1;
          if (hasLod)
          {
            all.SetColumnWidth(newInfoIndex, 15 * 256);
            all.SetColumnWidth(lastcol++, 10 * 256);
          }

          if (isMutect)
          {
            all.SetColumnWidth(lastcol++, 10 * 256);
            all.SetColumnWidth(lastcol++, 10 * 256);
            all.SetColumnWidth(lastcol, 10 * 256);
          }
        }
        book.Write(file);
      }
      return new string[] { options.OutputFile };
    }
Example #15
0
        /// <summary>
        /// Converts the tab-delimited annotation file <c>options.InputFile</c> into an Excel (HSSF)
        /// workbook with a single sheet named "all", written to <c>options.OutputFile</c>.
        /// Leading '#' comment lines are copied verbatim (one per row), a "Description" column is
        /// inserted right after the gene column, gene and dbSNP cells get hyperlinks, and, when the
        /// comments identify the file as MuTect output, per-row allele counts and a Fisher exact
        /// test p-value are appended.
        /// </summary>
        /// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// The input holds nothing but comment lines, or its header row has no
        /// "Gene" / "Gene.refGene" column.
        /// </exception>
        public override IEnumerable<string> Process()
        {
            //Open (create/truncate) the output file the workbook is written to
            using (FileStream file = new FileStream(options.OutputFile, FileMode.Create))
            {
                var book = new HSSFWorkbook();

                //cell style for hyperlinks
                //by default hyperlinks are blue and underlined
                var hlinkStyle = book.CreateCellStyle();
                var hlinkFont  = book.CreateFont();
                hlinkFont.Underline = (byte)FontUnderlineType.SINGLE;
                hlinkFont.Color     = HSSFColor.BLUE.index;
                hlinkStyle.SetFont(hlinkFont);
                hlinkStyle.WrapText = true;

                var wrapStyle = book.CreateCellStyle();
                wrapStyle.WrapText = true;

                // NOTE(review): this style is never applied to a cell in this method; it is kept
                // so the set of styles registered in the workbook stays exactly as before.
                var numericStyle = book.CreateCellStyle();
                numericStyle.DataFormat = 0xb;

                var genenames = File.Exists(options.AffyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(options.AffyAnnotationFile) : new Dictionary<string, string>();
                var all       = book.CreateSheet("all");

                var item = new AnnovarGenomeSummaryItem();

                using (var sr = new StreamReader(options.InputFile))
                {
                    int nRow = 0;

                    bool   isMutect = false;
                    bool   hasLod = false;
                    string tumorSampleName = "", normalSampleName = "";
                    string line;

                    //copy the leading comment lines and pick up the LOD / MuTect markers from them
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (!line.StartsWith("#", StringComparison.Ordinal))
                        {
                            break;
                        }

                        var row = all.CreateRow(nRow++);
                        row.CreateCell(0).SetCellValue(line);

                        if (line.StartsWith("##INFO=<ID=LOD", StringComparison.Ordinal))
                        {
                            hasLod = true;
                        }

                        if (line.StartsWith("##MuTect", StringComparison.Ordinal))
                        {
                            isMutect         = true;
                            tumorSampleName  = line.StringAfter("tumor_sample_name=").StringBefore(" ");
                            normalSampleName = line.StringAfter("normal_sample_name=").StringBefore(" ");
                        }
                    }

                    if (line == null)
                    {
                        throw new Exception("No entries in file " + options.InputFile);
                    }

                    //the first non-comment line is the header row
                    var headers = line.Split('\t').ToList();

                    //original index
                    var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
                    if (geneIndex < 0)
                    {
                        //every row is laid out around the gene column; fail with a clear message
                        //instead of an IndexOutOfRangeException on the first data row
                        throw new Exception("No Gene or Gene.refGene column in header of file " + options.InputFile);
                    }

                    //ordinal, case-insensitive matching: ToLower() is culture-sensitive and would miss
                    //"INFO" under e.g. a Turkish culture
                    var oldInfoIndex = headers.FindIndex(m => m.StartsWith("info", StringComparison.OrdinalIgnoreCase));
                    // NOTE(review): the argument order here is the reverse of the FindIndex(geneIndex, ...)
                    // calls below - confirm against the helper.
                    var newInfoIndex = FindIndex(oldInfoIndex, geneIndex);

                    //relative index
                    var funcIndex   = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
                    var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
                    var dbsnpIndex  = FindIndex(geneIndex, headers.FindIndex(m => m.StartsWith("dbsnp", StringComparison.OrdinalIgnoreCase) || m.StartsWith("snp", StringComparison.OrdinalIgnoreCase)));
                    var chrIndex    = headers.IndexOf("Chr");
                    var startIndex  = headers.IndexOf("Start");
                    var endIndex    = headers.IndexOf("End");
                    var tumorIndex  = headers.IndexOf(tumorSampleName);
                    var normalIndex = headers.IndexOf(normalSampleName);

                    //a feature announced in the comments is only usable when its column(s) exist
                    hasLod   = hasLod && oldInfoIndex != -1;
                    isMutect = isMutect && tumorIndex >= 0 && normalIndex >= 0;

                    //handle the headers. The length of headers may less than the data.
                    var firstrow = all.CreateRow(nRow++);
                    for (int i = 0; i <= geneIndex; i++)
                    {
                        firstrow.CreateCell(i).SetCellValue(headers[i]);
                    }
                    firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");

                    //everything after the gene column moves one cell right to make room for "Description"
                    for (int i = geneIndex + 1; i < headers.Count; i++)
                    {
                        var label = headers[i];
                        if (isMutect)
                        {
                            if (i == tumorIndex)
                            {
                                label = "Tumor:" + tumorSampleName;
                            }
                            else if (i == normalIndex)
                            {
                                label = "Normal:" + normalSampleName;
                            }
                        }
                        firstrow.CreateCell(i + 1).SetCellValue(label);
                    }

                    var lastcol = headers.Count + 1;
                    if (hasLod)
                    {
                        firstrow.CreateCell(lastcol++).SetCellValue("TLodFstar");
                    }

                    if (isMutect)
                    {
                        firstrow.CreateCell(lastcol++).SetCellValue("NormalAlleles");
                        firstrow.CreateCell(lastcol++).SetCellValue("TumorAlleles");
                        firstrow.CreateCell(lastcol++).SetCellValue("FisherExactTest");
                    }

                    while ((line = sr.ReadLine()) != null)
                    {
                        var parts = line.Split('\t');

                        //stop at the first line too short to hold the gene column (e.g. a trailing
                        //blank line); "<=" because parts[geneIndex] itself is read below
                        if (parts.Length <= geneIndex)
                        {
                            break;
                        }

                        var row = all.CreateRow(nRow++);
                        for (int i = 0; i < geneIndex; i++)
                        {
                            row.CreateCell(i).SetCellValue(parts[i]);
                        }

                        item.GeneString = parts[geneIndex];
                        if (item.Genes.Count > 0)
                        {
                            //add link for gene symbol (the link targets the first gene only)
                            var cell = row.CreateCell(geneIndex);
                            cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                            {
                                Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                            };
                            cell.CellStyle = hlinkStyle;
                            cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                            //gene description, one line per gene; a blank for genes without annotation
                            var desCell = row.CreateCell(geneIndex + 1);
                            desCell.CellStyle = wrapStyle;
                            desCell.SetCellValue((from gene in item.Genes
                                                  let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                                  select description).Merge("\n"));
                        }

                        //add other information; a row shorter than the header just leaves its tail empty
                        var fieldCount = Math.Min(headers.Count, parts.Length);
                        for (int i = geneIndex + 1; i < fieldCount; i++)
                        {
                            row.CreateCell(i + 1).SetCellValue(parts[i]);
                        }

                        lastcol = headers.Count + 1;
                        if (hasLod)
                        {
                            if (oldInfoIndex < parts.Length)
                            {
                                row.CreateCell(lastcol).SetCellValue(parts[oldInfoIndex].StringAfter("LOD=").StringBefore(";"));
                            }
                            //advance even when the cell is skipped so the MuTect columns stay aligned
                            lastcol++;
                        }

                        if (isMutect && normalIndex < parts.Length && tumorIndex < parts.Length)
                        {
                            Match normal = SomaticMutationUtils.MutectPattern.Match(parts[normalIndex]);
                            Match tumor  = SomaticMutationUtils.MutectPattern.Match(parts[tumorIndex]);

                            //only rows whose sample fields match the pattern carry counts to parse
                            if (normal.Success && tumor.Success)
                            {
                                var fetr = new FisherExactTestResult();
                                fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                                fetr.Sample1.Failed  = int.Parse(normal.Groups[2].Value);
                                fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                                fetr.Sample2.Failed  = int.Parse(tumor.Groups[2].Value);

                                row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                                row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                                row.CreateCell(lastcol).SetCellValue(fetr.CalculateTwoTailPValue());
                            }
                        }

                        if (dbsnpIndex > 0)
                        {
                            //the cell is absent when the row had no value for that column
                            var dbsnpcell = row.GetCell(dbsnpIndex);
                            var dbsnp     = dbsnpcell == null ? null : dbsnpcell.StringCellValue;

                            //Substring(2) drops the first two characters of the id, so values
                            //shorter than that (e.g. ".") cannot be turned into a link
                            if (dbsnp != null && dbsnp.Length >= 2)
                            {
                                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                                {
                                    Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                                };
                                dbsnpcell.CellStyle = (hlinkStyle);
                            }
                        }
                    }

                    //column widths are given in characters; the sheet API takes 1/256th of a character.
                    //Applied in this order so a later entry wins if two indexes coincide.
                    var columnWidths = new[]
                    {
                        new[] { chrIndex, 5 },
                        new[] { startIndex, 11 },
                        new[] { endIndex, 11 },
                        new[] { funcIndex, 15 },
                        new[] { geneIndex, 15 },
                        new[] { geneIndex + 1, 60 },
                        new[] { exonicIndex, 20 },
                        new[] { dbsnpIndex, 15 }
                    };
                    foreach (var columnWidth in columnWidths)
                    {
                        //a column that was not found in the header has a negative index; skip it
                        if (columnWidth[0] >= 0)
                        {
                            all.SetColumnWidth(columnWidth[0], columnWidth[1] * 256);
                        }
                    }

                    lastcol = headers.Count + 1;
                    if (hasLod)
                    {
                        if (newInfoIndex >= 0)
                        {
                            all.SetColumnWidth(newInfoIndex, 15 * 256);
                        }
                        all.SetColumnWidth(lastcol++, 10 * 256);
                    }

                    if (isMutect)
                    {
                        all.SetColumnWidth(lastcol++, 10 * 256);
                        all.SetColumnWidth(lastcol++, 10 * 256);
                        all.SetColumnWidth(lastcol, 10 * 256);
                    }
                }
                book.Write(file);
            }
            return(new string[] { options.OutputFile });
        }