Exemple #1
0
        /// <summary>
        /// Converts a comma-separated ANNOVAR genome summary file into an Excel (HSSF) workbook
        /// that is written to <c>targetFile</c>. A "Description" column is inserted right after the
        /// gene column and a "Location" column (chr:start-end) is inserted in front of the
        /// "Otherinfo" columns. Gene symbols and dbSNP ids are turned into hyperlinks. When the
        /// first record matches <c>mutectRegex</c>, the normal/tumor counts and a two-tailed
        /// Fisher exact test p-value (plus an optional LOD value) are written instead of the raw
        /// "Otherinfo" fields.
        /// </summary>
        /// <param name="fileName">Path of the comma-separated summary file to read.</param>
        /// <returns>A single-element array holding the path of the workbook that was written.</returns>
        /// <exception cref="InvalidDataException">
        /// The input file is empty or lacks one of the Gene, Chr, Start, End or Otherinfo columns.
        /// </exception>
        public override IEnumerable <string> Process(string fileName)
        {
            //Write the stream data of workbook to the root directory
            using (FileStream file = new FileStream(this.targetFile, FileMode.Create))
            {
                var book = new HSSFWorkbook();

                //cell style for hyperlinks
                //by default hyperlinks are blue and underlined
                var   hlink_style = book.CreateCellStyle();
                IFont hlink_font  = book.CreateFont();
                hlink_font.Underline = (byte)FontUnderlineType.SINGLE;
                hlink_font.Color     = HSSFColor.BLUE.index;
                hlink_style.SetFont(hlink_font);
                hlink_style.WrapText = true;

                var wrap_style = book.CreateCellStyle();
                wrap_style.WrapText = true;

                //NOTE(review): 0xb is presumably a built-in numeric data format index - confirm against NPOI
                var numeric_style = book.CreateCellStyle();
                numeric_style.DataFormat = 0xb;

                Dictionary <string, string> genenames = File.Exists(this.affyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile) : new Dictionary <string, string>();
                ISheet all = book.CreateSheet("all");

                AnnovarGenomeSummaryItem item = new AnnovarGenomeSummaryItem();

                //parse numeric fields with the invariant culture so that the result does not
                //depend on the regional settings of the machine running the conversion
                var invariant    = System.Globalization.CultureInfo.InvariantCulture;
                var numberStyles = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

                //the reader is disposed here even when an exception is thrown before
                //(or while) the CsvReader below takes it over
                using (var sr = new StreamReader(fileName))
                {
                    var headerLine = sr.ReadLine();
                    if (headerLine == null)
                    {
                        throw new InvalidDataException("No entries in file " + fileName);
                    }
                    var headers = headerLine.Split(',').ToList();

                    var geneIndex   = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
                    var funcIndex   = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
                    var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
                    var dbsnpIndex  = FindIndex(geneIndex, headers.FindIndex(m => m.StartsWith("dbsnp", System.StringComparison.OrdinalIgnoreCase) || m.StartsWith("snp", System.StringComparison.OrdinalIgnoreCase)));
                    var chrIndex    = headers.IndexOf("Chr");
                    var startIndex  = headers.IndexOf("Start");
                    var endIndex    = headers.IndexOf("End");

                    var otherInfoIndex = headers.IndexOf("Otherinfo");

                    //all of these columns are used as field indices below; fail with a clear
                    //message instead of an index error (or a corrupted layout) later on
                    if (geneIndex < 0 || chrIndex < 0 || startIndex < 0 || endIndex < 0 || otherInfoIndex < 0)
                    {
                        throw new InvalidDataException("Missing Gene, Chr, Start, End or Otherinfo column in file " + fileName);
                    }

                    //handle the headers. The data rows may contain more fields than the header line.
                    var firstrow = all.CreateRow(0);
                    for (int i = 0; i <= geneIndex; i++)
                    {
                        firstrow.CreateCell(i).SetCellValue(headers[i]);
                    }
                    firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
                    //columns after the gene column are shifted right by one to make room for "Description"
                    for (int i = geneIndex + 1; i < otherInfoIndex; i++)
                    {
                        firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
                    }
                    firstrow.CreateCell(otherInfoIndex + 1).SetCellValue("Location");

                    bool?  isMuTect       = null;  //decided from the first data record
                    bool   isTableVersion = false; //using table_annovar.pl or summarize_annovar.pl
                    bool   hasLOD         = false;
                    double lod            = 0.0;
                    //handle data
                    using (CsvReader csv = new CsvReader(sr, false))
                    {
                        int nRow = 0;
                        while (csv.ReadNextRecord())
                        {
                            if (!isMuTect.HasValue)
                            {
                                //the layout of the trailing columns is detected once, from the first record
                                isTableVersion = csv.FieldCount == headers.Count;
                                isMuTect       = mutectRegex.Match(csv[csv.FieldCount - 2]).Success;
                                hasLOD         = double.TryParse(csv[csv.FieldCount - 1], numberStyles, invariant, out lod);
                                if (isMuTect.Value)
                                {
                                    firstrow.CreateCell(otherInfoIndex + 2).SetCellValue("Normal");
                                    firstrow.CreateCell(otherInfoIndex + 3).SetCellValue("Tumor");
                                    firstrow.CreateCell(otherInfoIndex + 4).SetCellValue("FisherExactTest");
                                    all.SetDefaultColumnStyle(otherInfoIndex + 4, numeric_style);
                                    if (hasLOD)
                                    {
                                        firstrow.CreateCell(otherInfoIndex + 5).SetCellValue("LOD_FStar");
                                        all.SetDefaultColumnStyle(otherInfoIndex + 5, numeric_style);
                                    }
                                }
                                else
                                {
                                    //shifted by two: one for "Description", one for "Location"
                                    for (int i = otherInfoIndex; i < headers.Count; i++)
                                    {
                                        firstrow.CreateCell(i + 2).SetCellValue(headers[i]);
                                    }
                                }
                            }

                            nRow++;
                            var row = all.CreateRow(nRow);
                            for (int i = 0; i < geneIndex; i++)
                            {
                                row.CreateCell(i).SetCellValue(csv[i]);
                            }

                            item.GeneString = csv[geneIndex];
                            //a record without any gene gets no gene/description cells instead of
                            //failing on item.Genes[0]
                            if (item.Genes.Any())
                            {
                                //add link for gene symbol
                                var cell = row.CreateCell(geneIndex);
                                cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                                {
                                    Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                                };
                                cell.CellStyle = hlink_style;
                                cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                                //gene description
                                var desCell = row.CreateCell(geneIndex + 1);
                                desCell.CellStyle = wrap_style;
                                desCell.SetCellValue((from gene in item.Genes
                                                      let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                                                        select description).Merge("\n"));
                            }

                            //add location information
                            for (int i = geneIndex + 1; i < otherInfoIndex; i++)
                            {
                                row.CreateCell(i + 1).SetCellValue(csv[i]);
                            }
                            var locationCell = row.CreateCell(otherInfoIndex + 1);
                            locationCell.SetCellValue(string.Format("{0}:{1}-{2}", csv[chrIndex], csv[startIndex], csv[endIndex]));

                            if (isMuTect.Value)
                            {
                                Match normal, tumor;
                                if (isTableVersion)
                                {
                                    //the last field holds the tab-separated other info; normal precedes tumor
                                    var parts = csv[csv.FieldCount - 1].Split('\t');
                                    if (hasLOD)
                                    {
                                        normal = mutectRegex.Match(parts[parts.Length - 3]);
                                        tumor  = mutectRegex.Match(parts[parts.Length - 2]);
                                        lod    = double.Parse(parts[parts.Length - 1], invariant);
                                    }
                                    else
                                    {
                                        normal = mutectRegex.Match(parts[parts.Length - 2]);
                                        tumor  = mutectRegex.Match(parts[parts.Length - 1]);
                                    }
                                }
                                else
                                {
                                    //separate trailing fields; here tumor precedes normal
                                    if (hasLOD)
                                    {
                                        tumor  = mutectRegex.Match(csv[csv.FieldCount - 3]);
                                        normal = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                        lod    = double.Parse(csv[csv.FieldCount - 1], invariant);
                                    }
                                    else
                                    {
                                        tumor  = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                        normal = mutectRegex.Match(csv[csv.FieldCount - 1]);
                                    }
                                }
                                FisherExactTestResult fetr = new FisherExactTestResult();
                                fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                                fetr.Sample1.Failed  = int.Parse(normal.Groups[2].Value);
                                fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                                fetr.Sample2.Failed  = int.Parse(tumor.Groups[2].Value);

                                row.CreateCell(otherInfoIndex + 2).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                                row.CreateCell(otherInfoIndex + 3).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                                row.CreateCell(otherInfoIndex + 4).SetCellValue(fetr.CalculateTwoTailPValue());
                                if (hasLOD)
                                {
                                    row.CreateCell(otherInfoIndex + 5).SetCellValue(lod);
                                }
                            }
                            else
                            {
                                for (int i = otherInfoIndex; i < csv.FieldCount; i++)
                                {
                                    row.CreateCell(i + 2).SetCellValue(csv[i]);
                                }
                            }

                            if (dbsnpIndex > 0)
                            {
                                var dbsnpcell = row.GetCell(dbsnpIndex);
                                var dbsnp     = dbsnpcell == null ? null : dbsnpcell.StringCellValue;
                                //the link uses the text after the first two characters, so values
                                //of two characters or fewer carry no id (and Substring(2) would
                                //throw on a one-character value)
                                if (!string.IsNullOrEmpty(dbsnp) && dbsnp.Length > 2)
                                {
                                    dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                                    {
                                        Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                                    };
                                    dbsnpcell.CellStyle = (hlink_style);
                                }
                            }
                        }
                    }

                    all.SetColumnWidth(chrIndex, 5 * 256);
                    all.SetColumnWidth(startIndex, 13 * 256);
                    all.SetColumnWidth(endIndex, 13 * 256);
                    all.SetColumnWidth(funcIndex, 15 * 256);
                    all.SetColumnWidth(geneIndex, 13 * 256);
                    all.SetColumnWidth(geneIndex + 1, 60 * 256);
                    all.SetColumnWidth(exonicIndex, 20 * 256);
                    all.SetColumnWidth(dbsnpIndex, 15 * 256);
                    all.SetColumnWidth(otherInfoIndex + 1, 22 * 256);

                    //isMuTect stays null when the file has no data rows; treat that as "not MuTect"
                    //instead of failing on isMuTect.Value
                    if (isMuTect.GetValueOrDefault())
                    {
                        all.SetColumnWidth(otherInfoIndex + 2, 10 * 256);
                        all.SetColumnWidth(otherInfoIndex + 3, 10 * 256);
                        all.SetColumnWidth(otherInfoIndex + 4, 10 * 256);
                        if (hasLOD)
                        {
                            all.SetColumnWidth(otherInfoIndex + 5, 10 * 256);
                        }
                    }
                }

                book.Write(file);
            }

            return(new string[] { targetFile });
        }
Exemple #2
0
        /// <summary>
        /// Converts the tab-separated file <c>options.InputFile</c> into an Excel (HSSF) workbook
        /// written to <c>options.OutputFile</c>. Leading lines starting with '#' are copied to the
        /// sheet as comments and scanned for a LOD INFO declaration and a MuTect header. A
        /// "Description" column is inserted right after the gene column, gene symbols and dbSNP ids
        /// are turned into hyperlinks, and, depending on what the comments declared, a LOD column
        /// and normal/tumor allele counts with a two-tailed Fisher exact test p-value are appended.
        /// </summary>
        /// <returns>A single-element array holding the path of the workbook that was written.</returns>
        /// <exception cref="Exception">The input file contains nothing but comment lines.</exception>
        /// <exception cref="InvalidDataException">
        /// The header has no gene column, or a MuTect header was found but the tumor/normal sample
        /// columns it names are not present in the header line.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            //Write the stream data of workbook to the root directory
            using (FileStream file = new FileStream(options.OutputFile, FileMode.Create))
            {
                var book = new HSSFWorkbook();

                //cell style for hyperlinks
                //by default hyperlinks are blue and underlined
                var hlinkStyle = book.CreateCellStyle();
                var hlinkFont  = book.CreateFont();
                hlinkFont.Underline = (byte)FontUnderlineType.SINGLE;
                hlinkFont.Color     = HSSFColor.BLUE.index;
                hlinkStyle.SetFont(hlinkFont);
                hlinkStyle.WrapText = true;

                var wrapStyle = book.CreateCellStyle();
                wrapStyle.WrapText = true;

                //NOTE(review): this style is created but never applied to any cell or column in
                //this method - confirm whether the FisherExactTest column was meant to use it
                var numericStyle = book.CreateCellStyle();
                numericStyle.DataFormat = 0xb;

                var genenames = File.Exists(options.AffyAnnotationFile) ? AnnotationFile.GetGeneSymbolDescriptionMap(options.AffyAnnotationFile) : new Dictionary <string, string>();
                var all       = book.CreateSheet("all");

                var item = new AnnovarGenomeSummaryItem();

                using (var sr = new StreamReader(options.InputFile))
                {
                    int nRow = 0;

                    bool   isMutect = false;
                    bool   hasLod = false;
                    string tumorSampleName = "", normalSampleName = "";
                    string line;
                    //copy the leading comment lines to the sheet and pick up the LOD/MuTect declarations
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (!line.StartsWith("#"))
                        {
                            break;
                        }

                        var row = all.CreateRow(nRow++);
                        row.CreateCell(0).SetCellValue(line);

                        if (line.StartsWith("##INFO=<ID=LOD"))
                        {
                            hasLod = true;
                        }

                        if (line.StartsWith("##MuTect"))
                        {
                            isMutect         = true;
                            tumorSampleName  = line.StringAfter("tumor_sample_name=").StringBefore(" ");
                            normalSampleName = line.StringAfter("normal_sample_name=").StringBefore(" ");
                        }
                    }

                    if (line == null)
                    {
                        throw new Exception("No entries in file " + options.InputFile);
                    }

                    //the first non-comment line is the header line
                    var headers = line.Split('\t').ToList();

                    //original index
                    var geneIndex    = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
                    //ordinal, case-insensitive match: ToLower() depends on the current culture
                    //(e.g. "INFO" does not lower-case to "info" under tr-TR)
                    var oldInfoIndex = headers.FindIndex(m => m.StartsWith("info", StringComparison.OrdinalIgnoreCase));
                    //NOTE(review): the original passed (oldInfoIndex, geneIndex); every other call
                    //passes geneIndex first, so the argument order is assumed to have been swapped
                    //by mistake - confirm against the FindIndex helper
                    var newInfoIndex = FindIndex(geneIndex, oldInfoIndex);

                    //relative index
                    var funcIndex   = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
                    var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
                    var dbsnpIndex  = FindIndex(geneIndex, headers.FindIndex(m => m.StartsWith("dbsnp", StringComparison.OrdinalIgnoreCase) || m.StartsWith("snp", StringComparison.OrdinalIgnoreCase)));
                    var chrIndex    = headers.IndexOf("Chr");
                    var startIndex  = headers.IndexOf("Start");
                    var endIndex    = headers.IndexOf("End");
                    var tumorIndex  = headers.IndexOf(tumorSampleName);
                    var normalIndex = headers.IndexOf(normalSampleName);

                    //these indices are used to address the fields of every data line; fail with a
                    //clear message instead of an index error on the first data line
                    if (geneIndex < 0)
                    {
                        throw new InvalidDataException("Missing Gene column in file " + options.InputFile);
                    }

                    if (isMutect && (tumorIndex < 0 || normalIndex < 0))
                    {
                        throw new InvalidDataException("Cannot find tumor/normal sample column in file " + options.InputFile);
                    }

                    //the LOD value is read from the info column, so it needs that column to exist
                    hasLod = hasLod && oldInfoIndex != -1;

                    //handle the headers. The data rows may contain more fields than the header line.
                    var firstrow = all.CreateRow(nRow++);
                    for (int i = 0; i <= geneIndex; i++)
                    {
                        firstrow.CreateCell(i).SetCellValue(headers[i]);
                    }
                    firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
                    //columns after the gene column are shifted right by one to make room for "Description"
                    for (int i = geneIndex + 1; i < headers.Count; i++)
                    {
                        if (isMutect)
                        {
                            if (i == tumorIndex)
                            {
                                firstrow.CreateCell(i + 1).SetCellValue("Tumor:" + tumorSampleName);
                                continue;
                            }

                            if (i == normalIndex)
                            {
                                firstrow.CreateCell(i + 1).SetCellValue("Normal:" + normalSampleName);
                                continue;
                            }
                        }
                        firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
                    }

                    //extra columns are appended after the shifted header columns
                    var lastcol = headers.Count + 1;
                    if (hasLod)
                    {
                        firstrow.CreateCell(lastcol++).SetCellValue("TLodFstar");
                    }

                    if (isMutect)
                    {
                        firstrow.CreateCell(lastcol++).SetCellValue("NormalAlleles");
                        firstrow.CreateCell(lastcol++).SetCellValue("TumorAlleles");
                        firstrow.CreateCell(lastcol++).SetCellValue("FisherExactTest");
                    }

                    while ((line = sr.ReadLine()) != null)
                    {
                        var parts = line.Split('\t');
                        //stop at the first line that is too short to contain the gene field
                        //(the original "<" let a line with exactly geneIndex fields through and
                        //then failed on parts[geneIndex])
                        if (parts.Length <= geneIndex)
                        {
                            break;
                        }

                        var row = all.CreateRow(nRow++);
                        for (int i = 0; i < geneIndex; i++)
                        {
                            row.CreateCell(i).SetCellValue(parts[i]);
                        }

                        item.GeneString = parts[geneIndex];
                        if (item.Genes.Count > 0)
                        {
                            //add link for gene symbol
                            var cell = row.CreateCell(geneIndex);
                            cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                            {
                                Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                            };
                            cell.CellStyle = hlinkStyle;
                            cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                            //gene description
                            var desCell = row.CreateCell(geneIndex + 1);
                            desCell.CellStyle = wrapStyle;
                            desCell.SetCellValue((from gene in item.Genes
                                                  let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                                                    select description).Merge("\n"));
                        }

                        //add other information
                        for (int i = geneIndex + 1; i < headers.Count; i++)
                        {
                            row.CreateCell(i + 1).SetCellValue(parts[i]);
                        }

                        lastcol = headers.Count + 1;
                        if (hasLod)
                        {
                            row.CreateCell(lastcol++).SetCellValue(parts[oldInfoIndex].StringAfter("LOD=").StringBefore(";"));
                        }

                        if (isMutect)
                        {
                            Match normal = SomaticMutationUtils.MutectPattern.Match(parts[normalIndex]);
                            Match tumor  = SomaticMutationUtils.MutectPattern.Match(parts[tumorIndex]);

                            var fetr = new FisherExactTestResult();
                            fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                            fetr.Sample1.Failed  = int.Parse(normal.Groups[2].Value);
                            fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                            fetr.Sample2.Failed  = int.Parse(tumor.Groups[2].Value);

                            row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                            row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                            row.CreateCell(lastcol).SetCellValue(fetr.CalculateTwoTailPValue());
                        }

                        if (dbsnpIndex > 0)
                        {
                            var dbsnpcell = row.GetCell(dbsnpIndex);
                            var dbsnp     = dbsnpcell == null ? null : dbsnpcell.StringCellValue;
                            //the link uses the text after the first two characters, so values of
                            //two characters or fewer carry no id (and Substring(2) would throw on
                            //a one-character value)
                            if (!string.IsNullOrEmpty(dbsnp) && dbsnp.Length > 2)
                            {
                                dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                                {
                                    Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                                };
                                dbsnpcell.CellStyle = (hlinkStyle);
                            }
                        }
                    }


                    all.SetColumnWidth(chrIndex, 5 * 256);
                    all.SetColumnWidth(startIndex, 11 * 256);
                    all.SetColumnWidth(endIndex, 11 * 256);
                    all.SetColumnWidth(funcIndex, 15 * 256);
                    all.SetColumnWidth(geneIndex, 15 * 256);
                    all.SetColumnWidth(geneIndex + 1, 60 * 256);
                    all.SetColumnWidth(exonicIndex, 20 * 256);
                    all.SetColumnWidth(dbsnpIndex, 15 * 256);

                    //same column order as used for the appended header cells above
                    lastcol = headers.Count + 1;
                    if (hasLod)
                    {
                        all.SetColumnWidth(newInfoIndex, 15 * 256);
                        all.SetColumnWidth(lastcol++, 10 * 256);
                    }

                    if (isMutect)
                    {
                        all.SetColumnWidth(lastcol++, 10 * 256);
                        all.SetColumnWidth(lastcol++, 10 * 256);
                        all.SetColumnWidth(lastcol, 10 * 256);
                    }
                }
                book.Write(file);
            }
            return(new string[] { options.OutputFile });
        }