/// <summary>
/// Converts a comma-separated ANNOVAR summary file into an Excel workbook written to
/// <c>this.targetFile</c>. A "Description" column is inserted after the gene column and a
/// "Location" column (chr:start-end) after the columns preceding "Otherinfo". When the
/// trailing fields of the first record match <c>mutectRegex</c>, Normal/Tumor allele counts
/// and a Fisher exact test p-value (plus LOD when present) are appended instead of the raw
/// "Otherinfo" fields.
/// </summary>
/// <param name="fileName">Path of the ANNOVAR CSV file to read.</param>
/// <returns>A single-element sequence holding the path of the workbook that was written.</returns>
/// <exception cref="Exception">The input file contains no header line.</exception>
public override IEnumerable<string> Process(string fileName)
{
    //Write the stream data of workbook to the root directory
    using (FileStream file = new FileStream(this.targetFile, FileMode.Create))
    {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlink_style = book.CreateCellStyle();
        IFont hlink_font = book.CreateFont();
        hlink_font.Underline = (byte)FontUnderlineType.SINGLE;
        hlink_font.Color = HSSFColor.BLUE.index;
        hlink_style.SetFont(hlink_font);
        hlink_style.WrapText = true;

        var wrap_style = book.CreateCellStyle();
        wrap_style.WrapText = true;

        // NOTE(review): 0xb is presumably the built-in scientific-notation format index - confirm against the NPOI built-in format table.
        var numeric_style = book.CreateCellStyle();
        numeric_style.DataFormat = 0xb;

        Dictionary<string, string> genenames = File.Exists(this.affyAnnotationFile)
            ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile)
            : new Dictionary<string, string>();

        ISheet all = book.CreateSheet("all");
        AnnovarGenomeSummaryItem item = new AnnovarGenomeSummaryItem();

        // The reader is owned here so that it is released even when header parsing throws
        // before the CsvReader is constructed. Disposing a StreamReader twice is harmless,
        // should the CsvReader close it as well.
        using (var sr = new StreamReader(fileName))
        {
            var headerLine = sr.ReadLine();
            if (headerLine == null)
            {
                throw new Exception("No entries in file " + fileName);
            }

            var headers = headerLine.Split(',').ToList();

            var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
            var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
            var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
            var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
            var chrIndex = headers.IndexOf("Chr");
            var startIndex = headers.IndexOf("Start");
            var endIndex = headers.IndexOf("End");
            var otherInfoIndex = headers.IndexOf("Otherinfo");

            //handle the headers. The length of headers may be less than the data.
            var firstrow = all.CreateRow(0);
            for (int i = 0; i <= geneIndex; i++)
            {
                firstrow.CreateCell(i).SetCellValue(headers[i]);
            }

            // Every column after the gene column is shifted right by one to make room for "Description".
            firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
            for (int i = geneIndex + 1; i < otherInfoIndex; i++)
            {
                firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
            }

            firstrow.CreateCell(otherInfoIndex + 1).SetCellValue("Location");

            // Decided from the first data record; stays null when the file has no data rows.
            bool? isMuTect = null;
            bool isTableVersion = false; //using table_annovar.pl or summarize_annovar.pl
            bool hasLOD = false;
            double lod = 0.0;

            //handle data
            using (CsvReader csv = new CsvReader(sr, false))
            {
                int nRow = 0;
                while (csv.ReadNextRecord())
                {
                    if (!isMuTect.HasValue)
                    {
                        isTableVersion = csv.FieldCount == headers.Count;
                        isMuTect = mutectRegex.Match(csv[csv.FieldCount - 2]).Success;
                        hasLOD = double.TryParse(csv[csv.FieldCount - 1], out lod);

                        if (isMuTect.Value)
                        {
                            firstrow.CreateCell(otherInfoIndex + 2).SetCellValue("Normal");
                            firstrow.CreateCell(otherInfoIndex + 3).SetCellValue("Tumor");
                            firstrow.CreateCell(otherInfoIndex + 4).SetCellValue("FisherExactTest");
                            all.SetDefaultColumnStyle(otherInfoIndex + 4, numeric_style);
                            if (hasLOD)
                            {
                                firstrow.CreateCell(otherInfoIndex + 5).SetCellValue("LOD_FStar");
                                all.SetDefaultColumnStyle(otherInfoIndex + 5, numeric_style);
                            }
                        }
                        else
                        {
                            for (int i = otherInfoIndex; i < headers.Count; i++)
                            {
                                firstrow.CreateCell(i + 2).SetCellValue(headers[i]);
                            }
                        }
                    }

                    nRow++;
                    var row = all.CreateRow(nRow);
                    for (int i = 0; i < geneIndex; i++)
                    {
                        row.CreateCell(i).SetCellValue(csv[i]);
                    }

                    //add link for gene symbol (the link targets the first gene only)
                    item.GeneString = csv[geneIndex];
                    var cell = row.CreateCell(geneIndex);
                    cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                    {
                        Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                    };
                    cell.CellStyle = hlink_style;
                    cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                    //gene description, one line per gene; a single space when the gene is unknown
                    var desCell = row.CreateCell(geneIndex + 1);
                    desCell.CellStyle = wrap_style;
                    desCell.SetCellValue((from gene in item.Genes
                                          let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                          select description).Merge("\n"));

                    //add location information
                    for (int i = geneIndex + 1; i < otherInfoIndex; i++)
                    {
                        row.CreateCell(i + 1).SetCellValue(csv[i]);
                    }

                    var locationCell = row.CreateCell(otherInfoIndex + 1);
                    locationCell.SetCellValue(string.Format("{0}:{1}-{2}", csv[chrIndex], csv[startIndex], csv[endIndex]));

                    if (isMuTect.Value)
                    {
                        Match normal, tumor;
                        if (isTableVersion)
                        {
                            // The last CSV field carries the tab-separated extra columns.
                            var parts = csv[csv.FieldCount - 1].Split('\t');
                            if (hasLOD)
                            {
                                normal = mutectRegex.Match(parts[parts.Length - 3]);
                                tumor = mutectRegex.Match(parts[parts.Length - 2]);
                                lod = double.Parse(parts[parts.Length - 1]);
                            }
                            else
                            {
                                normal = mutectRegex.Match(parts[parts.Length - 2]);
                                tumor = mutectRegex.Match(parts[parts.Length - 1]);
                            }
                        }
                        else
                        {
                            // Note the tumor/normal order here is the reverse of the table version.
                            if (hasLOD)
                            {
                                tumor = mutectRegex.Match(csv[csv.FieldCount - 3]);
                                normal = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                lod = double.Parse(csv[csv.FieldCount - 1]);
                            }
                            else
                            {
                                tumor = mutectRegex.Match(csv[csv.FieldCount - 2]);
                                normal = mutectRegex.Match(csv[csv.FieldCount - 1]);
                            }
                        }

                        FisherExactTestResult fetr = new FisherExactTestResult();
                        fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                        fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
                        fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                        fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

                        row.CreateCell(otherInfoIndex + 2).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                        row.CreateCell(otherInfoIndex + 3).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                        row.CreateCell(otherInfoIndex + 4).SetCellValue(fetr.CalculateTwoTailPValue());
                        if (hasLOD)
                        {
                            row.CreateCell(otherInfoIndex + 5).SetCellValue(lod);
                        }
                    }
                    else
                    {
                        for (int i = otherInfoIndex; i < csv.FieldCount; i++)
                        {
                            row.CreateCell(i + 2).SetCellValue(csv[i]);
                        }
                    }

                    if (dbsnpIndex > 0)
                    {
                        var dbsnpcell = row.GetCell(dbsnpIndex);
                        var dbsnp = dbsnpcell == null ? null : dbsnpcell.StringCellValue;

                        // The link drops the first two characters of the value, so anything too
                        // short to carry an id after them (e.g. a "." placeholder) stays plain
                        // text instead of making Substring throw.
                        if (dbsnp != null && dbsnp.Length > 2)
                        {
                            dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                            {
                                Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                            };
                            dbsnpcell.CellStyle = hlink_style;
                        }
                    }
                }
            }

            // Column widths are expressed in 1/256ths of a character width.
            all.SetColumnWidth(chrIndex, 5 * 256);
            all.SetColumnWidth(startIndex, 13 * 256);
            all.SetColumnWidth(endIndex, 13 * 256);
            all.SetColumnWidth(funcIndex, 15 * 256);
            all.SetColumnWidth(geneIndex, 13 * 256);
            all.SetColumnWidth(geneIndex + 1, 60 * 256);
            all.SetColumnWidth(exonicIndex, 20 * 256);
            all.SetColumnWidth(dbsnpIndex, 15 * 256);
            all.SetColumnWidth(otherInfoIndex + 1, 22 * 256);

            // isMuTect is still null when the file had no data rows; treat that as "not MuTect"
            // rather than failing on Nullable<bool>.Value.
            if (isMuTect.GetValueOrDefault())
            {
                all.SetColumnWidth(otherInfoIndex + 2, 10 * 256);
                all.SetColumnWidth(otherInfoIndex + 3, 10 * 256);
                all.SetColumnWidth(otherInfoIndex + 4, 10 * 256);
                if (hasLOD)
                {
                    all.SetColumnWidth(otherInfoIndex + 5, 10 * 256);
                }
            }
        }

        book.Write(file);
    }

    return new string[] { targetFile };
}
/// <summary>
/// Merges the gene maps read from every file in <c>this.significantFiles</c> into one
/// tab-delimited table written to <paramref name="fileName"/>. Each row starts with the
/// gene annotation columns, followed by one group of value/status/statistic columns per
/// input file.
/// </summary>
/// <param name="fileName">Path of the tab-delimited file to create.</param>
/// <returns>A single-element sequence holding <paramref name="fileName"/>.</returns>
public override IEnumerable<string> Process(string fileName)
{
    Dictionary<string, string> genenames = File.Exists(this.affyAnnotationFile)
        ? AnnotationFile.GetGeneSymbolDescriptionMap(this.affyAnnotationFile)
        : new Dictionary<string, string>();

    // One map per input file, keyed by that map's first entry (used below to label the
    // comparison via its Sample1/Sample2 names).
    var map = (from file in this.significantFiles
               select ReadGeneDirectionMap(file)).ToDictionary(m => m.Values.First());

    using (StreamWriter sw = new StreamWriter(fileName))
    {
        sw.Write("test_id\tgene_id\tgene\tdescription\tlocus");
        foreach (var item in map.Keys)
        {
            var comp = string.Format("({0}/{1})", item.Sample2, item.Sample1);
            sw.Write("\t{0}\t{1}\tstatus{2}\tlog2FoldChange{2}\tpvalue{2}\tqvalue{2}\tsignificant{2}", item.Sample1, item.Sample2, comp);
        }
        sw.WriteLine();

        // With no input files there is nothing to take the row keys from; emit the header only
        // instead of failing on First().
        if (map.Count > 0)
        {
            // Row keys and the annotation columns come from the first map. Every map is then
            // indexed with the same keys, so they are presumably expected to share one key set.
            var reference = map.First().Value;
            var keys = (from k in reference.Keys orderby k select k).ToList();

            foreach (var key in keys)
            {
                var v = reference[key];

                // Look up a description for each comma-separated gene symbol; unknown symbols map to "".
                var titles = v.Gene.Split(',').Select(a =>
                {
                    string title;
                    return genenames.TryGetValue(a.Trim(), out title) ? title : string.Empty;
                }).ToList();

                // Avoid emitting a bare run of "/" separators when no symbol had a description.
                var tt = titles.All(l => l.Equals(string.Empty)) ? string.Empty : titles.Merge("/");

                sw.Write("{0}\t{1}\t{2}\t{3}\t{4}", v.TestId, v.GeneId, v.Gene, tt, v.Locus);

                // map.Values enumerates in the same order as map.Keys, so these groups line up
                // with the header groups written above.
                foreach (var mv in map.Values)
                {
                    var vv = mv[key];
                    sw.Write("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", vv.Value1, vv.Value2, vv.Status, vv.Log2FoldChangeString, vv.PValue, vv.QValue, vv.SignificantString);
                }

                sw.WriteLine();
            }
        }
    }

    return new string[] { fileName };
}
/// <summary>
/// Writes a shell script that, for every item read from <paramref name="fileName"/>,
/// extracts the item's region from <c>this.bamFile</c> with samtools, sorts it into a
/// per-item file under <c>this.targetDir</c>, and indexes the result.
/// </summary>
/// <param name="fileName">Path of the ANNOVAR genome summary file to read.</param>
/// <returns>A single-element sequence holding the path of the generated script.</returns>
public override IEnumerable<string> Process(string fileName)
{
    // The gene description map is not needed to build the script, so the annotation file
    // is deliberately not loaded here.
    var items = new AnnovarGenomeSummaryItemReader().ReadFromFile(fileName);

    // "/" is used on purpose: the paths end up inside a shell script.
    var shFile = this.targetDir + "/" + suffix + ".sh";
    using (StreamWriter sw = new StreamWriter(shFile))
    {
        foreach (var item in items)
        {
            // Output prefix: <dir>/<seqname>_<start>-<end>_<gene1_gene2...>_<suffix>
            var targetFile = string.Format("{0}/{1}_{2}-{3}_{4}_{5}",
                this.targetDir,
                item.Seqname,
                item.Start,
                item.End,
                (from g in item.Genes select g.Name).Merge("_"),
                this.suffix);

            sw.WriteLine("echo \"{0}.bam\"", Path.GetFileName(targetFile));
            sw.WriteLine("samtools view -b {0} {1}:{2}-{3} | samtools sort - {4}", this.bamFile, item.Seqname, item.Start, item.End, targetFile);
            sw.WriteLine("samtools index {0}.bam", targetFile);
        }
    }

    return new string[] { shFile };
}
/// <summary>
/// Converts the tab-delimited ANNOVAR file at <c>options.InputFile</c> into an Excel
/// workbook written to <c>options.OutputFile</c>. Leading "#" comment lines are copied
/// verbatim into the first rows and scanned for MuTect and LOD markers. A "Description"
/// column is inserted after the gene column; for MuTect input, allele counts and a Fisher
/// exact test p-value are appended to each row.
/// </summary>
/// <returns>A single-element sequence holding the path of the workbook that was written.</returns>
/// <exception cref="Exception">The input file contains nothing but comment lines.</exception>
public override IEnumerable<string> Process()
{
    //Write the stream data of workbook to the root directory
    using (FileStream file = new FileStream(options.OutputFile, FileMode.Create))
    {
        var book = new HSSFWorkbook();

        //cell style for hyperlinks
        //by default hyperlinks are blue and underlined
        var hlinkStyle = book.CreateCellStyle();
        var hlinkFont = book.CreateFont();
        hlinkFont.Underline = (byte)FontUnderlineType.SINGLE;
        hlinkFont.Color = HSSFColor.BLUE.index;
        hlinkStyle.SetFont(hlinkFont);
        hlinkStyle.WrapText = true;

        var wrapStyle = book.CreateCellStyle();
        wrapStyle.WrapText = true;

        // NOTE(review): created but not applied to any cell or column in this method.
        var numericStyle = book.CreateCellStyle();
        numericStyle.DataFormat = 0xb;

        var genenames = File.Exists(options.AffyAnnotationFile)
            ? AnnotationFile.GetGeneSymbolDescriptionMap(options.AffyAnnotationFile)
            : new Dictionary<string, string>();

        var all = book.CreateSheet("all");
        var item = new AnnovarGenomeSummaryItem();

        using (var sr = new StreamReader(options.InputFile))
        {
            int nRow = 0;
            bool isMutect = false;
            bool hasLod = false;
            string tumorSampleName = "", normalSampleName = "";
            string line;

            // Copy the leading comment lines into the sheet and pick up the MuTect/LOD markers.
            while ((line = sr.ReadLine()) != null)
            {
                if (!line.StartsWith("#"))
                {
                    break;
                }

                var row = all.CreateRow(nRow++);
                row.CreateCell(0).SetCellValue(line);

                if (line.StartsWith("##INFO=<ID=LOD"))
                {
                    hasLod = true;
                }

                if (line.StartsWith("##MuTect"))
                {
                    isMutect = true;
                    tumorSampleName = line.StringAfter("tumor_sample_name=").StringBefore(" ");
                    normalSampleName = line.StringAfter("normal_sample_name=").StringBefore(" ");
                }
            }

            if (line == null)
            {
                throw new Exception("No entries in file " + options.InputFile);
            }

            // The first non-comment line is the column header.
            var headers = line.Split('\t').ToList();

            //original index
            var geneIndex = headers.FindIndex(m => m.Equals("Gene") || m.Equals("Gene.refGene"));
            var oldInfoIndex = headers.FindIndex(m => m.ToLower().StartsWith("info"));
            // NOTE(review): argument order is the reverse of the other FindIndex calls below - confirm this is intended.
            var newInfoIndex = FindIndex(oldInfoIndex, geneIndex);

            //relative index
            var funcIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("Func") || m.Equals("Func.refGene")));
            var exonicIndex = FindIndex(geneIndex, headers.FindIndex(m => m.Equals("ExonicFunc") || m.Equals("ExonicFunc.refGene")));
            var dbsnpIndex = FindIndex(geneIndex, headers.FindIndex(m => m.ToLower().StartsWith("dbsnp") || m.ToLower().StartsWith("snp")));
            var chrIndex = headers.IndexOf("Chr");
            var startIndex = headers.IndexOf("Start");
            var endIndex = headers.IndexOf("End");
            var tumorIndex = headers.IndexOf(tumorSampleName);
            var normalIndex = headers.IndexOf(normalSampleName);

            // LOD can only be extracted when there is an info column to read it from.
            hasLod = hasLod && oldInfoIndex != -1;

            //handle the headers. The length of headers may be less than the data.
            var firstrow = all.CreateRow(nRow++);
            for (int i = 0; i <= geneIndex; i++)
            {
                firstrow.CreateCell(i).SetCellValue(headers[i]);
            }

            // Every column after the gene column is shifted right by one to make room for "Description".
            firstrow.CreateCell(geneIndex + 1).SetCellValue("Description");
            for (int i = geneIndex + 1; i < headers.Count; i++)
            {
                if (isMutect)
                {
                    if (i == tumorIndex)
                    {
                        firstrow.CreateCell(i + 1).SetCellValue("Tumor:" + tumorSampleName);
                        continue;
                    }

                    if (i == normalIndex)
                    {
                        firstrow.CreateCell(i + 1).SetCellValue("Normal:" + normalSampleName);
                        continue;
                    }
                }

                firstrow.CreateCell(i + 1).SetCellValue(headers[i]);
            }

            // Derived columns are appended after the shifted original columns.
            var lastcol = headers.Count + 1;
            if (hasLod)
            {
                firstrow.CreateCell(lastcol++).SetCellValue("TLodFstar");
            }

            if (isMutect)
            {
                firstrow.CreateCell(lastcol++).SetCellValue("NormalAlleles");
                firstrow.CreateCell(lastcol++).SetCellValue("TumorAlleles");
                firstrow.CreateCell(lastcol++).SetCellValue("FisherExactTest");
            }

            while ((line = sr.ReadLine()) != null)
            {
                var parts = line.Split('\t');

                // Stop at the first line too short to contain the gene column. "<=" because
                // parts[geneIndex] is read below and needs at least geneIndex + 1 fields.
                if (parts.Length <= geneIndex)
                {
                    break;
                }

                var row = all.CreateRow(nRow++);
                for (int i = 0; i < geneIndex; i++)
                {
                    row.CreateCell(i).SetCellValue(parts[i]);
                }

                item.GeneString = parts[geneIndex];
                if (item.Genes.Count > 0)
                {
                    //add link for gene symbol (the link targets the first gene only)
                    var cell = row.CreateCell(geneIndex);
                    cell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                    {
                        Address = string.Format("http://www.genecards.org/cgi-bin/carddisp.pl?gene={0}", item.Genes[0].Name)
                    };
                    cell.CellStyle = hlinkStyle;
                    cell.SetCellValue((from g in item.Genes select g.Name).Merge("\n"));

                    //gene description, one line per gene; a single space when the gene is unknown
                    var desCell = row.CreateCell(geneIndex + 1);
                    desCell.CellStyle = wrapStyle;
                    desCell.SetCellValue((from gene in item.Genes
                                          let description = genenames.ContainsKey(gene.Name) ? genenames[gene.Name] : " "
                                          select description).Merge("\n"));
                }

                //add other information
                for (int i = geneIndex + 1; i < headers.Count; i++)
                {
                    row.CreateCell(i + 1).SetCellValue(parts[i]);
                }

                lastcol = headers.Count + 1;
                if (hasLod)
                {
                    row.CreateCell(lastcol++).SetCellValue(parts[oldInfoIndex].StringAfter("LOD=").StringBefore(";"));
                }

                if (isMutect)
                {
                    Match normal = SomaticMutationUtils.MutectPattern.Match(parts[normalIndex]);
                    Match tumor = SomaticMutationUtils.MutectPattern.Match(parts[tumorIndex]);

                    var fetr = new FisherExactTestResult();
                    fetr.Sample1.Succeed = int.Parse(normal.Groups[1].Value);
                    fetr.Sample1.Failed = int.Parse(normal.Groups[2].Value);
                    fetr.Sample2.Succeed = int.Parse(tumor.Groups[1].Value);
                    fetr.Sample2.Failed = int.Parse(tumor.Groups[2].Value);

                    row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample1.Succeed, fetr.Sample1.Failed));
                    row.CreateCell(lastcol++).SetCellValue(string.Format("{0}:{1}", fetr.Sample2.Succeed, fetr.Sample2.Failed));
                    row.CreateCell(lastcol).SetCellValue(fetr.CalculateTwoTailPValue());
                }

                if (dbsnpIndex > 0)
                {
                    // The cell can be missing, e.g. when the gene cells were skipped above.
                    var dbsnpcell = row.GetCell(dbsnpIndex);
                    var dbsnp = dbsnpcell == null ? null : dbsnpcell.StringCellValue;

                    // The link drops the first two characters of the value, so anything too
                    // short to carry an id after them (e.g. a "." placeholder) stays plain
                    // text instead of making Substring throw.
                    if (dbsnp != null && dbsnp.Length > 2)
                    {
                        dbsnpcell.Hyperlink = new HSSFHyperlink(HyperlinkType.URL)
                        {
                            Address = string.Format("http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs={0}", dbsnp.Substring(2))
                        };
                        dbsnpcell.CellStyle = hlinkStyle;
                    }
                }
            }

            // Column widths are expressed in 1/256ths of a character width.
            all.SetColumnWidth(chrIndex, 5 * 256);
            all.SetColumnWidth(startIndex, 11 * 256);
            all.SetColumnWidth(endIndex, 11 * 256);
            all.SetColumnWidth(funcIndex, 15 * 256);
            all.SetColumnWidth(geneIndex, 15 * 256);
            all.SetColumnWidth(geneIndex + 1, 60 * 256);
            all.SetColumnWidth(exonicIndex, 20 * 256);
            all.SetColumnWidth(dbsnpIndex, 15 * 256);

            lastcol = headers.Count + 1;
            if (hasLod)
            {
                all.SetColumnWidth(newInfoIndex, 15 * 256);
                all.SetColumnWidth(lastcol++, 10 * 256);
            }

            if (isMutect)
            {
                all.SetColumnWidth(lastcol++, 10 * 256);
                all.SetColumnWidth(lastcol++, 10 * 256);
                all.SetColumnWidth(lastcol, 10 * 256);
            }
        }

        book.Write(file);
    }

    return new string[] { options.OutputFile };
}