/// <summary>
/// Reads an ANNOVAR-style annotated variant table and converts every row whose chromosome is
/// accepted into a <see cref="SomaticItem"/>.
/// </summary>
/// <remarks>
/// The last two columns of the header line (the first line that does not start with "#") are
/// treated as the two sample columns. Allele counts are extracted from each sample column with
/// <paramref name="reg"/> (group 1 = major count, group 2 = minor count). The sample with the
/// smaller minor-allele fraction is reported as the normal sample, the other one as the tumor.
/// All numbers are parsed with the invariant culture so the result does not depend on the
/// regional settings of the machine.
/// </remarks>
/// <param name="fileName">Path of the tab-delimited file to parse; the part of the file name returned by <c>StringBefore(".")</c> becomes the sample name.</param>
/// <param name="reg">Regular expression applied to both sample columns; capture groups 1 and 2 must be integers.</param>
/// <param name="scorePrefix">Text passed to <c>StringAfter</c> on the INFO column; whatever follows it is parsed as the score.</param>
/// <param name="acceptChromosome">Predicate deciding whether a row on the given chromosome is kept.</param>
/// <returns>One item per accepted row, in file order.</returns>
/// <exception cref="FormatException">A sample column does not match <paramref name="reg"/>, or a count, position or score is not a valid number.</exception>
public static List<SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func<string, bool> acceptChromosome)
{
    // Variant tables are machine-generated: never parse their numbers with the current UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<SomaticItem>();
    var bar = Path.GetFileName(fileName).StringBefore(".");
    var annos = new AnnotationFormat("^#").ReadFromFile(fileName);

    // Only the header line is needed here; ReadLines stops reading as soon as it is found
    // instead of loading the whole file a second time.
    var headers = File.ReadLines(fileName)
        .First(l => !l.StartsWith("#", StringComparison.Ordinal))
        .Split('\t');

    foreach (var ann in annos)
    {
        // Expected columns:
        // Chr Start End Ref Alt Func.refGene Gene.refGene GeneDetail.refGene ExonicFunc.refGene AAChange.refGene
        // snp138 cosmic70 FILTER INFO FORMAT <sample 1> <sample 2>
        var chr = ann.Annotations["Chr"].ToString();
        if (!acceptChromosome(chr))
        {
            continue;
        }

        var firstSampleColumn = headers[headers.Length - 2];
        var m1 = reg.Match(ann.Annotations[firstSampleColumn].ToString());
        if (!m1.Success)
        {
            throw new FormatException(string.Format("Cannot extract allele counts from column '{0}' in file {1}", firstSampleColumn, fileName));
        }
        var m1Major = int.Parse(m1.Groups[1].Value, invariant);
        var m1Minor = int.Parse(m1.Groups[2].Value, invariant);

        var secondSampleColumn = headers[headers.Length - 1];
        var m2 = reg.Match(ann.Annotations[secondSampleColumn].ToString());
        if (!m2.Success)
        {
            throw new FormatException(string.Format("Cannot extract allele counts from column '{0}' in file {1}", secondSampleColumn, fileName));
        }
        var m2Major = int.Parse(m2.Groups[1].Value, invariant);
        var m2Minor = int.Parse(m2.Groups[2].Value, invariant);

        // The sample with the lower minor-allele fraction is taken as normal. When a sample has
        // zero total reads the fraction is NaN, the comparison is false, and the second sample
        // is therefore treated as normal.
        var isNormalFirst = (((double)m1Minor) / (m1Major + m1Minor)) < (((double)m2Minor) / (m2Major + m2Minor));

        var info = ann.Annotations["INFO"].ToString();

        var item = new SomaticItem()
        {
            Sample = bar,
            Chrom = chr,
            StartPosition = int.Parse(ann.Annotations["Start"].ToString(), invariant),
            RefAllele = ann.Annotations["Ref"].ToString(),
            AltAllele = ann.Annotations["Alt"].ToString(),
            NormalMajorCount = isNormalFirst ? m1Major : m2Major,
            NormalMinorCount = isNormalFirst ? m1Minor : m2Minor,
            TumorMajorCount = isNormalFirst ? m2Major : m1Major,
            TumorMinorCount = isNormalFirst ? m2Minor : m1Minor,
            Score = double.Parse(info.StringAfter(scorePrefix), invariant),
            RefGeneFunc = GetDictionaryValue(ann.Annotations, "Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(ann.Annotations, "Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "ExonicFunc.refGene", string.Empty)
        };

        result.Add(item);
    }

    return result;
}
/// <summary>
/// Reads a GLMVC result table and converts every row whose chromosome is accepted into a
/// <see cref="SomaticItem"/>.
/// </summary>
/// <remarks>
/// The item's score is the negated natural logarithm of the "brglm_group_fdr" column, so a
/// smaller FDR yields a larger score. All numbers are parsed with the invariant culture so the
/// result does not depend on the regional settings of the machine.
/// </remarks>
/// <param name="fileName">Path of the file to parse; the part of the file name returned by <c>StringBefore(".")</c> becomes the sample name.</param>
/// <param name="acceptChromosome">Predicate deciding whether a row on the given chromosome is kept.</param>
/// <returns>One item per accepted row, in file order.</returns>
/// <exception cref="FormatException">A position, count or FDR value is not a valid number.</exception>
public static List<SomaticItem> ParseGlmvcFile(string fileName, Func<string, bool> acceptChromosome)
{
    // Result tables are machine-generated: never parse their numbers with the current UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<SomaticItem>();
    var bar = Path.GetFileName(fileName).StringBefore(".");
    var annos = new AnnotationFormat().ReadFromFile(fileName);

    foreach (var ann in annos)
    {
        var chr = ann.Annotations["chr"].ToString();
        if (!acceptChromosome(chr))
        {
            continue;
        }

        // Kept as text for LogisticGroupFdr and also converted into the numeric score below.
        var fdr = ann.Annotations["brglm_group_fdr"].ToString();

        var item = new SomaticItem()
        {
            Sample = bar,
            Chrom = chr,
            StartPosition = int.Parse(ann.Annotations["start"].ToString(), invariant),
            RefAllele = ann.Annotations["major_allele"].ToString(),
            AltAllele = ann.Annotations["minor_allele"].ToString(),
            NormalMajorCount = int.Parse(ann.Annotations["normal_major_count"].ToString(), invariant),
            NormalMinorCount = int.Parse(ann.Annotations["normal_minor_count"].ToString(), invariant),
            TumorMajorCount = int.Parse(ann.Annotations["tumor_major_count"].ToString(), invariant),
            TumorMinorCount = int.Parse(ann.Annotations["tumor_minor_count"].ToString(), invariant),
            LogisticScore = ann.Annotations["brglm_score"].ToString(),
            LogisticStrand = ann.Annotations["brglm_strand"].ToString(),
            LogisticPosition = ann.Annotations["brglm_position"].ToString(),
            LogisticGroupFdr = fdr,
            Score = -Math.Log(double.Parse(fdr, invariant)),
            // The ANNOVAR columns are looked up with an empty-string default.
            RefGeneFunc = GetDictionaryValue(ann.Annotations, "annovar_Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(ann.Annotations, "annovar_Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "annovar_ExonicFunc.refGene", string.Empty),
            RefGeneAAChange = GetDictionaryValue(ann.Annotations, "annovar_AAChange.refGene", string.Empty),
        };

        result.Add(item);
    }

    return result;
}
/// <summary>
/// Reads an ANNOVAR-style annotated variant table and converts every row whose chromosome is
/// accepted into a <see cref="SomaticItem"/>.
/// </summary>
/// <remarks>
/// The last two columns of the header line (the first line that does not start with "#") are
/// treated as the two sample columns. Allele counts are extracted from each sample column with
/// <paramref name="reg"/> (group 1 = major count, group 2 = minor count). The sample with the
/// smaller minor-allele fraction is reported as the normal sample, the other one as the tumor.
/// All numbers are parsed with the invariant culture so the result does not depend on the
/// regional settings of the machine.
/// </remarks>
/// <param name="fileName">Path of the tab-delimited file to parse; the part of the file name returned by <c>StringBefore(".")</c> becomes the sample name.</param>
/// <param name="reg">Regular expression applied to both sample columns; capture groups 1 and 2 must be integers.</param>
/// <param name="scorePrefix">Text passed to <c>StringAfter</c> on the INFO column; whatever follows it is parsed as the score.</param>
/// <param name="acceptChromosome">Predicate deciding whether a row on the given chromosome is kept.</param>
/// <returns>One item per accepted row, in file order.</returns>
/// <exception cref="FormatException">A sample column does not match <paramref name="reg"/>, or a count, position or score is not a valid number.</exception>
public static List<SomaticItem> ParseAnnovarFile(string fileName, Regex reg, string scorePrefix, Func<string, bool> acceptChromosome)
{
    // Variant tables are machine-generated: never parse their numbers with the current UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<SomaticItem>();
    var bar = Path.GetFileName(fileName).StringBefore(".");
    var annos = new AnnotationFormat("^#").ReadFromFile(fileName);

    // Only the header line is needed here; ReadLines stops reading as soon as it is found
    // instead of loading the whole file a second time.
    var headers = File.ReadLines(fileName)
        .First(l => !l.StartsWith("#", StringComparison.Ordinal))
        .Split('\t');

    foreach (var ann in annos)
    {
        // Expected columns:
        // Chr Start End Ref Alt Func.refGene Gene.refGene GeneDetail.refGene ExonicFunc.refGene AAChange.refGene
        // snp138 cosmic70 FILTER INFO FORMAT <sample 1> <sample 2>
        var chr = ann.Annotations["Chr"].ToString();
        if (!acceptChromosome(chr))
        {
            continue;
        }

        var firstSampleColumn = headers[headers.Length - 2];
        var m1 = reg.Match(ann.Annotations[firstSampleColumn].ToString());
        if (!m1.Success)
        {
            throw new FormatException(string.Format("Cannot extract allele counts from column '{0}' in file {1}", firstSampleColumn, fileName));
        }
        var m1Major = int.Parse(m1.Groups[1].Value, invariant);
        var m1Minor = int.Parse(m1.Groups[2].Value, invariant);

        var secondSampleColumn = headers[headers.Length - 1];
        var m2 = reg.Match(ann.Annotations[secondSampleColumn].ToString());
        if (!m2.Success)
        {
            throw new FormatException(string.Format("Cannot extract allele counts from column '{0}' in file {1}", secondSampleColumn, fileName));
        }
        var m2Major = int.Parse(m2.Groups[1].Value, invariant);
        var m2Minor = int.Parse(m2.Groups[2].Value, invariant);

        // The sample with the lower minor-allele fraction is taken as normal. When a sample has
        // zero total reads the fraction is NaN, the comparison is false, and the second sample
        // is therefore treated as normal.
        var isNormalFirst = (((double)m1Minor) / (m1Major + m1Minor)) < (((double)m2Minor) / (m2Major + m2Minor));

        var info = ann.Annotations["INFO"].ToString();

        var item = new SomaticItem()
        {
            Sample = bar,
            Chrom = chr,
            StartPosition = int.Parse(ann.Annotations["Start"].ToString(), invariant),
            RefAllele = ann.Annotations["Ref"].ToString(),
            AltAllele = ann.Annotations["Alt"].ToString(),
            NormalMajorCount = isNormalFirst ? m1Major : m2Major,
            NormalMinorCount = isNormalFirst ? m1Minor : m2Minor,
            TumorMajorCount = isNormalFirst ? m2Major : m1Major,
            TumorMinorCount = isNormalFirst ? m2Minor : m1Minor,
            Score = double.Parse(info.StringAfter(scorePrefix), invariant),
            RefGeneFunc = GetDictionaryValue(ann.Annotations, "Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(ann.Annotations, "Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "ExonicFunc.refGene", string.Empty)
        };

        result.Add(item);
    }

    return result;
}
/// <summary>
/// Reads a GLMVC result table and converts every row whose chromosome is accepted into a
/// <see cref="SomaticItem"/>.
/// </summary>
/// <remarks>
/// The item's score is the negated natural logarithm of the "brglm_group_fdr" column, so a
/// smaller FDR yields a larger score. All numbers are parsed with the invariant culture so the
/// result does not depend on the regional settings of the machine.
/// </remarks>
/// <param name="fileName">Path of the file to parse; the part of the file name returned by <c>StringBefore(".")</c> becomes the sample name.</param>
/// <param name="acceptChromosome">Predicate deciding whether a row on the given chromosome is kept.</param>
/// <returns>One item per accepted row, in file order.</returns>
/// <exception cref="FormatException">A position, count or FDR value is not a valid number.</exception>
public static List<SomaticItem> ParseGlmvcFile(string fileName, Func<string, bool> acceptChromosome)
{
    // Result tables are machine-generated: never parse their numbers with the current UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var result = new List<SomaticItem>();
    var bar = Path.GetFileName(fileName).StringBefore(".");
    var annos = new AnnotationFormat().ReadFromFile(fileName);

    foreach (var ann in annos)
    {
        var chr = ann.Annotations["chr"].ToString();
        if (!acceptChromosome(chr))
        {
            continue;
        }

        // Kept as text for LogisticGroupFdr and also converted into the numeric score below.
        var fdr = ann.Annotations["brglm_group_fdr"].ToString();

        var item = new SomaticItem()
        {
            Sample = bar,
            Chrom = chr,
            StartPosition = int.Parse(ann.Annotations["start"].ToString(), invariant),
            RefAllele = ann.Annotations["major_allele"].ToString(),
            AltAllele = ann.Annotations["minor_allele"].ToString(),
            NormalMajorCount = int.Parse(ann.Annotations["normal_major_count"].ToString(), invariant),
            NormalMinorCount = int.Parse(ann.Annotations["normal_minor_count"].ToString(), invariant),
            TumorMajorCount = int.Parse(ann.Annotations["tumor_major_count"].ToString(), invariant),
            TumorMinorCount = int.Parse(ann.Annotations["tumor_minor_count"].ToString(), invariant),
            LogisticScore = ann.Annotations["brglm_score"].ToString(),
            LogisticStrand = ann.Annotations["brglm_strand"].ToString(),
            LogisticPosition = ann.Annotations["brglm_position"].ToString(),
            LogisticGroupFdr = fdr,
            Score = -Math.Log(double.Parse(fdr, invariant)),
            // The ANNOVAR columns are looked up with an empty-string default.
            RefGeneFunc = GetDictionaryValue(ann.Annotations, "annovar_Func.refGene", string.Empty),
            RefGeneName = GetDictionaryValue(ann.Annotations, "annovar_Gene.refGene", string.Empty),
            RefGeneExonicFunc = GetDictionaryValue(ann.Annotations, "annovar_ExonicFunc.refGene", string.Empty),
            RefGeneAAChange = GetDictionaryValue(ann.Annotations, "annovar_AAChange.refGene", string.Empty),
        };

        result.Add(item);
    }

    return result;
}