// Reads the sample TSV file and verifies the number of records as well as
// every parsed field of the first record.
public void TestReadFromFile()
{
    var format = new FilterItemTextFormat();
    var items = format.ReadFromFile("../../../data/TCGA-A7-A0D9-DNA-TP-NB.tsv");

    Assert.AreEqual(117, items.Count);

    // All remaining assertions inspect the first record only.
    var first = items[0];

    // Genomic location.
    Assert.AreEqual("1", first.Chr);
    Assert.AreEqual("37568524", first.Start);
    Assert.AreEqual("37568524", first.End);

    // Alleles.
    Assert.AreEqual("A", first.MajorAllele);
    Assert.AreEqual("G", first.MinorAllele);
    Assert.AreEqual("a", first.ReferenceAllele);

    // Read counts.
    Assert.AreEqual(14, first.NormalMajorCount);
    Assert.AreEqual(0, first.NormalMinorCount);
    Assert.AreEqual(7, first.TumorMajorCount);
    Assert.AreEqual(5, first.TumorMinorCount);

    // Fisher test columns.
    Assert.AreEqual(0.0120401337792642, first.FisherGroup);
    Assert.AreEqual("1", first.FisherNormal);

    // brglm columns.
    Assert.AreEqual("TRUE", first.BrglmConverged);
    Assert.AreEqual(0.0559533239203086, first.BrglmGroup);
    Assert.AreEqual("", first.BrglmScore);
    Assert.AreEqual("", first.BrglmStrand);
    Assert.AreEqual("", first.BrglmPosition);
    Assert.AreEqual(0.0891174471997679, first.BrglmGroupFdr);

    // Filter verdict and identity string.
    Assert.AreEqual("GLM_FDR", first.Filter);
    Assert.AreEqual("1_37568524_a_A_G_14_0_7_5_1.2E-02", first.Identity);
}
// Formats the first record of the sample TSV file with FilterItemVcfWriter.GetValue
// (GlmPvalue set to 0.01) and compares the result with the expected VCF line.
public void TestWriteToFile()
{
    var items = new FilterItemTextFormat().ReadFromFile("../../../data/TCGA-A7-A0D9-DNA-TP-NB.tsv");

    var writerOptions = new FilterProcessorOptions();
    writerOptions.GlmPvalue = 0.01;
    var writer = new FilterItemVcfWriter(writerOptions);

    const string expected = "1\t37568524\t.\tA\tG\t1.05\tGLM_FDR\tBGP=5.6E-2;BGF=8.9E-2;BC=TRUE\tGT:AD:FA\t0/0:14,0:0\t0/1:7,5:0.417";
    var actual = writer.GetValue(items[0]);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Runs the configured R script through <c>RProcessor</c> and, unless
/// <c>_options.IsValidation</c> is set, converts the R output into result files:
/// a VCF of all records next to the R output file, plus a VCF and a text file
/// containing only the records whose filter is "PASS".
/// </summary>
/// <returns>A single-element array holding <c>_options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// Thrown when <c>_options.ROutputFile</c> does not exist after the R run.
/// </exception>
public override IEnumerable<string> Process()
{
    Progress.SetMessage("filter process started at {0}", DateTime.Now);
    var watch = Stopwatch.StartNew();

    var roptions = new RProcessorOptions()
    {
        RExecute = _options.GetRCommand(),
        RFile = _options.TargetRFile,
        ExpectResultFile = _options.ROutputFile
    };
    new RProcessor(roptions).Process();

    // The R run is considered failed when the expected output file is missing.
    if (!File.Exists(_options.ROutputFile))
    {
        throw new Exception(string.Format("R command failed, look at the file {0}!\nMake sure that your R and R packages brglm, stringr have been installed.", roptions.RFile + ".log"));
    }

    if (!_options.IsValidation)
    {
        var items = new FilterItemTextFormat().ReadFromFile(_options.ROutputFile);

        // Write every record before filtering, next to the R output file.
        var unfilteredfile = Path.ChangeExtension(_options.ROutputFile, ".vcf");
        new FilterItemVcfWriter(_options).WriteToFile(unfilteredfile, items);

        // Keep only the records that passed. The static comparison is null-safe,
        // so a record without a filter value is dropped instead of throwing.
        items.RemoveAll(m => !string.Equals(m.Filter, "PASS"));

        var vcfFile = Path.ChangeExtension(_options.OutputFile, ".vcf");
        new FilterItemVcfWriter(_options).WriteToFile(vcfFile, items);
        new FilterItemTextFormat().WriteToFile(_options.OutputFile, items);
    }

    watch.Stop();
    Progress.SetMessage("filter process ended at {0}, cost {1}", DateTime.Now, watch.Elapsed);

    return new[] { _options.OutputFile };
}
/// <summary>
/// Produces validation results for every mutation returned by <c>GetValidationList()</c>.
/// Runs the base processing when the base file is missing or empty, runs the filter
/// step when the base file has content, then writes one record per mutation to
/// "<c>OutputSuffix</c>.vcf" and "<c>OutputSuffix</c>.tsv".
/// </summary>
/// <returns>The paths of the written .tsv and .vcf files, in that order.</returns>
/// <exception cref="Exception">
/// Thrown when <c>filterOptions.PrepareOptions()</c> reports a failure.
/// </exception>
public override IEnumerable<string> Process()
{
    // NOTE(review): this method reads both `options` and `_options`; confirm they
    // refer to the same options object.
    if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
    {
        base.Process();
    }
    else
    {
        Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
    }

    var filterOptions = options.GetFilterOptions();

    if (new FileInfo(_options.BaseFilename).Length > 0)
    {
        if (!filterOptions.PrepareOptions())
        {
            throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
        }

        new FilterProcessor(filterOptions).Process();

        // Count the R output records (header skipped) that mention GLM_PVALUE.
        var lines = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
        var glmfailed = lines.Count(m => m.Contains("GLM_PVALUE"));

        // Replace a trailing "glm pvalue" summary line if there is one. The count
        // check keeps an empty summary file from throwing.
        var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();
        if (summarylines.Count > 0 && summarylines[summarylines.Count - 1].StartsWith("glm pvalue"))
        {
            summarylines.RemoveAt(summarylines.Count - 1);
        }

        summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
        File.WriteAllLines(_options.SummaryFilename, summarylines);
    }

    var mutationList = GetValidationList();

    var candidates = new MpileupFisherResultFileFormat()
        .ReadFromFile(options.CandidatesFilename)
        .ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));

    // NOTE(review): the R output file is read even when the filter step above was
    // skipped; confirm the file is expected to exist in that case.
    var items = new FilterItemTextFormat()
        .ReadFromFile(filterOptions.ROutputFile)
        .ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

    var result = new List<FilterItem>();
    foreach (var mutation in mutationList.Items)
    {
        var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);

        // Single lookup: reuse the filter result when one exists for this position.
        FilterItem filtered;
        if (items.TryGetValue(key, out filtered))
        {
            result.Add(filtered);
            continue;
        }

        // No filter result: build a placeholder record for the mutation.
        var item = new FilterItem();
        item.Chr = mutation.Chr;
        item.Start = mutation.Pos;
        item.End = item.Start;
        item.FisherNormal = string.Empty;
        item.BrglmConverged = string.Empty;
        item.BrglmGroup = 1.0;
        item.BrglmGroupFdr = 1.0;
        item.BrglmScore = string.Empty;
        item.BrglmStrand = string.Empty;
        item.BrglmPosition = string.Empty;
        item.Identity = string.Empty;
        result.Add(item);

        if (candidates.ContainsKey(key))
        {
            // The position is in the candidates file: copy its alleles, counts,
            // p-value and failure reason.
            var cand = candidates[key];
            item.ReferenceAllele = cand.Item.Nucleotide.ToString();
            item.MajorAllele = cand.Group.SucceedName;
            item.MinorAllele = cand.Group.FailedName;
            item.NormalMajorCount = cand.Group.Sample1.Succeed;
            item.NormalMinorCount = cand.Group.Sample1.Failed;
            item.TumorMajorCount = cand.Group.Sample2.Succeed;
            item.TumorMinorCount = cand.Group.Sample2.Failed;
            item.FisherGroup = cand.Group.PValue;
            item.Filter = cand.FailedReason;
            Console.WriteLine("In candidates : " + item.Filter);
        }
        else
        {
            // The position is in neither file: report zero counts.
            item.NormalMajorCount = 0;
            item.NormalMinorCount = 0;
            item.TumorMajorCount = 0;
            item.TumorMinorCount = 0;
            item.Filter = "No coverage";
            Console.WriteLine("No read : " + item.Filter);
        }
    }

    var vcfFile = _options.OutputSuffix + ".vcf";
    var tsvFile = _options.OutputSuffix + ".tsv";

    new FilterItemVcfWriter(filterOptions).WriteToFile(vcfFile, result);
    new FilterItemTextFormat().WriteToFile(tsvFile, result);

    return new string[] { tsvFile, vcfFile };
}