Ejemplo n.º 1
0
 /// <summary>
 /// Loads the sample TSV through <c>FilterItemTextFormat.ReadFromFile</c> and checks the
 /// number of records plus every field of the first record.
 /// </summary>
 public void TestReadFromFile()
 {
   const string dataFile = "../../../data/TCGA-A7-A0D9-DNA-TP-NB.tsv";
   var parsed = new FilterItemTextFormat().ReadFromFile(dataFile);

   Assert.AreEqual(117, parsed.Count);

   // All remaining assertions inspect the first record only.
   var first = parsed[0];

   // Locus and alleles.
   Assert.AreEqual("1", first.Chr);
   Assert.AreEqual("37568524", first.Start);
   Assert.AreEqual("37568524", first.End);
   Assert.AreEqual("A", first.MajorAllele);
   Assert.AreEqual("G", first.MinorAllele);
   Assert.AreEqual("a", first.ReferenceAllele);

   // Allele counts for the normal and tumor samples.
   Assert.AreEqual(14, first.NormalMajorCount);
   Assert.AreEqual(0, first.NormalMinorCount);
   Assert.AreEqual(7, first.TumorMajorCount);
   Assert.AreEqual(5, first.TumorMinorCount);

   // Fisher and brglm columns.
   Assert.AreEqual(0.0120401337792642, first.FisherGroup);
   Assert.AreEqual("1", first.FisherNormal);
   Assert.AreEqual("TRUE", first.BrglmConverged);
   Assert.AreEqual(0.0559533239203086, first.BrglmGroup);
   Assert.AreEqual("", first.BrglmScore);
   Assert.AreEqual("", first.BrglmStrand);
   Assert.AreEqual("", first.BrglmPosition);
   Assert.AreEqual(0.0891174471997679, first.BrglmGroupFdr);

   // Filter verdict and the identity string.
   Assert.AreEqual("GLM_FDR", first.Filter);
   Assert.AreEqual("1_37568524_a_A_G_14_0_7_5_1.2E-02", first.Identity);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Formats the first record of the sample TSV with <c>FilterItemVcfWriter.GetValue</c>
 /// (GlmPvalue = 0.01) and compares the result with the expected VCF line.
 /// </summary>
 public void TestWriteToFile()
 {
   const string dataFile = "../../../data/TCGA-A7-A0D9-DNA-TP-NB.tsv";
   const string expectedLine = "1\t37568524\t.\tA\tG\t1.05\tGLM_FDR\tBGP=5.6E-2;BGF=8.9E-2;BC=TRUE\tGT:AD:FA\t0/0:14,0:0\t0/1:7,5:0.417";

   var parsed = new FilterItemTextFormat().ReadFromFile(dataFile);

   var writerOptions = new FilterProcessorOptions() { GlmPvalue = 0.01 };
   var vcfWriter = new FilterItemVcfWriter(writerOptions);

   var actualLine = vcfWriter.GetValue(parsed[0]);

   Assert.AreEqual(expectedLine, actualLine);
 }
Ejemplo n.º 3
0
    /// <summary>
    /// Runs <c>RProcessor</c> with the configured R command and script, verifies that the
    /// expected R output file was produced and, unless <c>_options.IsValidation</c> is set,
    /// converts that output into VCF/text result files.
    /// </summary>
    /// <returns>A single-element sequence containing <c>_options.OutputFile</c>.</returns>
    /// <exception cref="Exception">
    /// Thrown when <c>_options.ROutputFile</c> does not exist after the R step has run.
    /// </exception>
    public override IEnumerable<string> Process()
    {
      Progress.SetMessage("filter process started at {0}", DateTime.Now);
      var watch = Stopwatch.StartNew();

      var roptions = new RProcessorOptions()
      {
        RExecute = _options.GetRCommand(),
        RFile = _options.TargetRFile,
        ExpectResultFile = _options.ROutputFile
      };

      new RProcessor(roptions).Process();

      // Guard clause: without the R output there is nothing to convert.
      if (!File.Exists(_options.ROutputFile))
      {
        throw new Exception(string.Format("R command failed, look at the file {0}!\nMake sure that your R and R packages brglm, stringr have been installed.", roptions.RFile + ".log"));
      }

      if (!_options.IsValidation)
      {
        var items = new FilterItemTextFormat().ReadFromFile(_options.ROutputFile);

        // First write every item (whatever its filter value) next to the R output ...
        var unfilteredfile = Path.ChangeExtension(_options.ROutputFile, ".vcf");
        new FilterItemVcfWriter(_options).WriteToFile(unfilteredfile, items);

        // ... then keep only the items whose filter is exactly "PASS" for the final outputs.
        items.RemoveAll(m => !m.Filter.Equals("PASS"));
        var vcfFile = Path.ChangeExtension(_options.OutputFile, ".vcf");
        new FilterItemVcfWriter(_options).WriteToFile(vcfFile, items);

        new FilterItemTextFormat().WriteToFile(_options.OutputFile, items);
      }

      watch.Stop();
      Progress.SetMessage("filter process ended at {0}, cost {1}", DateTime.Now, watch.Elapsed);

      return new[] { _options.OutputFile };
    }
Ejemplo n.º 4
0
    /// <summary>
    /// Produces one <c>FilterItem</c> per entry of the validation list and writes them as
    /// "&lt;OutputSuffix&gt;.vcf" and "&lt;OutputSuffix&gt;.tsv".
    /// </summary>
    /// <remarks>
    /// Steps, in order:
    /// 1. Run the base processor unless the base file already exists and is non-empty.
    /// 2. If the base file is non-empty, run <c>FilterProcessor</c> and refresh the trailing
    ///    "glm pvalue" line of the summary file.
    /// 3. For every validation mutation, take the filter result at the same position if there
    ///    is one; otherwise build a placeholder item, filled from the candidates file when the
    ///    position is present there, or marked "No coverage" when it is not.
    /// </remarks>
    /// <returns>The paths of the written TSV and VCF files, in that order.</returns>
    /// <exception cref="Exception">Thrown when <c>filterOptions.PrepareOptions()</c> fails.</exception>
    public override IEnumerable<string> Process()
    {
      if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
      {
        base.Process();
      }
      else
      {
        Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
      }

      var filterOptions = options.GetFilterOptions();

      if (new FileInfo(_options.BaseFilename).Length > 0)
      {
        if (!filterOptions.PrepareOptions())
        {
          throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
        }

        new FilterProcessor(filterOptions).Process();

        // Skip the first line of the R output, then count the lines mentioning GLM_PVALUE.
        var lines = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
        var glmfailed = lines.Count(m => m.Contains("GLM_PVALUE"));

        var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();

        // Drop the "glm pvalue" line left by a previous run so it is not duplicated.
        // The Count check avoids Last() throwing on an empty summary file.
        if (summarylines.Count > 0 && summarylines.Last().StartsWith("glm pvalue"))
        {
          summarylines.RemoveAt(summarylines.Count - 1);
        }
        summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
        File.WriteAllLines(_options.SummaryFilename, summarylines);
      }

      var mutationList = GetValidationList();

      // Both lookups are keyed by GenomeUtils.GetKey(chromosome, position).
      var candidates = new MpileupFisherResultFileFormat().ReadFromFile(options.CandidatesFilename).ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));
      var items = new FilterItemTextFormat().ReadFromFile(filterOptions.ROutputFile).ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

      var result = new List<FilterItem>();
      foreach (var mutation in mutationList.Items)
      {
        var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);
        if (items.ContainsKey(key))
        {
          // The position is present in the filter output: reuse that record unchanged.
          result.Add(items[key]);
          continue;
        }

        // No filter record at this position: build a placeholder with neutral defaults.
        var item = new FilterItem();
        item.Chr = mutation.Chr;
        item.Start = mutation.Pos;
        item.End = item.Start;
        item.FisherNormal = string.Empty;
        item.BrglmConverged = string.Empty;
        item.BrglmGroup = 1.0;
        item.BrglmGroupFdr = 1.0;
        item.BrglmScore = string.Empty;
        item.BrglmStrand = string.Empty;
        item.BrglmPosition = string.Empty;
        item.Identity = string.Empty;
        result.Add(item);

        if (candidates.ContainsKey(key))
        {
          // The position was a candidate: copy its alleles, counts, p-value and failed reason.
          var cand = candidates[key];
          item.ReferenceAllele = cand.Item.Nucleotide.ToString();
          item.MajorAllele = cand.Group.SucceedName;
          item.MinorAllele = cand.Group.FailedName;
          item.NormalMajorCount = cand.Group.Sample1.Succeed;
          item.NormalMinorCount = cand.Group.Sample1.Failed;
          item.TumorMajorCount = cand.Group.Sample2.Succeed;
          item.TumorMinorCount = cand.Group.Sample2.Failed;
          item.FisherGroup = cand.Group.PValue;
          item.Filter = cand.FailedReason;
          Console.WriteLine("In candidates : " + item.Filter);
        }
        else
        {
          // Not in the candidates file either: zero counts, flagged as "No coverage".
          item.NormalMajorCount = 0;
          item.NormalMinorCount = 0;
          item.TumorMajorCount = 0;
          item.TumorMinorCount = 0;
          item.Filter = "No coverage";
          Console.WriteLine("No read : " + item.Filter);
        }
      }

      // Compute the output names once; they are used for both writing and the return value.
      var vcfFile = _options.OutputSuffix + ".vcf";
      var tsvFile = _options.OutputSuffix + ".tsv";

      new FilterItemVcfWriter(filterOptions).WriteToFile(vcfFile, result);
      new FilterItemTextFormat().WriteToFile(tsvFile, result);

      return new string[] { tsvFile, vcfFile };
    }