예제 #1
0
        /// <summary>
        /// Runs the configured R script through <c>RProcessor</c> and, unless the options are
        /// flagged as validation, converts the R output into VCF and text result files.
        /// </summary>
        /// <returns>A single-element sequence containing <c>_options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// Thrown when the expected R output file does not exist after the R run.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("filter process started at {0}", DateTime.Now);
            var watch = Stopwatch.StartNew();

            var roptions = new RProcessorOptions()
            {
                RExecute         = _options.GetRCommand(),
                RFile            = _options.TargetRFile,
                ExpectResultFile = _options.ROutputFile
            };

            new RProcessor(roptions).Process();

            // The R run is considered failed when it did not produce its output file.
            if (!File.Exists(_options.ROutputFile))
            {
                throw new Exception(string.Format("R command failed, look at the file {0}!\nMake sure that your R and R packages brglm, stringr have been installed.", roptions.RFile + ".log"));
            }

            if (!_options.IsValidation)
            {
                var items = new FilterItemTextFormat().ReadFromFile(_options.ROutputFile);

                // Write every item (before filtering) as a VCF next to the R output file.
                var unfilteredfile = Path.ChangeExtension(_options.ROutputFile, ".vcf");
                new FilterItemVcfWriter(_options).WriteToFile(unfilteredfile, items);

                // Keep only items whose filter is exactly "PASS"; the static Equals also
                // treats a null filter as "not passed" instead of throwing.
                items.RemoveAll(m => !string.Equals(m.Filter, "PASS"));

                var vcfFile = Path.ChangeExtension(_options.OutputFile, ".vcf");
                new FilterItemVcfWriter(_options).WriteToFile(vcfFile, items);

                new FilterItemTextFormat().WriteToFile(_options.OutputFile, items);
            }

            watch.Stop();
            Progress.SetMessage("filter process ended at {0}, cost {1}", DateTime.Now, watch.Elapsed);

            return new[] { _options.OutputFile };
        }
예제 #2
0
        /// <summary>
        /// Ensures the base file exists (running the base processor when it is missing or empty),
        /// runs the filter step on a non-empty base file and updates the summary file, then builds
        /// one result item per entry of the validation list and writes them as VCF and TSV files.
        /// </summary>
        /// <returns>The paths of the written TSV and VCF files, in that order.</returns>
        /// <exception cref="Exception">Thrown when the filter options fail to prepare.</exception>
        public override IEnumerable <string> Process()
        {
            // Reuse an existing, non-empty base file instead of regenerating it.
            if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
            {
                base.Process();
            }
            else
            {
                Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
            }

            // NOTE(review): this method reads both `options` and `_options`; confirm both
            // refer to the intended option objects.
            var filterOptions = options.GetFilterOptions();

            if (new FileInfo(_options.BaseFilename).Length > 0)
            {
                if (!filterOptions.PrepareOptions())
                {
                    throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
                }

                new FilterProcessor(filterOptions).Process();

                // Count R output rows (first line skipped) that mention GLM_PVALUE.
                var lines        = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
                var glmfailed    = lines.Count(m => m.Contains("GLM_PVALUE"));
                var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();

                // Replace a trailing "glm pvalue" line left by a previous run; the count
                // check keeps an empty summary file from throwing.
                if (summarylines.Count > 0 && summarylines[summarylines.Count - 1].StartsWith("glm pvalue", StringComparison.Ordinal))
                {
                    summarylines.RemoveAt(summarylines.Count - 1);
                }
                summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
                File.WriteAllLines(_options.SummaryFilename, summarylines);
            }

            // NOTE(review): the R output file is read below even when the filter step above
            // was skipped (empty base file) - confirm the file is expected to exist then.
            var mutationList = GetValidationList();
            var candidates   = new MpileupFisherResultFileFormat().ReadFromFile(options.CandidatesFilename).ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));
            var items        = new FilterItemTextFormat().ReadFromFile(filterOptions.ROutputFile).ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

            var result = new List <FilterItem>();

            foreach (var mutation in mutationList.Items)
            {
                var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);

                // A mutation present in the filter output is reported as-is.
                if (items.ContainsKey(key))
                {
                    result.Add(items[key]);
                    continue;
                }

                // Otherwise build a placeholder item for the mutation position.
                var position = mutation.Pos.ToString();
                var item = new FilterItem()
                {
                    Chr            = mutation.Chr,
                    Start          = position,
                    End            = position,
                    FisherNormal   = string.Empty,
                    BrglmConverged = string.Empty,
                    BrglmGroup     = 1.0,
                    BrglmGroupFdr  = 1.0,
                    BrglmScore     = string.Empty,
                    BrglmStrand    = string.Empty,
                    BrglmPosition  = string.Empty,
                    Identity       = string.Empty
                };

                if (candidates.ContainsKey(key))
                {
                    // The position was a candidate: copy its counts and failure reason.
                    var cand = candidates[key];
                    item.ReferenceAllele  = cand.Item.Nucleotide.ToString();
                    item.MajorAllele      = cand.Group.SucceedName;
                    item.MinorAllele      = cand.Group.FailedName;
                    item.NormalMajorCount = cand.Group.Sample1.Succeed;
                    item.NormalMinorCount = cand.Group.Sample1.Failed;
                    item.TumorMajorCount  = cand.Group.Sample2.Succeed;
                    item.TumorMinorCount  = cand.Group.Sample2.Failed;
                    item.FisherGroup      = cand.Group.PValue;
                    item.Filter           = cand.FailedReason;
                    Console.WriteLine("In candidates : " + item.Filter);
                }
                else
                {
                    // Not among the candidates either: report zero counts.
                    item.NormalMajorCount = 0;
                    item.NormalMinorCount = 0;
                    item.TumorMajorCount  = 0;
                    item.TumorMinorCount  = 0;
                    item.Filter           = "No coverage";
                    Console.WriteLine("No read : " + item.Filter);
                }

                result.Add(item);
            }

            var vcfFile = _options.OutputSuffix + ".vcf";
            var tsvFile = _options.OutputSuffix + ".tsv";

            new FilterItemVcfWriter(filterOptions).WriteToFile(vcfFile, result);
            new FilterItemTextFormat().WriteToFile(tsvFile, result);

            return new string[] { tsvFile, vcfFile };
        }