Exemple #1
0
        /// <summary>
        /// Processes the configured chromosomes on thread-pool workers, waits for the workers,
        /// then merges the per-chromosome summary files and collects the candidate files found
        /// in the candidates directory.
        /// </summary>
        /// <returns>
        /// A result whose summary counts are merged from every chromosome and whose
        /// <c>Results</c> list has one entry per <c>*.wsm</c> file in the candidates directory.
        /// </returns>
        protected override MpileupResult GetMpileupResult()
        {
            Progress.SetMessage("Multiple thread mode, parallel by chromosome ...");

            _threadCount = 0;

            // Work queue handed to every worker together with the cancellation source.
            var pending = new ConcurrentQueue<string>();
            foreach (var name in _options.ChromosomeNames)
            {
                pending.Enqueue(name);
            }

            var cancellation = new CancellationTokenSource();

            // Never queue more workers than there are chromosomes to process.
            var workerCount = Math.Min(_options.ThreadCount, _options.ChromosomeNames.Count);
            var queued = 0;
            while (queued < workerCount)
            {
                var state = new Tuple<CancellationTokenSource, ConcurrentQueue<string>>(cancellation, pending);
                ThreadPool.QueueUserWorkItem(ParallelChromosome, state);
                queued++;
            }

            // NOTE(review): the fixed delay presumably gives the workers time to register
            // themselves in _threadCount before polling starts; ParallelChromosome is not
            // visible here, so confirm that it maintains the counter.
            Thread.Sleep(5000);

            // Poll until the counter reports that no worker is active.
            while (_threadCount > 0)
            {
                Thread.Sleep(100);
            }

            Progress.SetMessage("After thread finished ...");
            var merged = new MpileupResult(string.Empty, _options.CandidatesDirectory);

            Progress.SetMessage("Merging summary information ...");
            foreach (var name in _options.ChromosomeNames)
            {
                // Each chromosome has its own summary file; fold it into the overall result.
                var chromosomeResult = new MpileupResult(name, _options.CandidatesDirectory);
                var counts = new MpileupResultCountFormat(_options, false).ReadFromFile(chromosomeResult.CandidateSummary);
                merged.MergeWith(counts);
            }

            Progress.SetMessage("Loading candidates ...");
            var candidateFiles = Directory.GetFiles(_options.CandidatesDirectory, "*.wsm");
            foreach (var path in candidateFiles)
            {
                // The candidate is parsed back from the file name (without its extension).
                var candidate = MpileupFisherResultFileFormat.ParseString(Path.GetFileNameWithoutExtension(path));
                candidate.CandidateFile = path;
                merged.Results.Add(candidate);
            }

            return merged;
        }
        /// <summary>
        /// Parses one pileup line and runs it through the read-depth, single-allele,
        /// minor-allele and Fisher exact test filters. A result object is always returned;
        /// when a filter rejects the line, <c>FailedReason</c> is set on the result and the
        /// matching counter on <c>_result</c> is incremented.
        /// </summary>
        /// <param name="line">The pileup line to parse.</param>
        /// <param name="writeCandidateFile">
        /// When true, a line that passes every filter is also written to a <c>.wsm</c> file
        /// in the candidates directory.
        /// </param>
        /// <returns>The parsed item with its Fisher table and, if rejected, the failure reason.</returns>
        public MpileupFisherResult Parse(string line, bool writeCandidateFile = true)
        {
            MpileupFisherResult outcome = new MpileupFisherResult();

            // NOTE(review): the parsed item is dereferenced without a null check; confirm that
            // _parser.GetValue cannot return null for the lines passed to this method.
            var pileup = _parser.GetValue(line);

            // First sample is labelled normal, second tumor.
            pileup.Samples[0].SampleName = "NORMAL";
            pileup.Samples[1].SampleName = "TUMOR";

            var pairedEvent = pileup.GetPairedEvent();
            var table = pileup.InitializeTable(pairedEvent);

            // Item and table are attached up front so rejected results carry them too.
            outcome.Item = pileup;
            outcome.Group = table;

            // Filter 1: read depth.
            if (!_rdFilter.Accept(pileup))
            {
                outcome.FailedReason = _rdFilter.RejectReason;
                _result.MinimumReadDepthFailed++;
                return outcome;
            }

            // Filter 2: every sample shows the same base, so there is nothing to compare.
            if (pileup.OnlyOneEvent())
            {
                outcome.FailedReason = "Only one allele detected";
                _result.OneEventFailed++;
                return outcome;
            }

            // Filter 3: the failed percentage must not be higher in sample 1 than in sample 2.
            if (table.Sample1.FailedPercentage > table.Sample2.FailedPercentage)
            {
                outcome.FailedReason = "MAF decreased in tumor";
                _result.MinorAlleleDecreasedFailed++;
                return outcome;
            }

            // Filter 4: normal-sample test.
            if (!_normalTest.Accept(table))
            {
                outcome.FailedReason = _normalTest.RejectReason;
                _result.MinorAlleleFailedInNormalSample++;
                return outcome;
            }

            // Filter 5: tumor-sample test.
            if (!_tumorTest.Accept(table))
            {
                outcome.FailedReason = _tumorTest.RejectReason;
                _result.MinorAlleleFailedInTumorSample++;
                return outcome;
            }

            // Filter 6: two-tailed Fisher exact test between the groups.
            table.CalculateTwoTailPValue();
            if (table.PValue > _options.FisherPvalue)
            {
                outcome.FailedReason = string.Format("Fisher pvalue > {0}", _options.FisherPvalue);
                _result.GroupFisherFailed++;
                return outcome;
            }

            if (writeCandidateFile)
            {
                // Only the major and minor events are kept in the candidate file.
                var alleles = new HashSet<string>(new[] { pairedEvent.MajorEvent, pairedEvent.MinorEvent });
                var candidateWriter = new PileupItemFile(alleles);

                outcome.CandidateFile = string.Format("{0}/{1}.wsm", _options.CandidatesDirectory, MpileupFisherResultFileFormat.GetString(outcome, '_', false));
                candidateWriter.WriteToFile(outcome.CandidateFile, pileup);
            }

            Progress.SetMessage("{0}\t{1}\t{2}", pileup.SequenceIdentifier, pileup.Position, table);

            return outcome;
        }
Exemple #3
0
        /// <summary>
        /// Runs the pileup step when the base file is missing or empty, runs the filter step
        /// when the base file has content, and then writes one output entry for every site in
        /// the validation list.
        /// </summary>
        /// <returns>The paths of the written <c>.tsv</c> and <c>.vcf</c> files, in that order.</returns>
        /// <exception cref="Exception">Thrown when the filter options cannot be prepared.</exception>
        public override IEnumerable <string> Process()
        {
            if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
            {
                base.Process();
            }
            else
            {
                Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
            }

            var filterOptions = options.GetFilterOptions();

            // A missing base file is treated like an empty one, as in the check above.
            // FileInfo.Length throws FileNotFoundException for a file that does not exist,
            // so existence has to be tested before the length is read.
            if (File.Exists(_options.BaseFilename) && new FileInfo(_options.BaseFilename).Length > 0)
            {
                if (!filterOptions.PrepareOptions())
                {
                    throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
                }

                new FilterProcessor(filterOptions).Process();

                // The first line of the R output is skipped before counting.
                var lines        = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
                var glmfailed    = lines.Count(m => m.Contains("GLM_PVALUE"));
                var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();

                // Replace a "glm pvalue" line left by a previous run instead of appending a
                // second one. The count check keeps an empty summary file from throwing.
                if (summarylines.Count > 0 && summarylines[summarylines.Count - 1].StartsWith("glm pvalue", StringComparison.Ordinal))
                {
                    summarylines.RemoveAt(summarylines.Count - 1);
                }
                summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
                File.WriteAllLines(_options.SummaryFilename, summarylines);
            }

            // Candidates and filter output are both indexed by the chromosome/position key.
            var mutationList = GetValidationList();
            var candidates   = new MpileupFisherResultFileFormat().ReadFromFile(options.CandidatesFilename).ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));
            var items        = new FilterItemTextFormat().ReadFromFile(filterOptions.ROutputFile).ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

            var result = new List <FilterItem>();

            foreach (var mutation in mutationList.Items)
            {
                var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);
                if (items.ContainsKey(key))
                {
                    // The site is present in the filter output: report it unchanged.
                    result.Add(items[key]);
                }
                else
                {
                    // The site is absent from the filter output: build a placeholder entry.
                    var item = new FilterItem();
                    item.Chr            = mutation.Chr;
                    item.Start          = mutation.Pos.ToString();
                    item.End            = item.Start;
                    item.FisherNormal   = string.Empty;
                    item.BrglmConverged = string.Empty;
                    item.BrglmGroup     = 1.0;
                    item.BrglmGroupFdr  = 1.0;
                    item.BrglmScore     = string.Empty;
                    item.BrglmStrand    = string.Empty;
                    item.BrglmPosition  = string.Empty;
                    item.Identity       = string.Empty;
                    result.Add(item);

                    if (candidates.ContainsKey(key))
                    {
                        // The site was parsed as a candidate: copy its alleles, counts and
                        // the reason it was rejected.
                        var cand = candidates[key];
                        item.ReferenceAllele  = cand.Item.Nucleotide.ToString();
                        item.MajorAllele      = cand.Group.SucceedName;
                        item.MinorAllele      = cand.Group.FailedName;
                        item.NormalMajorCount = cand.Group.Sample1.Succeed;
                        item.NormalMinorCount = cand.Group.Sample1.Failed;
                        item.TumorMajorCount  = cand.Group.Sample2.Succeed;
                        item.TumorMinorCount  = cand.Group.Sample2.Failed;
                        item.FisherGroup      = cand.Group.PValue;
                        item.Filter           = cand.FailedReason;
                        Console.WriteLine("In candidates : " + item.Filter);
                    }
                    else
                    {
                        // The site is in neither file: report zero counts.
                        item.NormalMajorCount = 0;
                        item.NormalMinorCount = 0;
                        item.TumorMajorCount  = 0;
                        item.TumorMinorCount  = 0;
                        item.Filter           = "No coverage";
                        Console.WriteLine("No read : " + item.Filter);
                    }
                }
            }

            new FilterItemVcfWriter(filterOptions).WriteToFile(_options.OutputSuffix + ".vcf", result);
            new FilterItemTextFormat().WriteToFile(_options.OutputSuffix + ".tsv", result);

            return(new string[] { _options.OutputSuffix + ".tsv", _options.OutputSuffix + ".vcf" });
        }
        /// <summary>
        /// Parses one tab-separated pileup line and returns a candidate only when the line
        /// passes every filter. Each rejection increments the matching counter on
        /// <c>_result</c> and yields <c>null</c>.
        /// </summary>
        /// <param name="line">The tab-separated pileup line to parse.</param>
        /// <param name="writeCandidateFile">
        /// When true, an accepted candidate is also written to a <c>.wsm</c> file in the
        /// candidates directory.
        /// </param>
        /// <returns>The accepted candidate, or <c>null</c> when the line was ignored or rejected.</returns>
        public MpileupFisherResult Parse(string line, bool writeCandidateFile = true)
        {
            var fields = line.Split('\t');

            // The position check is only made when the bed map has entries.
            if (bedMap.Count > 0)
            {
                var location = _parser.GetSequenceIdentifierAndPosition(fields);
                if (IsIgnored(location.SequenceIdentifier, location.Position))
                {
                    _result.Ignored++;
                    return null;
                }
            }

            // Quick pre-checks on the raw fields; the minimum score requirement is not
            // considered at this stage.
            if (!_parser.HasEnoughReads(fields))
            {
                _result.MinimumReadDepthFailed++;
                return null;
            }

            if (!_parser.HasMinorAllele(fields))
            {
                _result.OneEventFailed++;
                return null;
            }

            // Full parse, this time taking the score limitation into account.
            var pileup = _parser.GetValue(fields);
            if (pileup == null)
            {
                _result.MinimumReadDepthFailed++;
                return null;
            }

            // Every sample shows the same base, so there is nothing to compare.
            if (pileup.OnlyOneEvent())
            {
                _result.OneEventFailed++;
                return null;
            }

            // First sample is labelled normal, second tumor.
            pileup.Samples[0].SampleName = "NORMAL";
            pileup.Samples[1].SampleName = "TUMOR";

            var pairedEvent = pileup.GetPairedEvent();
            var table = pileup.InitializeTable(pairedEvent);

            // The failed percentage must not be higher in sample 1 than in sample 2.
            if (table.Sample1.FailedPercentage > table.Sample2.FailedPercentage)
            {
                _result.MinorAlleleDecreasedFailed++;
                return null;
            }

            // The tumor test runs before the normal test.
            if (!_tumorTest.Accept(table))
            {
                _result.MinorAlleleFailedInTumorSample++;
                return null;
            }

            if (!_normalTest.Accept(table))
            {
                _result.MinorAlleleFailedInNormalSample++;
                return null;
            }

            // Two-tailed Fisher exact test between the groups. The zero-minor-allele
            // threshold applies only when that strategy is enabled and sample 1 has no
            // failed count; otherwise the regular threshold is used.
            table.CalculateTwoTailPValue();
            var pvalueLimit = (_options.UseZeroMinorAlleleStrategy && table.Sample1.Failed == 0)
                ? _options.ZeroMinorAlleleStrategyFisherPvalue
                : _options.FisherPvalue;
            if (table.PValue > pvalueLimit)
            {
                _result.GroupFisherFailed++;
                return null;
            }

            // Every criterion passed.
            _result.CandidateCount++;

            var candidate = new MpileupFisherResult();
            candidate.Item = pileup;
            candidate.Group = table;

            if (writeCandidateFile)
            {
                // Only the major and minor events are kept in the candidate file.
                var alleles = new HashSet<string>(new[] { pairedEvent.MajorEvent, pairedEvent.MinorEvent });
                var candidateWriter = new PileupItemFile(alleles);

                candidate.CandidateFile = string.Format("{0}/{1}.wsm", _options.CandidatesDirectory, MpileupFisherResultFileFormat.GetString(candidate, '_', false));
                candidateWriter.WriteToFile(candidate.CandidateFile, pileup);
            }

            Progress.SetMessage("{0}\t{1}\t{2}", pileup.SequenceIdentifier, pileup.Position, table);

            return candidate;
        }