/// <summary>
/// Queues per-chromosome work items on the thread pool, waits for them to finish,
/// then merges the per-chromosome summary files and collects the candidate files
/// found in the candidates directory.
/// </summary>
/// <returns>
/// A result created with an empty name for the candidates directory, merged with every
/// chromosome summary and holding one entry per "*.wsm" file in that directory.
/// </returns>
protected override MpileupResult GetMpileupResult()
{
    Progress.SetMessage("Multiple thread mode, parallel by chromosome ...");

    _threadCount = 0;

    // Shared work queue: one entry per chromosome name, in the configured order.
    var pendingChromosomes = new ConcurrentQueue<string>(_options.ChromosomeNames);

    var cancellation = new CancellationTokenSource();

    // Never queue more work items than there are chromosomes.
    var workerCount = Math.Min(_options.ThreadCount, _options.ChromosomeNames.Count);
    for (int worker = 0; worker < workerCount; worker++)
    {
        // Every work item receives the cancellation source and the shared queue.
        var state = new Tuple<CancellationTokenSource, ConcurrentQueue<string>>(cancellation, pendingChromosomes);
        ThreadPool.QueueUserWorkItem(ParallelChromosome, state);
    }

    // Initial 5 second pause, then poll every 100 ms until _threadCount is no longer positive.
    // NOTE(review): _threadCount is presumably maintained by ParallelChromosome - confirm.
    Thread.Sleep(5000);
    while (_threadCount > 0)
    {
        Thread.Sleep(100);
    }

    Progress.SetMessage("After thread finished ...");

    var merged = new MpileupResult(string.Empty, _options.CandidatesDirectory);

    Progress.SetMessage("Merging summary information ...");
    foreach (var chromosome in _options.ChromosomeNames)
    {
        var chromosomeResult = new MpileupResult(chromosome, _options.CandidatesDirectory);
        var counts = new MpileupResultCountFormat(_options, false).ReadFromFile(chromosomeResult.CandidateSummary);
        merged.MergeWith(counts);
    }

    Progress.SetMessage("Loading candidates ...");
    var candidateFiles = Directory.GetFiles(_options.CandidatesDirectory, "*.wsm");
    foreach (var candidateFile in candidateFiles)
    {
        // The candidate entry is rebuilt from the file name (without extension).
        var candidateName = Path.GetFileNameWithoutExtension(candidateFile);
        var candidate = MpileupFisherResultFileFormat.ParseString(candidateName);
        candidate.CandidateFile = candidateFile;
        merged.Results.Add(candidate);
    }

    return merged;
}
/// <summary>
/// Parses one pileup line as a NORMAL/TUMOR sample pair and runs it through the filter
/// chain. Unlike a null-returning parser, this method always returns a result object;
/// when a filter rejects the entry, <c>FailedReason</c> is set and the matching failure
/// counter on <c>_result</c> is incremented.
/// </summary>
/// <param name="line">The pileup line handed to <c>_parser.GetValue</c>.</param>
/// <param name="writeCandidateFile">
/// When true, an entry that passes every filter is written to a ".wsm" file in the
/// candidates directory and the path is stored in <c>CandidateFile</c>.
/// </param>
/// <returns>The parsed item and its test table, with <c>FailedReason</c> set on rejection.</returns>
public MpileupFisherResult Parse(string line, bool writeCandidateFile = true)
{
    var pileup = _parser.GetValue(line);

    // The first sample is labelled as normal, the second as tumor.
    pileup.Samples[0].SampleName = "NORMAL";
    pileup.Samples[1].SampleName = "TUMOR";

    var pairedEvent = pileup.GetPairedEvent();
    var table = pileup.InitializeTable(pairedEvent);

    var parsed = new MpileupFisherResult
    {
        Item = pileup,
        Group = table
    };

    // Read depth filter.
    if (!_rdFilter.Accept(pileup))
    {
        parsed.FailedReason = _rdFilter.RejectReason;
        _result.MinimumReadDepthFailed++;
        return parsed;
    }

    // Entries where every sample shows the same single event are rejected.
    if (pileup.OnlyOneEvent())
    {
        parsed.FailedReason = "Only one allele detected";
        _result.OneEventFailed++;
        return parsed;
    }

    // Reject when the failed percentage of sample 1 exceeds that of sample 2.
    if (table.Sample1.FailedPercentage > table.Sample2.FailedPercentage)
    {
        parsed.FailedReason = "MAF decreased in tumor";
        _result.MinorAlleleDecreasedFailed++;
        return parsed;
    }

    if (!_normalTest.Accept(table))
    {
        parsed.FailedReason = _normalTest.RejectReason;
        _result.MinorAlleleFailedInNormalSample++;
        return parsed;
    }

    if (!_tumorTest.Accept(table))
    {
        parsed.FailedReason = _tumorTest.RejectReason;
        _result.MinorAlleleFailedInTumorSample++;
        return parsed;
    }

    // Group level Fisher exact test against the configured p-value limit.
    table.CalculateTwoTailPValue();
    if (table.PValue > _options.FisherPvalue)
    {
        parsed.FailedReason = string.Format("Fisher pvalue > {0}", _options.FisherPvalue);
        _result.GroupFisherFailed++;
        return parsed;
    }

    if (writeCandidateFile)
    {
        // Only the major and minor events are passed to the candidate file writer.
        var keptEvents = new HashSet<string>(new[] { pairedEvent.MajorEvent, pairedEvent.MinorEvent });
        var writer = new PileupItemFile(keptEvents);

        parsed.CandidateFile = string.Format("{0}/{1}.wsm", _options.CandidatesDirectory, MpileupFisherResultFileFormat.GetString(parsed, '_', false));
        writer.WriteToFile(parsed.CandidateFile, pileup);
    }

    Progress.SetMessage("{0}\t{1}\t{2}", pileup.SequenceIdentifier, pileup.Position, table);

    return parsed;
}
/// <summary>
/// Runs the pileup step when the base file is missing or empty, applies the filter step
/// and records its GLM statistics in the summary file, then builds one output row per
/// entry of the validation list and writes the rows as VCF and TSV.
/// </summary>
/// <returns>
/// The two written file names: <c>_options.OutputSuffix</c> + ".tsv" and
/// <c>_options.OutputSuffix</c> + ".vcf", in that order.
/// </returns>
/// <exception cref="Exception">Thrown when the filter options fail to prepare.</exception>
public override IEnumerable<string> Process()
{
    // Only run the pileup when there is no usable base file yet.
    if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
    {
        base.Process();
    }
    else
    {
        Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
    }

    var filterOptions = options.GetFilterOptions();

    // NOTE(review): FileInfo.Length throws if the base file still does not exist after
    // base.Process(); kept as is because it is unclear whether that failure is intended.
    if (new FileInfo(_options.BaseFilename).Length > 0)
    {
        if (!filterOptions.PrepareOptions())
        {
            throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
        }

        new FilterProcessor(filterOptions).Process();

        // Skip the first line of the filter output, then count the lines flagged with GLM_PVALUE.
        var lines = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
        var glmfailed = lines.Count(m => m.Contains("GLM_PVALUE"));

        var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();

        // Replace a "glm pvalue" line left at the end of the summary file instead of appending a second one.
        // The count guard avoids the exception Last() would raise on an empty summary file,
        // and the ordinal comparison keeps the marker check independent of the current culture.
        if (summarylines.Count > 0 && summarylines[summarylines.Count - 1].StartsWith("glm pvalue", StringComparison.Ordinal))
        {
            summarylines.RemoveAt(summarylines.Count - 1);
        }

        summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
        File.WriteAllLines(_options.SummaryFilename, summarylines);
    }

    var mutationList = GetValidationList();

    // Both lookups are keyed by chromosome and position via GenomeUtils.GetKey.
    var candidates = new MpileupFisherResultFileFormat().ReadFromFile(options.CandidatesFilename).ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));
    var items = new FilterItemTextFormat().ReadFromFile(filterOptions.ROutputFile).ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

    var result = new List<FilterItem>();
    foreach (var mutation in mutationList.Items)
    {
        var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);

        // Present in the filter output: reuse that row unchanged.
        if (items.ContainsKey(key))
        {
            result.Add(items[key]);
            continue;
        }

        // Not in the filter output: emit a placeholder row with empty or neutral GLM fields.
        var item = new FilterItem();
        item.Chr = mutation.Chr;
        item.Start = mutation.Pos.ToString();
        item.End = item.Start;
        item.FisherNormal = string.Empty;
        item.BrglmConverged = string.Empty;
        item.BrglmGroup = 1.0;
        item.BrglmGroupFdr = 1.0;
        item.BrglmScore = string.Empty;
        item.BrglmStrand = string.Empty;
        item.BrglmPosition = string.Empty;
        item.Identity = string.Empty;
        result.Add(item);

        if (candidates.ContainsKey(key))
        {
            // The position was seen in the candidates file: copy its alleles, counts,
            // Fisher p-value and recorded failure reason.
            var cand = candidates[key];
            item.ReferenceAllele = cand.Item.Nucleotide.ToString();
            item.MajorAllele = cand.Group.SucceedName;
            item.MinorAllele = cand.Group.FailedName;
            item.NormalMajorCount = cand.Group.Sample1.Succeed;
            item.NormalMinorCount = cand.Group.Sample1.Failed;
            item.TumorMajorCount = cand.Group.Sample2.Succeed;
            item.TumorMinorCount = cand.Group.Sample2.Failed;
            item.FisherGroup = cand.Group.PValue;
            item.Filter = cand.FailedReason;
            Console.WriteLine("In candidates : " + item.Filter);
        }
        else
        {
            // The position is absent from the candidates file as well: report zero counts.
            item.NormalMajorCount = 0;
            item.NormalMinorCount = 0;
            item.TumorMajorCount = 0;
            item.TumorMinorCount = 0;
            item.Filter = "No coverage";
            Console.WriteLine("No read : " + item.Filter);
        }
    }

    new FilterItemVcfWriter(filterOptions).WriteToFile(_options.OutputSuffix + ".vcf", result);
    new FilterItemTextFormat().WriteToFile(_options.OutputSuffix + ".tsv", result);

    return new string[] { _options.OutputSuffix + ".tsv", _options.OutputSuffix + ".vcf" };
}
/// <summary>
/// Parses one tab-separated pileup line as a NORMAL/TUMOR sample pair and returns a
/// candidate only when the entry passes every filter. Each rejection increments the
/// matching counter on <c>_result</c> and yields <c>null</c>.
/// </summary>
/// <param name="line">The pileup line; it is split on tab characters.</param>
/// <param name="writeCandidateFile">
/// When true, an accepted entry is written to a ".wsm" file in the candidates directory
/// and the path is stored in <c>CandidateFile</c>.
/// </param>
/// <returns>The accepted candidate, or <c>null</c> when the entry was ignored or rejected.</returns>
public MpileupFisherResult Parse(string line, bool writeCandidateFile = true)
{
    var fields = line.Split('\t');

    // The ignore check only applies when bedMap holds at least one entry.
    if (bedMap.Count > 0)
    {
        var location = _parser.GetSequenceIdentifierAndPosition(fields);
        if (IsIgnored(location.SequenceIdentifier, location.Position))
        {
            _result.Ignored++;
            return null;
        }
    }

    // Pre-check on the raw fields; the minimum score requirement is not considered here.
    if (!_parser.HasEnoughReads(fields))
    {
        _result.MinimumReadDepthFailed++;
        return null;
    }

    // Pre-check on the raw fields; the minimum score requirement is not considered here.
    if (!_parser.HasMinorAllele(fields))
    {
        _result.OneEventFailed++;
        return null;
    }

    // Full parse, this time taking the score limitation into account.
    var pileup = _parser.GetValue(fields);
    if (pileup == null)
    {
        _result.MinimumReadDepthFailed++;
        return null;
    }

    // Entries where every sample shows the same single event are dropped.
    if (pileup.OnlyOneEvent())
    {
        _result.OneEventFailed++;
        return null;
    }

    // The first sample is labelled as normal, the second as tumor.
    pileup.Samples[0].SampleName = "NORMAL";
    pileup.Samples[1].SampleName = "TUMOR";

    var pairedEvent = pileup.GetPairedEvent();
    var table = pileup.InitializeTable(pairedEvent);

    // Reject when the failed percentage of sample 1 exceeds that of sample 2.
    if (table.Sample1.FailedPercentage > table.Sample2.FailedPercentage)
    {
        _result.MinorAlleleDecreasedFailed++;
        return null;
    }

    if (!_tumorTest.Accept(table))
    {
        _result.MinorAlleleFailedInTumorSample++;
        return null;
    }

    if (!_normalTest.Accept(table))
    {
        _result.MinorAlleleFailedInNormalSample++;
        return null;
    }

    // Group level Fisher exact test. A separate p-value limit applies when the
    // zero-minor-allele strategy is enabled and sample 1 has no failed count.
    table.CalculateTwoTailPValue();
    var useZeroMinorLimit = _options.UseZeroMinorAlleleStrategy && table.Sample1.Failed == 0;
    var pvalueTooHigh = useZeroMinorLimit
        ? table.PValue > _options.ZeroMinorAlleleStrategyFisherPvalue
        : table.PValue > _options.FisherPvalue;
    if (pvalueTooHigh)
    {
        _result.GroupFisherFailed++;
        return null;
    }

    // Every criterion passed.
    _result.CandidateCount++;

    var candidate = new MpileupFisherResult
    {
        Item = pileup,
        Group = table
    };

    if (writeCandidateFile)
    {
        // Only the major and minor events are passed to the candidate file writer.
        var keptEvents = new HashSet<string>(new[] { pairedEvent.MajorEvent, pairedEvent.MinorEvent });
        var writer = new PileupItemFile(keptEvents);

        candidate.CandidateFile = string.Format("{0}/{1}.wsm", _options.CandidatesDirectory, MpileupFisherResultFileFormat.GetString(candidate, '_', false));
        writer.WriteToFile(candidate.CandidateFile, pileup);
    }

    Progress.SetMessage("{0}\t{1}\t{2}", pileup.SequenceIdentifier, pileup.Position, table);

    return candidate;
}