/// <summary>
/// Initializes the component and records the input/output file names this task works on.
/// The task starts with no FalseDiscoveryRate attached and with the cancel flag cleared.
/// </summary>
/// <param name="sInputFile">Value stored in <c>m_sInputFile</c>.</param>
/// <param name="sOutputFile">Value stored in <c>m_sOutputFile</c>.</param>
public FalseDiscoveryRateComputationTask(string sInputFile, string sOutputFile)
{
    InitializeComponent();

    // File pair supplied by the caller.
    m_sInputFile = sInputFile;
    m_sOutputFile = sOutputFile;

    // Nothing attached and nothing cancelled yet.
    m_fdrTask = null;
    m_bCancel = false;
}
/// <summary>
/// Click handler for the Compute button. Reads every option from the form controls,
/// builds a <c>FalseDiscoveryRate</c> configured with them, attaches it to a new
/// <c>FalseDiscoveryRateComputationTask</c> for the selected input/output files and runs the task.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnCompute_Click(object sender, EventArgs e)
{
    bool bHasColumnHeaders = chkColumnHeaders.Checked;
    bool bFullOutput = chkFullOutput.Checked;
    bool bReportProgress = chkReportProgress.Checked;
    bool bHuge = chkHuge.Checked;
    bool bPositiveFDR = chkPFDR.Checked;

    // Map the combo box selection to a Pi method; any other index keeps the default (One).
    FalseDiscoveryRate.PiMethod mPi;
    switch (cmbPIMethod.SelectedIndex)
    {
        case 1:
            mPi = FalseDiscoveryRate.PiMethod.WeightedSum;
            break;
        case 2:
            mPi = FalseDiscoveryRate.PiMethod.DoubleAverage;
            break;
        default:
            mPi = FalseDiscoveryRate.PiMethod.One;
            break;
    }

    // The filtering check box overrides whatever the combo box selected.
    if (chkFiltering.Checked)
    {
        mPi = FalseDiscoveryRate.PiMethod.Filtering;
    }

    double dCutoff = (double)udFDRCutoff.Value;
    int cTableNamesColumns = (int)udNameColumns.Value - 1;

    // Sampling options are only read when sampling is enabled; otherwise both stay at zero.
    int iSampleSize = 0;
    double dConvergenceEpsilon = 0.0;
    if (chkSampling.Checked)
    {
        iSampleSize = (int)udSampleSize.Value;
        if (chkAutomatedSampling.Checked)
        {
            dConvergenceEpsilon = 0.001;
        }
    }

    FalseDiscoveryRateComputationTask task = new FalseDiscoveryRateComputationTask(txtInputFile.Text, txtOutputFile.Text);
    FalseDiscoveryRate t = new FalseDiscoveryRate(cTableNamesColumns, bReportProgress, dCutoff, bHuge, iSampleSize, dConvergenceEpsilon, bHasColumnHeaders, mPi, bPositiveFDR, bFullOutput, task);

    // The task is passed to the FalseDiscoveryRate constructor and the FalseDiscoveryRate
    // is handed back to the task before it is run.
    task.setTask(t);
    task.run();
}
/// <summary>
/// Populates this option object from an XML parameter file.
/// </summary>
/// <remarks>
/// The "Version" and "MergeResult" elements are read unconditionally; "ConflictType" falls back to
/// <c>ResolveSearchEngineConflictTypeFactory.DiscardAll.Name</c>; the remaining scalar options are only
/// assigned when their element is present. A failure while loading the dataset list is shown in a
/// message box and not rethrown.
/// </remarks>
/// <param name="fileName">Path of the parameter file to read.</param>
/// <exception cref="FileNotFoundException"><paramref name="fileName"/> does not exist.</exception>
public void LoadFromFile(string fileName)
{
    if (!File.Exists(fileName))
    {
        throw new FileNotFoundException("Parameter file not found", fileName);
    }

    XElement root = XElement.Load(fileName);

    ApplicationTitle = root.Element("Version").Value;
    MergeResult = Convert.ToBoolean(root.Element("MergeResult").Value);

    string conflictTypeName = root.GetChildValue("ConflictType", ResolveSearchEngineConflictTypeFactory.DiscardAll.Name);
    ConflictType = ResolveSearchEngineConflictTypeFactory.Find(conflictTypeName);

    // Optional settings: leave the current value untouched when the element is absent.
    XElement agreeCountNode = root.Element("MinimumEngineAgreeCount");
    if (agreeCountNode != null)
    {
        MinimumEngineAgreeCount = int.Parse(agreeCountNode.Value);
    }

    XElement sameEngineNode = root.Element("MergeResultFromSameEngineButDifferentSearchParameters");
    if (sameEngineNode != null)
    {
        KeepTopPeptideFromSameEngineButDifferentSearchParameters = bool.Parse(sameEngineNode.Value);
    }

    XElement retrievalNode = root.Element("PeptideRetrieval");
    if (retrievalNode != null)
    {
        PeptideRetrieval = bool.Parse(retrievalNode.Value);
    }

    // Nested option groups read their own sections from the same root.
    Database.Load(root);
    FalseDiscoveryRate.Load(root);
    Classification.Load(root);
    PeptideFilter.Load(root);

    try
    {
        DatasetList.Load(root);
        DatasetList.ForEach(m => m.Parent = this);
    }
    catch (Exception ex)
    {
        MessageBox.Show("Load dataset error :" + ex.Message);
    }
}
/// <summary>
/// Assigns a score threshold to every input file, then stores the peptides and PSMs whose
/// <c>FdrScoreMetric</c> is at or below that threshold and logs a per-file summary.
/// </summary>
/// <param name="csvFiles">Files to threshold and filter.</param>
/// <param name="isBatched">
/// When true, a single threshold computed from <c>_allPeptides</c> is applied to every file;
/// otherwise each file gets a threshold computed from its own peptides.
/// </param>
private void CalculateBasicFDR(IList<InputFile> csvFiles, bool isBatched = false)
{
    string announcement = "Calculating first order FDR threshold" + (isBatched ? " in batch..." : " separately...");
    Log(announcement);

    // Pass 1: decide the threshold of every file.
    if (isBatched)
    {
        double sharedThreshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(_allPeptides, _maximumFalseDiscoveryRate);
        foreach (InputFile file in csvFiles)
        {
            file.ScoreThreshold = sharedThreshold;
        }
    }
    else
    {
        foreach (InputFile file in csvFiles)
        {
            file.ScoreThreshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(file.Peptides, _maximumFalseDiscoveryRate);
        }
    }

    // Pass 2: filter each file by its threshold and report what passed.
    foreach (InputFile file in csvFiles)
    {
        double cutoff = file.ScoreThreshold;

        file.FdrFilteredPeptides = file.Peptides.Where(p => p.FdrScoreMetric <= cutoff).ToList();
        file.FdrFilteredPSMs = file.PeptideSpectralMatches.Where(m => m.FdrScoreMetric <= cutoff).ToList();

        int passingCount = file.FdrFilteredPeptides.Count;
        int decoyCount = file.FdrFilteredPeptides.Count(p => p.IsDecoy);
        int targetCount = passingCount - decoyCount;
        double decoyPercentage = 100.0 * decoyCount / (double)targetCount;

        string summary = string.Format(
            "{0:N0} peptides ({1:N0} decoys FDR = {2:F4}) pass the e-value threshold of {3:G4} for {4}",
            targetCount,
            decoyCount,
            decoyPercentage,
            file.ScoreThreshold,
            file.Name);
        Log(summary);
    }
}
/// <summary>
/// Scans candidate precursor-error cutoffs from 0 up to the smaller of the largest observed
/// <c>CorrectedPrecursorErrorPPM</c> and <c>_maximumPPMError</c>, keeps the cutoff for which
/// <c>FalseDiscoveryRate&lt;Peptide, double&gt;.Count</c> is highest, and then computes the score
/// threshold of the peptides at or below that cutoff.
/// </summary>
/// <param name="inputPeptides">Peptides to evaluate; must contain at least one element.</param>
/// <param name="maximumFalseDisoveryRate">Rate passed to <c>Count</c> and <c>CalculateThreshold</c>.</param>
/// <param name="steps">Number of increments the scanned range is divided into.</param>
/// <param name="minimumIncrement">Lower bound on the step between two candidate cutoffs.</param>
/// <returns>
/// A tuple whose <c>Item1</c> is the selected ppm-error cutoff and whose <c>Item2</c> is the
/// <c>CalculateThreshold</c> result for the peptides at or below that cutoff.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="inputPeptides"/> is empty.</exception>
private Tuple<double, double> CalculateBestPPMError(IEnumerable<Peptide> inputPeptides, double maximumFalseDisoveryRate = 0.01, int steps = 10, double minimumIncrement = 0.05)
{
    // Sorted ascending so that "all peptides up to a cutoff" is always a prefix of the list.
    List<Peptide> peptides = inputPeptides.OrderBy(pep => pep.CorrectedPrecursorErrorPPM).ToList();
    double[] precursorPPMs = peptides.Select(pep => pep.CorrectedPrecursorErrorPPM).ToArray();

    double max = peptides[peptides.Count - 1].CorrectedPrecursorErrorPPM;
    double maxPrecursorError = Math.Min(max, _maximumPPMError);
    double minPrecursorError = 0;

    double increment = (maxPrecursorError - minPrecursorError) / steps;
    increment = Math.Max(increment, minimumIncrement);

    double bestppmError = 0;
    int bestCount = 0;

    // NOTE(review): ppmError is accumulated by repeated addition, so rounding may make the
    // last candidate land slightly off maxPrecursorError.
    for (double ppmError = minPrecursorError; ppmError <= maxPrecursorError; ppmError += increment)
    {
        int index = Array.BinarySearch(precursorPPMs, ppmError);
        if (index < 0)
        {
            // Not found: the complement is the index of the first element larger than ppmError.
            index = ~index;
        }
        else
        {
            // Exact hit: BinarySearch may return any of several equal elements. Move past all of
            // them so the evaluated prefix holds every peptide with error <= ppmError, which is
            // the same set the final filter below selects.
            while (index < precursorPPMs.Length && precursorPPMs[index] <= ppmError)
            {
                index++;
            }
        }

        int count = FalseDiscoveryRate<Peptide, double>.Count(peptides.Take(index).ToList(), maximumFalseDisoveryRate);
        if (count > bestCount)
        {
            // Strictly better only, so ties keep the smaller cutoff found first.
            bestCount = count;
            bestppmError = ppmError;
        }
    }

    List<Peptide> filteredPeptides = new List<Peptide>(peptides.Where(pep => pep.CorrectedPrecursorErrorPPM <= bestppmError));

    // Calculate the threshold for those filtered peptides
    double threshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(filteredPeptides, maximumFalseDisoveryRate);
    return new Tuple<double, double>(bestppmError, threshold);
}
/// <summary>
/// Serializes the options into a "BuildSummaryOption" XML element: the scalar options are written
/// first, then each nested option group appends its own content to the same root.
/// </summary>
/// <returns>The populated root element.</returns>
public XElement ToXml()
{
    XElement root = new XElement("BuildSummaryOption");

    // Scalar options, in the order they appear in the document.
    root.Add(new XElement("Version", ApplicationTitle));
    root.Add(new XElement("MergeResult", MergeResult));
    root.Add(new XElement("ConflictType", ConflictType));
    root.Add(new XElement("MinimumEngineAgreeCount", MinimumEngineAgreeCount));
    root.Add(new XElement("MergeResultFromSameEngineButDifferentSearchParameters", KeepTopPeptideFromSameEngineButDifferentSearchParameters));
    root.Add(new XElement("PeptideRetrieval", PeptideRetrieval));

    // Nested option groups.
    Database.Save(root);
    FalseDiscoveryRate.Save(root);
    Classification.Save(root);
    PeptideFilter.Save(root);
    DatasetList.Save(root);

    return root;
}
/// <summary>
/// Returns the spectrum builder supplied by the <c>FalseDiscoveryRate</c> member.
/// </summary>
/// <returns>The result of <c>FalseDiscoveryRate.GetSpectrumBuilder()</c>.</returns>
public IIdentifiedSpectrumBuilder GetSpectrumBuilder()
{
    IIdentifiedSpectrumBuilder builder = FalseDiscoveryRate.GetSpectrumBuilder();
    return builder;
}
/// <summary>
/// Attaches the FalseDiscoveryRate instance this task holds in <c>m_fdrTask</c>.
/// </summary>
/// <param name="t">Instance to store; replaces any previously stored one.</param>
internal void setTask(FalseDiscoveryRate t)
{
    m_fdrTask = t;
}
/// <summary>
/// Command-line entry point. With fewer than two arguments it prints the usage text; otherwise it
/// treats the first two arguments as input and output file names, parses the remaining options,
/// runs <c>FalseDiscoveryRate.computeFDR</c> and, unless -SilentMode was given, prints the
/// total execution time.
/// </summary>
/// <param name="args">Command-line arguments: inputfile outputfile [options].</param>
static void Main(string[] args)
{
    // Sample size used when sampling is requested without an explicit size.
    const int cDefaultSampleSize = 100000;

    if (args.Length < 2)
    {
        Console.WriteLine("FisherExactTest utility for computing Fisher Exact Test and False Discovery Rate (FDR) for contingency tables.");
        Console.WriteLine("Written by Guy Shani, Microsoft Research, October 2008.");
        Console.WriteLine("Use the format: FisherExactTest inputfile outputfile <options>");
        Console.WriteLine("inputfile - name of the text file containing the contingency table data. Each contingency table resides in a single row with the format a b c d, separateed by tabs.");
        Console.WriteLine("outputfile - name of the text file where the results should be written.");
        Console.WriteLine("Additional available options:");
        Console.WriteLine("[-UseTableNames:<column count>] - each contingency table can be perceeded by table names columns. Use this option to set the number of columns perceeding each table. The default is 0 - no table names.");
        Console.WriteLine("[-SilentMode] - execute the application without any progress messages.");
        Console.WriteLine("[-FDRCutoff:x] - Allows to avoid outputing tables with an FDR exciding the threshold x.");
        Console.WriteLine("[-Huge] - When the input data is huge (more than 100K tables) this option avoids several caching procedures to allow handling more data.");
        Console.WriteLine("[-UseSampling[:n]] - Computes FDRs using only n sampled tables. Default value for n is 100K.");
        // The documented default matches the value assigned below when no ":d" is supplied.
        Console.WriteLine("[-AutomatedSampling[:d]] - Activates the automated sampling algorithm, computing FDRs repeatedly with increasing sample sizes until the largest change in FDR drops below d. Default value for d is 0.01.");
        Console.WriteLine("[-ColumnHeaders] - use when the input files has column headers.");
        Console.WriteLine("[-EvaluatePi] - by default pi0=1, set this parameter to evaluate pi0=sum(observed p-values)/sum(null p-values).");
        Console.WriteLine("[-Filtering] - Computes pi0 using only relevant tables. This flag supersedes the EvaluatePi flag.");
        Console.WriteLine("[-FullOutput] - outputs all the statistics that were computed. By default only p-values and q-values are written to the output file.");
        Console.WriteLine("[-pFDR] - Compute positive FDR. The default is to compute FDR rather than pFDR.");
        return;
    }

    string sInputFileName = args[0];
    string sOutputFileName = args[1];

    // Plain on/off switches.
    bool bReportProgress = (findArgument(args, "-SilentMode") == -1);
    bool bHuge = (findArgument(args, "-Huge") != -1);
    bool bHasColumnHeaders = (findArgument(args, "-ColumnHeaders") != -1);
    bool bFullOutput = (findArgument(args, "-FullOutput") != -1);
    bool bPositiveFDR = (findArgument(args, "-pFDR") != -1);

    // -Filtering is checked last so that it overrides -EvaluatePi.
    FalseDiscoveryRate.PiMethod mPi = FalseDiscoveryRate.PiMethod.One;
    if (findArgument(args, "-EvaluatePi") != -1)
    {
        mPi = FalseDiscoveryRate.PiMethod.WeightedSum;
    }
    if (findArgument(args, "-Filtering") != -1)
    {
        mPi = FalseDiscoveryRate.PiMethod.Filtering;
    }

    // Options that require a value after ':'.
    double dCutoff = double.PositiveInfinity;
    int iCutoff = findArgument(args, "-FDRCutoff");
    if (iCutoff != -1)
    {
        int idx = args[iCutoff].IndexOf(':');
        if (idx < 0)
        {
            // Without this check the whole option text would be handed to double.Parse.
            Console.WriteLine("Option -FDRCutoff requires a value, for example -FDRCutoff:0.05");
            return;
        }
        dCutoff = double.Parse(args[iCutoff].Substring(idx + 1));
    }

    int cTableNamesColumns = 0;
    int iTableNames = findArgument(args, "-UseTableNames");
    if (iTableNames != -1)
    {
        int idx = args[iTableNames].IndexOf(':');
        if (idx < 0)
        {
            // Without this check the whole option text would be handed to int.Parse.
            Console.WriteLine("Option -UseTableNames requires a value, for example -UseTableNames:1");
            return;
        }
        cTableNamesColumns = int.Parse(args[iTableNames].Substring(idx + 1));
    }

    // Options whose value is optional.
    int iSampleSize = -1;
    int iSampling = findArgument(args, "-UseSampling");
    if (iSampling != -1)
    {
        int idx = args[iSampling].IndexOf(':');
        if (idx > 0)
        {
            iSampleSize = int.Parse(args[iSampling].Substring(idx + 1));
        }
        else
        {
            iSampleSize = cDefaultSampleSize;
        }
    }

    double dMinimalChangeBetweenSamples = -1.0;
    int iAutomatedSampling = findArgument(args, "-AutomatedSampling");
    if (iAutomatedSampling != -1)
    {
        // Automated sampling turns sampling on with the default size if -UseSampling did not set one.
        if (iSampleSize == -1)
        {
            iSampleSize = cDefaultSampleSize;
        }

        int idx = args[iAutomatedSampling].IndexOf(':');
        if (idx < 0)
        {
            dMinimalChangeBetweenSamples = 0.01;
        }
        else
        {
            dMinimalChangeBetweenSamples = double.Parse(args[iAutomatedSampling].Substring(idx + 1));
        }
    }

    DateTime dtBefore = DateTime.Now;
    ProgressReport pr = new ConsoleProgressReport();
    FalseDiscoveryRate t = new FalseDiscoveryRate(cTableNamesColumns, bReportProgress, dCutoff, bHuge, iSampleSize, dMinimalChangeBetweenSamples, bHasColumnHeaders, mPi, bPositiveFDR, bFullOutput, pr);
    t.computeFDR(sInputFileName, sOutputFileName);
    DateTime dtAfter = DateTime.Now;

    if (bReportProgress)
    {
        Console.WriteLine("Total execution time " + dtAfter.Subtract(dtBefore));
    }
}