/// <summary>
/// Runs the designer-generated component initialisation, remembers the input and
/// output file paths, and resets the task reference and cancel flag.
/// </summary>
/// <param name="sInputFile">Path stored in m_sInputFile.</param>
/// <param name="sOutputFile">Path stored in m_sOutputFile.</param>
public FalseDiscoveryRateComputationTask(string sInputFile, string sOutputFile)
 {
     InitializeComponent();

     // File paths supplied by the caller.
     m_sOutputFile = sOutputFile;
     m_sInputFile  = sInputFile;

     // No computation attached yet (see setTask) and no cancellation requested.
     m_bCancel = false;
     m_fdrTask = null;
 }
Exemple #2
0
        /// <summary>
        /// Click handler for the compute button. Reads the options from the form controls,
        /// builds a FalseDiscoveryRate configured with them, attaches it to a new
        /// FalseDiscoveryRateComputationTask for the selected input/output files and runs the task.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnCompute_Click(object sender, EventArgs e)
        {
            bool bHasColumnHeaders = chkColumnHeaders.Checked;
            bool bFullOutput       = chkFullOutput.Checked;
            bool bReportProgress   = chkReportProgress.Checked;
            bool bHuge             = chkHuge.Checked;

            // Map the combo-box position onto the pi method. Any position other than
            // 1 or 2 (including "nothing selected") keeps PiMethod.One.
            FalseDiscoveryRate.PiMethod mPi;
            switch (cmbPIMethod.SelectedIndex)
            {
                case 1:
                    mPi = FalseDiscoveryRate.PiMethod.WeightedSum;
                    break;
                case 2:
                    mPi = FalseDiscoveryRate.PiMethod.DoubleAverage;
                    break;
                default:
                    mPi = FalseDiscoveryRate.PiMethod.One;
                    break;
            }

            // The filtering check box overrides whatever the combo box selected.
            if (chkFiltering.Checked)
            {
                mPi = FalseDiscoveryRate.PiMethod.Filtering;
            }

            double dCutoff = (double)udFDRCutoff.Value;

            // NOTE(review): the control value is reduced by one before being passed on;
            // presumably the control counts from 1 - confirm against FalseDiscoveryRate.
            int cTableNamesColumns = (int)udNameColumns.Value - 1;

            bool bPositiveFDR = chkPFDR.Checked;

            // Sampling is off unless the check box is ticked; in that case both values stay 0.
            int    iSampleSize         = 0;
            double dConvergenceEpsilon = 0.0;

            if (chkSampling.Checked)
            {
                iSampleSize = (int)udSampleSize.Value;

                // Automated sampling is only consulted when sampling itself is enabled.
                if (chkAutomatedSampling.Checked)
                {
                    dConvergenceEpsilon = 0.001;
                }
            }

            string sInputFile  = txtInputFile.Text;
            string sOutputFile = txtOutputFile.Text;

            FalseDiscoveryRateComputationTask task = new FalseDiscoveryRateComputationTask(sInputFile, sOutputFile);
            FalseDiscoveryRate t = new FalseDiscoveryRate(cTableNamesColumns, bReportProgress, dCutoff, bHuge, iSampleSize, dConvergenceEpsilon, bHasColumnHeaders, mPi, bPositiveFDR, bFullOutput, task);

            task.setTask(t);
            task.run();
        }
        /// <summary>
        /// Populates this option object from the XML parameter file at <paramref name="fileName"/>.
        /// "Version" and "MergeResult" are read unconditionally; "MinimumEngineAgreeCount",
        /// "MergeResultFromSameEngineButDifferentSearchParameters" and "PeptideRetrieval" are
        /// applied only when the element is present. The Database, FalseDiscoveryRate,
        /// Classification, PeptideFilter and DatasetList members then load from the same root.
        /// </summary>
        /// <param name="fileName">Path of the parameter file.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public void LoadFromFile(string fileName)
        {
            bool fileIsMissing = !File.Exists(fileName);
            if (fileIsMissing)
            {
                throw new FileNotFoundException("Parameter file not found", fileName);
            }

            XElement root = XElement.Load(fileName);

            // Mandatory scalar settings: a missing element fails here on the .Value access.
            ApplicationTitle = root.Element("Version").Value;
            MergeResult      = Convert.ToBoolean(root.Element("MergeResult").Value);

            // Conflict type falls back to the DiscardAll name when the element is absent.
            ConflictType = ResolveSearchEngineConflictTypeFactory.Find(
                root.GetChildValue("ConflictType", ResolveSearchEngineConflictTypeFactory.DiscardAll.Name));

            // Optional settings: leave the current value untouched when the element is absent.
            XElement agreeCountNode = root.Element("MinimumEngineAgreeCount");
            if (agreeCountNode != null)
            {
                MinimumEngineAgreeCount = int.Parse(agreeCountNode.Value);
            }

            XElement sameEngineNode = root.Element("MergeResultFromSameEngineButDifferentSearchParameters");
            if (sameEngineNode != null)
            {
                KeepTopPeptideFromSameEngineButDifferentSearchParameters = bool.Parse(sameEngineNode.Value);
            }

            XElement retrievalNode = root.Element("PeptideRetrieval");
            if (retrievalNode != null)
            {
                PeptideRetrieval = bool.Parse(retrievalNode.Value);
            }

            // Nested option sections read their own settings from the same root.
            Database.Load(root);
            FalseDiscoveryRate.Load(root);
            Classification.Load(root);
            PeptideFilter.Load(root);

            // Dataset loading is best-effort: a failure is shown to the user and not rethrown.
            try
            {
                DatasetList.Load(root);
                DatasetList.ForEach(dataset => dataset.Parent = this);
            }
            catch (Exception ex)
            {
                MessageBox.Show("Load dataset error :" + ex.Message);
            }
        }
Exemple #4
0
        /// <summary>
        /// Assigns a score threshold to every input file, then filters each file's peptides
        /// and PSMs against its threshold and logs the passing target/decoy counts.
        /// </summary>
        /// <param name="csvFiles">Input files to threshold and filter.</param>
        /// <param name="isBatched">
        /// When true, one threshold is computed from _allPeptides and assigned to every file;
        /// otherwise each file's threshold is computed from that file's own peptides.
        /// </param>
        private void CalculateBasicFDR(IList <InputFile> csvFiles, bool isBatched = false)
        {
            // The log line is emitted before any threshold is computed, in both modes.
            Log("Calculating first order FDR threshold" + (isBatched ? " in batch..." : " separately..."));

            if (isBatched)
            {
                // One global threshold shared by all files.
                double sharedThreshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(_allPeptides, _maximumFalseDiscoveryRate);

                foreach (InputFile file in csvFiles)
                {
                    file.ScoreThreshold = sharedThreshold;
                }
            }
            else
            {
                // Independent threshold per file.
                foreach (InputFile file in csvFiles)
                {
                    file.ScoreThreshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(file.Peptides, _maximumFalseDiscoveryRate);
                }
            }

            foreach (InputFile file in csvFiles)
            {
                double cutoff = file.ScoreThreshold;

                // Keep only entries whose score metric does not exceed the file's threshold.
                file.FdrFilteredPeptides = file.Peptides.Where(p => p.FdrScoreMetric <= cutoff).ToList();
                file.FdrFilteredPSMs     = file.PeptideSpectralMatches.Where(m => m.FdrScoreMetric <= cutoff).ToList();

                int decoyCount  = file.FdrFilteredPeptides.Count(p => p.IsDecoy);
                int targetCount = file.FdrFilteredPeptides.Count - decoyCount;

                // Decoys as a percentage of targets; double arithmetic, so no exception when targetCount is 0.
                double decoyPercent = 100.0 * decoyCount / (double)targetCount;

                string report = string.Format(
                    "{0:N0} peptides ({1:N0} decoys FDR = {2:F4}) pass the e-value threshold of {3:G4} for {4}",
                    targetCount, decoyCount, decoyPercent, file.ScoreThreshold, file.Name);
                Log(report);
            }
        }
Exemple #5
0
        /// <summary>
        /// Scans a grid of precursor mass error cut-offs from 0 up to the smaller of the largest
        /// observed error and _maximumPPMError, and selects the first cut-off for which
        /// FalseDiscoveryRate.Count returns the highest value. The score threshold is then
        /// computed for the peptides whose error is within the selected cut-off.
        /// </summary>
        /// <param name="inputPeptides">Peptides to evaluate; must contain at least one element.</param>
        /// <param name="maximumFalseDisoveryRate">Maximum false discovery rate forwarded to Count and CalculateThreshold.</param>
        /// <param name="steps">Number of intervals the scanned range is divided into, before <paramref name="minimumIncrement"/> is applied.</param>
        /// <param name="minimumIncrement">Lower bound on the spacing between scanned cut-offs.</param>
        /// <returns>
        /// Item1 is the selected PPM error cut-off (0 when no cut-off produced a positive count);
        /// Item2 is the threshold computed for the peptides with an error less than or equal to Item1.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="inputPeptides"/> is empty.</exception>
        private Tuple <double, double> CalculateBestPPMError(IEnumerable <Peptide> inputPeptides, double maximumFalseDisoveryRate = 0.01, int steps = 10, double minimumIncrement = 0.05)
        {
            // Sort once so that every candidate cut-off corresponds to a prefix of the list.
            List<Peptide> peptides = inputPeptides.OrderBy(pep => pep.CorrectedPrecursorErrorPPM).ToList();

            double[] precursorPPMs = peptides.Select(pep => pep.CorrectedPrecursorErrorPPM).ToArray();

            double max               = peptides[peptides.Count - 1].CorrectedPrecursorErrorPPM;
            double maxPrecursorError = Math.Min(max, _maximumPPMError);
            double minPrecursorError = 0;

            // Never step finer than minimumIncrement, even if that yields fewer than 'steps' intervals.
            double increment = Math.Max((maxPrecursorError - minPrecursorError) / steps, minimumIncrement);

            double bestppmError = 0;
            int    bestCount    = 0;

            for (double ppmError = minPrecursorError; ppmError <= maxPrecursorError; ppmError += increment)
            {
                int index = Array.BinarySearch(precursorPPMs, ppmError);
                if (index < 0)
                {
                    // Not found: the complement is the index of the first larger element.
                    index = ~index;
                }

                // On an exact hit BinarySearch may return any of several equal elements.
                // Advance past all of them so the counted prefix is exactly the set
                // "error <= ppmError", i.e. the same set the final filter below selects.
                while (index < precursorPPMs.Length && precursorPPMs[index] <= ppmError)
                {
                    index++;
                }

                int count = FalseDiscoveryRate<Peptide, double>.Count(peptides.GetRange(0, index), maximumFalseDisoveryRate);

                // Strict comparison: on equal counts the earlier (tighter) cut-off is kept.
                if (count > bestCount)
                {
                    bestCount    = count;
                    bestppmError = ppmError;
                }
            }

            List<Peptide> filteredPeptides = new List<Peptide>(peptides.Where(pep => pep.CorrectedPrecursorErrorPPM <= bestppmError));

            // Calculate the e-value threshold for those filtered peptides
            double threshold = FalseDiscoveryRate<Peptide, double>.CalculateThreshold(filteredPeptides, maximumFalseDisoveryRate);

            return(new Tuple<double, double>(bestppmError, threshold));
        }
        /// <summary>
        /// Serialises this option object into a "BuildSummaryOption" XML element.
        /// </summary>
        /// <returns>
        /// The root element containing the scalar settings, after it has been passed to the
        /// Save method of Database, FalseDiscoveryRate, Classification, PeptideFilter and
        /// DatasetList, in that order.
        /// </returns>
        public XElement ToXml()
        {
            XElement root = new XElement("BuildSummaryOption");

            // Scalar settings, in the same order LoadFromFile-style readers expect by name.
            root.Add(new XElement("Version", ApplicationTitle));
            root.Add(new XElement("MergeResult", MergeResult));
            root.Add(new XElement("ConflictType", ConflictType));
            root.Add(new XElement("MinimumEngineAgreeCount", MinimumEngineAgreeCount));
            root.Add(new XElement("MergeResultFromSameEngineButDifferentSearchParameters", KeepTopPeptideFromSameEngineButDifferentSearchParameters));
            root.Add(new XElement("PeptideRetrieval", PeptideRetrieval));

            // Each nested option section writes itself into the same root.
            Database.Save(root);
            FalseDiscoveryRate.Save(root);
            Classification.Save(root);
            PeptideFilter.Save(root);
            DatasetList.Save(root);

            return root;
        }
 /// <summary>
 /// Returns the spectrum builder obtained from FalseDiscoveryRate.GetSpectrumBuilder().
 /// </summary>
 /// <returns>The builder produced by the FalseDiscoveryRate member.</returns>
 public IIdentifiedSpectrumBuilder GetSpectrumBuilder()
 {
     IIdentifiedSpectrumBuilder builder = FalseDiscoveryRate.GetSpectrumBuilder();
     return builder;
 }
 /// <summary>
 /// Stores the given FalseDiscoveryRate instance in m_fdrTask, replacing any previous value.
 /// </summary>
 /// <param name="t">The instance to store; not validated here.</param>
 internal void setTask(FalseDiscoveryRate t)
 {
     m_fdrTask = t;
 }
Exemple #9
0
        /*
         * static void Main(string[] args)
         * {
         *  DirectoryInfo dir = new DirectoryInfo( "D:\\fdr\\SyntheticMultiple" );
         *  FileInfo[] aFiles = dir.GetFiles();
         *  int iFile = 0, cFiles = aFiles.Length;
         *  foreach (FileInfo f in aFiles)
         *  {
         *      if (f.Name.StartsWith("synth") && f.Name.EndsWith(".txt"))
         *      {
         *          Console.WriteLine("Processing file " + iFile + " out of " + cFiles);
         *          FalseDiscoveryRate t = new FalseDiscoveryRate(1, true, double.PositiveInfinity, false, -1, 0.0, true, FalseDiscoveryRate.PiMethod.WeightedSum);
         *          t.computeFDR(f.FullName, f.FullName + ".FDR");
         *          iFile++;
         *      }
         *  }
         * }
         */
        /// <summary>
        /// Command-line entry point. With fewer than two arguments the usage text is printed
        /// and nothing else happens. Otherwise args[0] and args[1] are taken as the input and
        /// output file names, the options are looked up in <paramref name="args"/> with
        /// findArgument, and computeFDR is run with the resulting configuration.
        /// </summary>
        /// <param name="args">Command-line arguments: inputfile outputfile [options].</param>
        /// <exception cref="FormatException">
        /// -FDRCutoff or -UseTableNames was given without a ":value" part, or an option value
        /// could not be parsed as a number.
        /// </exception>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("FisherExactTest utility for computing Fisher Exact Test and False Discovery Rate (FDR) for contingency tables.");
                Console.WriteLine("Written by Guy Shani, Microsoft Research, October 2008.");
                Console.WriteLine("Use the format: FisherExactTest inputfile outputfile <options>");
                Console.WriteLine("inputfile - name of the text file containing the contingency table data. Each contingency table resides in a single row with the format a b c d, separateed by tabs.");
                Console.WriteLine("outputfile - name of the text file where the results should be written.");
                Console.WriteLine("Additional available options:");
                Console.WriteLine("[-UseTableNames:<column count>] - each contingency table can be perceeded by table names columns. Use this option to set the number of columns perceeding each table. The default is 0 - no table names.");
                Console.WriteLine("[-SilentMode] - execute the application without any progress messages.");
                Console.WriteLine("[-FDRCutoff:x] - Allows to avoid outputing tables with an FDR exciding the threshold x.");
                Console.WriteLine("[-Huge] - When the input data is huge (more than 100K tables) this option avoids several caching procedures to allow handling more data.");
                Console.WriteLine("[-UseSampling[:n]] - Computes FDRs using only n sampled tables. Default value for n is 100K.");
                // The documented default matches the value actually applied below (0.01).
                Console.WriteLine("[-AutomatedSampling[:d]] - Activates the automated sampling algorithm, computing FDRs repeatedly with increasing sample sizes until the largest change in FDR drops below d. Default value for d is 0.01.");
                Console.WriteLine("[-ColumnHeaders] - use when the input files has column headers.");
                Console.WriteLine("[-EvaluatePi] - by default pi0=1, set this parameter to evaluate pi0=sum(observed p-values)/sum(null p-values).");
                Console.WriteLine("[-Filtering] - Computes pi0 using only relevant tables. This flag supersedes the EvaluatePi flag.");
                Console.WriteLine("[-FullOutput] - outputs all the statistics that were computed. By default only p-values and q-values are written to the output file.");
                Console.WriteLine("[-pFDR] - Compute positive FDR. The default is to compute FDR rather than pFDR.");
                return;
            }

            string sInputFileName  = args[0];
            string sOutputFileName = args[1];

            // Simple presence flags. findArgument returns -1 when the option is absent.
            bool bReportProgress   = (findArgument(args, "-SilentMode") == -1);
            bool bHuge             = (findArgument(args, "-Huge") != -1);
            bool bHasColumnHeaders = (findArgument(args, "-ColumnHeaders") != -1);
            bool bFullOutput       = (findArgument(args, "-FullOutput") != -1);
            bool bPositiveFDR      = (findArgument(args, "-pFDR") != -1);

            // -Filtering is checked last so that it supersedes -EvaluatePi when both are given.
            FalseDiscoveryRate.PiMethod mPi = FalseDiscoveryRate.PiMethod.One;
            if (findArgument(args, "-EvaluatePi") != -1)
            {
                mPi = FalseDiscoveryRate.PiMethod.WeightedSum;
            }
            if (findArgument(args, "-Filtering") != -1)
            {
                mPi = FalseDiscoveryRate.PiMethod.Filtering;
            }

            // -FDRCutoff:x (value required).
            double dCutoff = double.PositiveInfinity;
            int    iCutoff = findArgument(args, "-FDRCutoff");
            if (iCutoff != -1)
            {
                int idx = args[iCutoff].IndexOf(':');
                if (idx < 0)
                {
                    // Without this check the whole option text would be handed to double.Parse.
                    throw new FormatException("The -FDRCutoff option requires a value, for example -FDRCutoff:0.05");
                }
                dCutoff = double.Parse(args[iCutoff].Substring(idx + 1));
            }

            // -UseTableNames:<column count> (value required).
            int cTableNamesColumns = 0;
            int iTableNames        = findArgument(args, "-UseTableNames");
            if (iTableNames != -1)
            {
                int idx = args[iTableNames].IndexOf(':');
                if (idx < 0)
                {
                    // Without this check the whole option text would be handed to int.Parse.
                    throw new FormatException("The -UseTableNames option requires a value, for example -UseTableNames:1");
                }
                cTableNamesColumns = int.Parse(args[iTableNames].Substring(idx + 1));
            }

            // -UseSampling[:n] (value optional, default 100000). -1 means the option was not given.
            int iSampleSize = -1;
            int iSampling   = findArgument(args, "-UseSampling");
            if (iSampling != -1)
            {
                int idx = args[iSampling].IndexOf(':');
                if (idx > 0)
                {
                    iSampleSize = int.Parse(args[iSampling].Substring(idx + 1));
                }
                else
                {
                    iSampleSize = 100000;
                }
            }

            // -AutomatedSampling[:d] (value optional, default 0.01). -1.0 means the option was not given.
            double dMinimalChangeBetweenSamples = -1.0;
            int    iAutomatedSampling           = findArgument(args, "-AutomatedSampling");
            if (iAutomatedSampling != -1)
            {
                int idx = args[iAutomatedSampling].IndexOf(':');

                // Automated sampling needs a sample size; use the default if -UseSampling did not set one.
                if (iSampleSize == -1)
                {
                    iSampleSize = 100000;
                }

                if (idx < 0)
                {
                    dMinimalChangeBetweenSamples = 0.01;
                }
                else
                {
                    dMinimalChangeBetweenSamples = double.Parse(args[iAutomatedSampling].Substring(idx + 1));
                }
            }

            DateTime           dtBefore = DateTime.Now;
            ProgressReport     pr       = new ConsoleProgressReport();
            FalseDiscoveryRate t        = new FalseDiscoveryRate(cTableNamesColumns, bReportProgress, dCutoff, bHuge, iSampleSize, dMinimalChangeBetweenSamples, bHasColumnHeaders, mPi, bPositiveFDR, bFullOutput, pr);
            t.computeFDR(sInputFileName, sOutputFileName);

            DateTime dtAfter = DateTime.Now;
            if (bReportProgress)
            {
                Console.WriteLine("Total execution time " + dtAfter.Subtract(dtBefore));
            }
        }