示例#1
0
        /// <summary>
        /// Runs a search on the hard-coded spiked test project, exports the result to Neo4j and
        /// builds an annotated spectrum for the best-scoring match of one fixed modified peptide.
        /// </summary>
        /// <param name="console">Console handed to <c>GetDBOptions</c>.</param>
        /// <returns>Annotated spectrum built from the first sample, the spectrum of the best match and its peptide.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// No peptide-spectrum match carries the target sequence (previously surfaced as a NullReferenceException).
        /// </exception>
        public static AnnotatedSpectrum TestNeo4j(IConSol console)
        {
            const string targetSequence = "GK(acetylation of K)GGK(propionylation of K)GLGK(propionylation of K)GGAK(propionylation of K)R";

            DBOptions dbOptions     = GetDBOptions(false, false, console);
            Samples   ProjectRatios = new Samples(@"C:\_IRIC\DATA\NB\ProjectTest_MonoAce_Spiked_19Oct.csv", 0, dbOptions);//Group 2 (all)

            Result rez = Propheus.Start(dbOptions, ProjectRatios, false, false, false);

            Database.Neo4j.ResultsToNeo4j.Export(rez);
            PeptideSpectrumMatch bestPsm = null;

            foreach (Query query in rez.queries)
            {
                foreach (PeptideSpectrumMatch psm in query.psms)
                {
                    if (psm.Peptide.Sequence != targetSequence)
                    {
                        continue;
                    }

                    // Compare against the candidate itself: the original compared against query.psms[0],
                    // which let a lower-scoring match replace a better one.
                    if (bestPsm == null || bestPsm.ProbabilityScore() < psm.ProbabilityScore())
                    {
                        bestPsm = psm;
                    }
                }
            }

            if (bestPsm == null)
            {
                // Fail with an explicit message instead of a NullReferenceException on the dereference below.
                throw new System.InvalidOperationException("No peptide spectrum match found for sequence " + targetSequence);
            }

            return new AnnotatedSpectrum(ProjectRatios[0], bestPsm.Query.spectrum, bestPsm.Peptide);
        }
示例#2
0
        /// <summary>
        /// Reloads a previously saved search state ("State.GraphML") for the hard-coded MHC project,
        /// re-runs the latest search on its queries, and writes the CSV report and three exports.
        /// Any exception is written to the console and not rethrown.
        /// </summary>
        public static void Launch()//Trinity.UnitTest.SettePeptideSample.Launch()
        {
            try
            {
                // Patient C project. Project files used in earlier runs:
                //   G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL03_2013\ProjectFile_SETTEpep_OneRAW.csv (Sette peptides)
                //   C:\_IRIC\DATA\MHC\project.csv (MHC M, one sample)
                string    projectPath = @"C:\_IRIC\DATA\MHC\project.csv";
                DBOptions options     = CreateOptions(@"C:\_IRIC\DATA\Test\testMhc\");
                Samples   samples     = new Samples(projectPath, 0, options);

                // A disabled variant built the state from scratch instead of importing it:
                // new Propheus(options, samples), PrepareForSearch(), then a Result holding
                // the options and propheus.AllQueries was saved with Result.Save().

                // Restore the saved state and attach it to a fresh search engine.
                Result   savedState = Result.Import(options.OutputFolder + "State.GraphML");
                Propheus searcher   = new Propheus(savedState.dbOptions, samples);
                searcher.Load(savedState);

                Result searchResult = searcher.SearchLatestVersion(savedState.queries, true);

                searchResult.WriteInfoToCsv(true);

                // One export per threshold; the prefix names the threshold (presumably an FDR cut-off - TODO confirm).
                searchResult.Export(0.05, "05_");
                searchResult.Export(0.02, "02_");
                searchResult.Export(1.0, "All_");

                UnitTest.Tests.MatchAllFragments(searchResult);

                // Disabled follow-ups kept for reference: WriteInfoToConsole()/Save() on the result,
                // Optimizer.LaunchBestPSMOptimization / LaunchPrecursorScoreOptimization, and
                // propheus.Align followed by a second search exported with "Aligned_*" prefixes.
            }
            catch (Exception error)
            {
                // NOTE(review): the prefix says "SettePeptideSample" although this method loads the MHC project - confirm.
                Console.WriteLine("Error in SettePeptideSample : " + error.Message);
                Console.WriteLine(error.StackTrace);
            }
        }
        /// <summary>
        /// Searches the hard-coded yeast project with wide (80 ppm) precursor and product tolerances,
        /// then reports how many PSMs carry a variable modification and the average precursor m/z
        /// error of the PSMs that carry none.
        /// </summary>
        /// <param name="restrain">
        /// When false, "Pionylation of peptide N-terminus" is searched as a variable modification with
        /// up to 1024 isoforms. When true, no variable modification is added and the isoform limit is 2.
        /// </param>
        public static void Launch(bool restrain = false)
        {
            //@"G:\Thibault\Olivier\MnR\Databases\BD_RefGenome_WithReverse_2012-06-20.fasta";
            //Trypsin

            string outputDir = @"C:\_IRIC\DATA\Test\testPK\";
            string fastaFile = @"C:\_IRIC\DATA\Yeast\Yeast_SwissProt.fasta";//Yeast
            //@"G:\Thibault\Olivier\MQ_vs_Morpheus\Yeast_SwissProt.fasta";//Yeast
            //@"G:\Thibault\Olivier\Databases\SProHNoIso_20130430\current\sequences_2013-05-30.fa";
            //G:\Thibault\Olivier\MnR\Databases\mini_human_reference_2013-26-03.fasta";//Yeast
            string projectFile = @"C:\_IRIC\DATA\PK\NotCalibrated\project_fr1.csv";//Yeast
            //@"G:\Thibault\Olivier\MQ_vs_Morpheus\project.csv";//Yeast
            //@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JAN22_2013\_Project_FL_Single.csv";
            //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUN27_2012\MR 4Rep DS\MassSense\_Test_ProjectFile_MF3.csv";
            //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\MAR18_2013\ProjectFile_TestForProPheus.csv";

            DBOptions dbOptions = new DBOptions(fastaFile);
            Samples   Project   = new Samples(projectFile, 0, dbOptions);

            dbOptions.precursorMassTolerance = new MassTolerance(80, MassToleranceUnits.ppm);
            dbOptions.productMassTolerance   = new MassTolerance(80, MassToleranceUnits.ppm);//0.034 is a 60 000 resolution over 2000 range in mz
            dbOptions.MaximumPeptideMass     = 200000;
            dbOptions.OutputFolder           = outputDir;
            ProteaseDictionary proteases = ProteaseDictionary.Instance;

            dbOptions.DigestionEnzyme = proteases["trypsin (no proline rule)"]; //"no enzyme"];
            dbOptions.NoEnzymeSearch  = false;                                  // true;
            dbOptions.DecoyFusion     = false;

            dbOptions.ToleratedMissedCleavages = 2;
            dbOptions.MinimumPeptideLength     = 5;
            dbOptions.MaximumPeptideLength     = 300;

            GraphML_List <Modification> fixMods = new GraphML_List <Modification>();

            fixMods.Add(ModificationDictionary.Instance["carbamidomethylation of C"]);
            dbOptions.fixedModifications = fixMods;

            GraphML_List <Modification> varMods = new GraphML_List <Modification>();

            if (!restrain)
            {
                // Earlier runs used the usual yeast modifications instead: oxidation of M,
                // acetylation of protein N-terminus and phosphorylation of S/T/Y.
                varMods.Add(ModificationDictionary.Instance["Pionylation of peptide N-terminus"]);
                dbOptions.maximumVariableModificationIsoforms = 1024;// 2 * (varMods.Count + fixMods.Count);//TODO Evaluate the viability of this parameter
            }
            else
            {
                dbOptions.maximumVariableModificationIsoforms = 2;
            }
            dbOptions.variableModifications = varMods;

            // Only b and y fragments are searched; A, C, X and Z were disabled.
            dbOptions.addFragmentLoss = false;
            dbOptions.addFragmentMods = false;
            dbOptions.fragments       = new Fragments();
            dbOptions.fragments.Add(new FragmentB());
            dbOptions.fragments.Add(new FragmentY());

            //ClusterOptions clusterOptions = new ClusterOptions(Project, outputDir, 5, true, 90, true);//TODO validate its in seconds for all file types

            Propheus propheus = new Propheus(dbOptions, Project);

            dbOptions.SaveMS1Peaks       = false;
            dbOptions.SaveMSMSPeaks      = true;
            dbOptions.LoadSpectraIfFound = true;
            dbOptions.NbPSMToKeep        = 10;
            propheus.Preload(false, false);
            propheus.PrepareQueries();

            //First pass (used to optimize parameters and score weights)
            Result tmp = propheus.SearchLatestVersion(propheus.AllQueries, false);

            // Split the PSMs on whether they carry a variable modification, and accumulate the
            // precursor m/z error of the unmodified ones.
            long   nbUnmodifiedPsm = 0;
            long   nbModifiedPsm   = 0;
            double cumulShift      = 0;

            foreach (Query query in tmp.queries)
            {
                foreach (PeptideSpectrumMatch psm in query.psms)
                {
                    if (string.IsNullOrEmpty(psm.Peptide.VariableModificationsInString))
                    {
                        nbUnmodifiedPsm++;
                        cumulShift += psm.PrecursorMzError;
                    }
                    else
                    {
                        nbModifiedPsm++;
                    }
                }
            }

            // The modified count was computed but never reported before.
            dbOptions.ConSole.WriteLine("Unmodified PSMs : " + nbUnmodifiedPsm + ", modified PSMs : " + nbModifiedPsm);

            if (nbUnmodifiedPsm > 0)
            {
                double avgShift = cumulShift / (double)nbUnmodifiedPsm;
                dbOptions.ConSole.WriteLine("Average shift : " + avgShift);
            }
            else
            {
                // 0 / 0 would print NaN; say explicitly that there was nothing to average.
                dbOptions.ConSole.WriteLine("Average shift : NA (no PSM without variable modification)");
            }

            //tmp.WriteInfoToCsv(true);
            //tmp.Export(0.02, "FirstPass_02_");
        }
        /// <summary>
        /// Runs a two-pass search on the hard-coded yeast project: a first pass whose result is
        /// written to CSV and exported with the "FirstPass_02_" prefix, then a second pass after
        /// reloading, exported with the "02_" prefix.
        /// </summary>
        /// <param name="console">Console handed to the <c>DBOptions</c> constructor.</param>
        public static void Launch(IConSol console)
        {
            //@"G:\Thibault\Olivier\MnR\Databases\BD_RefGenome_WithReverse_2012-06-20.fasta";
            //Trypsin

            string outputDir = @"C:\_IRIC\DATA\Yeast\Results\";
            string fastaFile = @"C:\_IRIC\DATA\Yeast\Yeast_SwissProt.fasta";//Yeast
            //@"G:\Thibault\Olivier\MQ_vs_Morpheus\Yeast_SwissProt.fasta";//Yeast
            //@"G:\Thibault\Olivier\Databases\SProHNoIso_20130430\current\sequences_2013-05-30.fa";
            //G:\Thibault\Olivier\MnR\Databases\mini_human_reference_2013-26-03.fasta";//Yeast
            string projectFile = @"C:\_IRIC\DATA\Yeast\project.csv";//Yeast
            //@"G:\Thibault\Olivier\MQ_vs_Morpheus\project.csv";//Yeast
            //@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JAN22_2013\_Project_FL_Single.csv";
            //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUN27_2012\MR 4Rep DS\MassSense\_Test_ProjectFile_MF3.csv";
            //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\MAR18_2013\ProjectFile_TestForProPheus.csv";
            DBOptions dbOptions = new DBOptions(fastaFile, console);
            Samples   Project   = new Samples(projectFile, 0, dbOptions);

            dbOptions.precursorMassTolerance = new MassTolerance(8, MassToleranceUnits.ppm);
            dbOptions.productMassTolerance   = new MassTolerance(0.034, MassToleranceUnits.Da);//0.034 is a 60 000 resolution over 2000 range in mz
            //dbOptions.productMassTolerance = new MassTolerance(20, MassToleranceUnits.ppm);
            dbOptions.MaximumPeptideMass = 200000;
            dbOptions.OutputFolder       = outputDir;
            ProteaseDictionary proteases = ProteaseDictionary.Instance;

            dbOptions.DigestionEnzyme = proteases["trypsin (no proline rule)"]; //"no enzyme"];
            dbOptions.NoEnzymeSearch  = false;                                  // true;
            dbOptions.DecoyFusion     = false;

            dbOptions.ToleratedMissedCleavages = 2;
            dbOptions.MinimumPeptideLength     = 5;
            dbOptions.MaximumPeptideLength     = 300;

            GraphML_List <Modification> fixMods = new GraphML_List <Modification>();

            fixMods.Add(ModificationDictionary.Instance["carbamidomethylation of C"]);
            dbOptions.fixedModifications = fixMods;

            GraphML_List <Modification> varMods = new GraphML_List <Modification>();

            //Oxidation (M);Acetyl (Protein N-term);Phospho (STY)
            //Mods for Yeast
            varMods.Add(ModificationDictionary.Instance["oxidation of M"]);
            varMods.Add(ModificationDictionary.Instance["acetylation of protein N-terminus"]);
            varMods.Add(ModificationDictionary.Instance["phosphorylation of S"]);
            varMods.Add(ModificationDictionary.Instance["phosphorylation of T"]);
            varMods.Add(ModificationDictionary.Instance["phosphorylation of Y"]);
            dbOptions.maximumVariableModificationIsoforms = 2 * (varMods.Count + fixMods.Count); //TODO Evaluate the viability of this parameter

            dbOptions.variableModifications = varMods;

            dbOptions.NbPSMToKeep = 16;

            // All six fragment types are searched in this configuration.
            dbOptions.addFragmentLoss = false;
            dbOptions.addFragmentMods = false;
            dbOptions.fragments       = new Fragments();
            dbOptions.fragments.Add(new FragmentA());
            dbOptions.fragments.Add(new FragmentB());
            dbOptions.fragments.Add(new FragmentC());
            dbOptions.fragments.Add(new FragmentX());
            dbOptions.fragments.Add(new FragmentY());
            dbOptions.fragments.Add(new FragmentZ());

            // Presumably the score weights mentioned below - TODO confirm. Trailing values are earlier settings.
            dbOptions.dProduct   = 0.0;
            dbOptions.dPrecursor = 0.1;               // 0.12;
            dbOptions.dMatchingProductFraction = 0.8; // 0.45;
            dbOptions.dMatchingProduct         = 0.0; // 0.5;
            dbOptions.dIntensityFraction       = 0.1; // 45;// 0.0;//0.13;
            dbOptions.dIntensity     = 0;
            dbOptions.dProtein       = 0;
            dbOptions.dPeptideScore  = 0.0; // 0.3;
            dbOptions.dFragmentScore = 0.0; // 0.5;

            //ClusterOptions clusterOptions = new ClusterOptions(Project, outputDir, 5, true, 90, true);//TODO validate its in seconds for all file types

            dbOptions.SaveMS1Peaks       = true;
            dbOptions.SaveMSMSPeaks      = true;
            dbOptions.LoadSpectraIfFound = true;
            Propheus propheus = new Propheus(dbOptions, Project);

            propheus.Preload(false, false);
            propheus.PrepareQueries();

            //To beat : 4653 (MaxQuant) Psm at 2%FDR
            //First pass (used to optimize parameters and score weights)
            Result tmp = propheus.SearchLatestVersion(propheus.AllQueries, true, false);

            tmp.WriteInfoToCsv(true);
            tmp.Export(0.02, "FirstPass_02_");

            //Second search
            propheus.Preload(true);
            propheus.PrepareQueries();
            Result finalRez = propheus.SearchLatestVersion(propheus.AllQueries, false);

            // Export the second-pass result. The original exported `tmp` here, which wrote the
            // first-pass result a second time and left the second search unused.
            finalRez.Export(0.02, "02_");

            // Disabled follow-ups kept for reference: further exports at other thresholds,
            // WriteInfoToConsole(), Optimizer runs (LaunchBestPSMOptimization,
            // LaunchPrecursorScoreOptimization, PrecursorOptimizer export) and
            // propheus.Align followed by a re-search exported with "Aligned_*" prefixes.
        }
        /// <summary>
        /// Checks the no-enzyme on-the-fly digestion against a brute-force enumeration: every
        /// sub-sequence of the first protein in "UnitTest/proteins.fasta" whose length lies within
        /// the configured minimum and maximum peptide lengths must be produced exactly as many times as it occurs.
        /// The digestion is run twice against the same queries.
        /// </summary>
        /// <returns>
        /// true when both passes produce each expected sub-sequence the expected number of times;
        /// false when a count differs or when the digestion yields a sequence that was not expected.
        /// </returns>
        public static bool Run()
        {
            //TODO test GPU instead
            DBOptions dbOptions = MhcSample.CreateOptions("");
            string    fastaPath = Path.Combine(Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]), "UnitTest", "proteins.fasta");

            List <Protein> proteins = Propheus.ReadProteomeFromFasta(fastaPath, false, dbOptions);//ETLPAMCNVYYVNCMAPLTE
            string         sequence = proteins[0].BaseSequence;

            // Expected number of occurrences of every candidate sub-sequence, and its precursor mass.
            Dictionary <string, int> sequences  = new Dictionary <string, int>();
            List <double>            precursors = new List <double>();

            for (int i = 0; i < sequence.Length; i++)
            {
                for (int j = i + dbOptions.MinimumPeptideLength - 1; j < sequence.Length; j++)
                {
                    int size = j - i + 1;
                    if (size <= dbOptions.MaximumPeptideLength)
                    {
                        string subStr = sequence.Substring(i, size);

                        // Single lookup: count stays 0 when the sub-sequence has not been seen yet.
                        int count;
                        sequences.TryGetValue(subStr, out count);
                        sequences[subStr] = count + 1;

                        // Peptide mass = water + sum of residue masses.
                        double mass = Constants.WATER_MONOISOTOPIC_MASS;
                        for (int k = 0; k < subStr.Length; k++)
                        {
                            mass += AminoAcidMasses.GetMonoisotopicMass(subStr[k]);
                        }
                        precursors.Add(mass);
                    }
                }
            }
            precursors.Sort();

            Queries        queries = new Queries(dbOptions, precursors.ToArray());
            Digestion      ps      = new Digestion(dbOptions);
            List <Protein> lProt   = new List <Protein>();

            lProt.Add(proteins[0]);

            // A disabled GPU variant (Trinity_Gpu.ProteinDigest) matched per-residue masses of the
            // protein against the precursor list; running twice was meant to check that the
            // precursor list stays in GPU memory.
            for (int iter = 0; iter < 2; iter++)
            {
                // Work on a copy so that each pass starts from the full expected counts.
                Dictionary <string, int> sequencesTmp = new Dictionary <string, int>(sequences);

                foreach (Tuple <Peptide, int> item in ps.DigestProteomeOnTheFlyNoEnzyme(lProt, queries))
                {
                    string produced = item.Item1.BaseSequence;//TODO add modifications

                    int remaining;
                    if (!sequencesTmp.TryGetValue(produced, out remaining))
                    {
                        // An unexpected peptide is a failed check, not a KeyNotFoundException.
                        return(false);
                    }
                    sequencesTmp[produced] = remaining - 1;
                }

                // Every expected sub-sequence must have been produced exactly the expected number of times.
                foreach (int val in sequencesTmp.Values)
                {
                    if (val != 0)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }
示例#6
0
        /// <summary>
        /// Configures a search of the Sette MHC peptide database against a hard-coded project file,
        /// then preloads the spectra and prepares the queries. No search is run here.
        /// Any exception is written to the options' console and not rethrown.
        /// </summary>
        public static void Launch()//Trinity.UnitTest.SettePeptideSample.Launch()
        {
            string outputDir = @"C:\_IRIC\DATA\Test\testRTMHC\";
            string fastaFile = @"C:\_IRIC\DATA\MHC Sette\MHC_Sette_Peptides_20091001.fasta";

            // Project files used in other runs:
            //   G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\AUG06_2013\RT_MHC\Project_TEST_600mM.csv
            //   G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\SEP10_2013\Project_TEST_75_100_300mM.csv
            string projectFile = @"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\MAR26_2013\Project_NonFAIMS.csv";

            // Created outside the try block so the catch handler can reach its console.
            DBOptions options = new DBOptions(fastaFile);

            try
            {
                Samples samples = new Samples(projectFile, 0, options);

                options.precursorMassTolerance   = new MassTolerance(5, MassToleranceUnits.ppm);
                options.productMassTolerance     = new MassTolerance(0.068, MassToleranceUnits.Da);//0.034 is a 60 000 resolution over 2000 range in mz
                options.MaximumPeptideMass       = 200000;
                options.OutputFolder             = outputDir;
                options.DigestionEnzyme          = ProteaseDictionary.Instance["no enzyme"];
                options.NoEnzymeSearch           = false;
                options.ToleratedMissedCleavages = 20;// 2;

                // No fixed modification (carbamidomethylation of C is disabled).
                options.fixedModifications = new GraphML_List <Modification>();

                // Variable modifications, added in this order.
                string[] variableModNames =
                {
                    "oxidation of M",
                    "phosphorylation of S",
                    "phosphorylation of T",
                    "phosphorylation of Y",
                    "deamidation of N",
                    "deamidation of Q",
                    "cysteinylation of C"
                };
                GraphML_List <Modification> variableMods = new GraphML_List <Modification>();
                foreach (string modName in variableModNames)
                {
                    variableMods.Add(ModificationDictionary.Instance[modName]);
                }
                options.variableModifications = variableMods;

                options.maximumVariableModificationIsoforms = 1024;// 2 * (varMods.Count + fixMods.Count);//TODO Evaluate the viability of this parameter

                // b and y fragments only.
                options.PSMFalseDiscoveryRate = 0.05;
                options.addFragmentMods       = false;
                options.addFragmentLoss       = false;
                options.fragments             = new Fragments();
                options.fragments.Add(new FragmentB());
                options.fragments.Add(new FragmentY());

                options.MinimumPrecursorIntensityRatioInIsolationWindow = 0.05;
                Propheus searcher = new Propheus(options, samples);

                // These flags are set after the engine is constructed, as in the original order.
                options.SaveMS1Peaks       = true;
                options.SaveMSMSPeaks      = true;
                options.LoadSpectraIfFound = true;
                searcher.Preload(true);
                searcher.PrepareQueries();
            }
            catch (Exception error)
            {
                options.ConSole.WriteLine("Error in SettePeptideSample : " + error.Message);
                options.ConSole.WriteLine(error.StackTrace);
            }
        }
示例#7
0
        public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
        {
            dbOptions     = CreateOptions(fastaFile, folderToOutputTo, conSol);
            SpikedSamples = new Samples(dbOptions);
            for (int i = 0; i < spikedRaws.Length; i++)
            {
                SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
            }

            //Precompute Spiked peptide identifications
            SpikedResult = Propheus.Start(dbOptions, SpikedSamples, false, false, true, false);
            SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

            MixedSamples = new Samples(dbOptions);
            for (int i = 0; i < mixedRaws.Length; i++)
            {
                MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
            }

            //Precompute Mixed peptide identifications
            mixedResult = Propheus.Start(dbOptions, MixedSamples, false, false, true, false);
            mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");

            conSol.WriteLine("Computing gradient descents...");

            //Compute all usable spiked peptides
            characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments, precision);
            ExportSpikedSampleResult(characterizedPeptides, dbOptions);

            vsCSVWriter writerCumul        = new vsCSVWriter(OutputFolder + "Results.csv");
            string      titleCombined      = "Mixed Sample,Precursor";
            string      curveStr           = "Polynomial Curve,";
            string      spikedIntensityStr = "Area under the curve,";

            foreach (double precursor in characterizedPeptides.Keys)
            {
                foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                {
                    titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

                    if (charPrec.eCurve.Coefficients != null && charPrec.eCurve.Coefficients.Length == 3)
                    {
                        curveStr += "," + charPrec.eCurve.Coefficients[0] + "x^2 + " + charPrec.eCurve.Coefficients[1] + "x" + charPrec.eCurve.Coefficients[2];
                    }
                    else
                    {
                        curveStr += ",NA";
                    }

                    spikedIntensityStr += "," + charPrec.eCurve.Area;
                }
            }
            writerCumul.AddLine(titleCombined);
            writerCumul.AddLine(curveStr);
            writerCumul.AddLine(spikedIntensityStr);

            //mixedPrecursors = new Dictionary<Sample, Dictionary<double, MixedPrecursor>>();
            mixedPrecursors = new Dictionary <Sample, List <MixedPrecursor> >();

            foreach (Sample mixedSample in MixedSamples)
            {
                mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
            }

            //Get the list of precursors to characterize
            foreach (Sample mixedSample in MixedSamples)
            {
                foreach (double keyMz in characterizedPeptides.Keys)
                {
                    List <Dictionary <CharacterizedPrecursor, ElutionCurve> > listOfRatios = new List <Dictionary <CharacterizedPrecursor, ElutionCurve> >();
                    foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
                    {
                        if (mPrec.MZ == keyMz)
                        {
                            // Compute Max Flow for this precursor
                            Dictionary <CharacterizedPrecursor, ElutionCurve> ratios = GetRatios(characterizedPeptides, mPrec);
                            listOfRatios.Add(ratios);

                            ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                        }
                    }

                    bool   isEmpty   = true;
                    string resultStr = vsCSV.GetFileName(mixedSample.sSDF) + "," + keyMz;
                    foreach (double precursor in characterizedPeptides.Keys)
                    {
                        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                        {
                            double cumulArea = 0.0;
                            foreach (Dictionary <CharacterizedPrecursor, ElutionCurve> ratios in listOfRatios)
                            {
                                if (ratios.ContainsKey(charPrec))
                                {
                                    cumulArea += ratios[charPrec].Area;
                                }
                            }
                            resultStr += "," + cumulArea;
                            if (cumulArea > 0)
                            {
                                isEmpty = false;
                            }
                        }
                    }
                    if (!isEmpty)
                    {
                        writerCumul.AddLine(resultStr);
                    }
                }
            }
            writerCumul.WriteToFile();

            //List Modifications
            Dictionary <Modification, double> dicOfIntensityPerMod = new Dictionary <Modification, double>();

            foreach (Sample sample in mixedPrecursors.Keys)
            {
                foreach (MixedPrecursor mP in mixedPrecursors[sample])
                {
                    foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                    {
                        if (cP.Peptide.VariableModifications != null)
                        {
                            foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                            {
                                if (!dicOfIntensityPerMod.ContainsKey(mod))
                                {
                                    dicOfIntensityPerMod.Add(mod, 0.0);
                                }
                            }
                        }
                    }
                }
            }

            //Compute site occupancy for identical sequences (real positionnal isomers)
            vsCSVWriter    writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
            List <Protein> AllProteins          = Propheus.ReadProteomeFromFasta(fastaFile, false, dbOptions);

            foreach (Protein protein in AllProteins)
            {
                string newTitleProtein = protein.Description.Replace(',', ' ') + "," + protein.Sequence;
                for (int i = 0; i < protein.Sequence.Length; i++)
                {
                    newTitleProtein += "," + protein[i].ToString();
                }
                writerSitesOccupancy.AddLine(newTitleProtein);

                foreach (Sample mixedSample in mixedPrecursors.Keys)
                {
                    string coverage = "Coverage," + mixedSample.Name;
                    for (int i = 0; i < protein.Sequence.Length; i++)
                    {
                        double cumulSite = 0.0;
                        newTitleProtein += "," + protein[i].ToString();
                        foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                        {
                            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                            {
                                if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                                {
                                    cumulSite += mP.PeptideRatios[cP].Area;
                                }
                            }
                        }
                        coverage += "," + cumulSite;
                    }
                    writerSitesOccupancy.AddLine(coverage);
                }

                foreach (Modification mod in dicOfIntensityPerMod.Keys)
                {
                    Dictionary <Sample, string> dicOfLines = new Dictionary <Sample, string>();
                    for (int i = 0; i < protein.Sequence.Length; i++)
                    {
                        foreach (Sample mixedSample in mixedPrecursors.Keys)
                        {
                            double cumulModArea = 0.0;
                            foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                            {
                                foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                                {
                                    if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber &&
                                        cP.Peptide.VariableModifications != null)
                                    {
                                        foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                        {
                                            if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1 && cP.Peptide.VariableModifications[pos] == mod)
                                            {
                                                cumulModArea += mP.PeptideRatios[cP].Area;
                                            }
                                        }
                                    }
                                }
                            }
                            if (!dicOfLines.ContainsKey(mixedSample))
                            {
                                dicOfLines.Add(mixedSample, mod.Description + "," + mixedSample.Name + "," + cumulModArea);
                            }
                            else
                            {
                                dicOfLines[mixedSample] += "," + cumulModArea;
                            }
                        }
                    }
                    foreach (string line in dicOfLines.Values)
                    {
                        writerSitesOccupancy.AddLine(line);
                    }
                }
            }
            writerSitesOccupancy.WriteToFile();
        }
示例#8
0
        /// <summary>
        /// Runs a two-pass search on the Sette peptide project file and exports the results.
        /// </summary>
        /// <remarks>
        /// The first pass runs with MS1/MSMS peak saving disabled and is exported under the
        /// "FirstPass_02_" prefix; the second pass runs with peak saving enabled and is exported
        /// under the "02_" prefix. Any exception raised while searching is written (message and
        /// stack trace) to <c>dbOptions.ConSole</c> and is not rethrown.
        /// All input and output paths are hard-coded below.
        /// </remarks>
        public static void Launch()//Trinity.UnitTest.SettePeptideSample.Launch()
        {
            string outputDir   = @"C:\_IRIC\DATA\Test\testMHCSette\";
            string fastaFile   = @"C:\_IRIC\DATA\MHC Sette\MHC_Sette_Peptides_20091001.fasta";
            string projectFile = @"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL03_2013\ProjectFile_SETTEpep_OneRAW.csv";

            DBOptions dbOptions = new DBOptions(fastaFile);

            try
            {
                Samples Project = new Samples(projectFile, 0, dbOptions);

                // Mass tolerances and digestion settings.
                dbOptions.precursorMassTolerance = new MassTolerance(5, MassToleranceUnits.ppm);
                dbOptions.productMassTolerance   = new MassTolerance(0.068, MassToleranceUnits.Da);//0.034 is a 60 000 resolution over 2000 range in mz
                dbOptions.MaximumPeptideMass     = 200000;
                dbOptions.OutputFolder           = outputDir;
                ProteaseDictionary proteases = ProteaseDictionary.Instance;
                dbOptions.DigestionEnzyme          = proteases["no enzyme"];
                dbOptions.NoEnzymeSearch           = false;
                dbOptions.ToleratedMissedCleavages = 20;

                // No fixed modifications are searched (the list is intentionally left empty).
                GraphML_List <Modification> fixMods = new GraphML_List <Modification>();
                dbOptions.fixedModifications = fixMods;

                GraphML_List <Modification> varMods = new GraphML_List <Modification>();
                varMods.Add(ModificationDictionary.Instance["oxidation of M"]);
                varMods.Add(ModificationDictionary.Instance["phosphorylation of S"]);
                varMods.Add(ModificationDictionary.Instance["phosphorylation of T"]);
                varMods.Add(ModificationDictionary.Instance["phosphorylation of Y"]);
                varMods.Add(ModificationDictionary.Instance["deamidation of N"]);
                varMods.Add(ModificationDictionary.Instance["deamidation of Q"]);
                varMods.Add(ModificationDictionary.Instance["cysteinylation of C"]);
                dbOptions.variableModifications = varMods;

                dbOptions.maximumVariableModificationIsoforms = 1024;//TODO Evaluate the viability of this parameter

                dbOptions.PSMFalseDiscoveryRate = 0.05;
                dbOptions.addFragmentMods       = false;
                dbOptions.addFragmentLoss       = false;

                // Only B and Y fragments are registered for this search.
                dbOptions.fragments = new Fragments();
                dbOptions.fragments.Add(new FragmentB());
                dbOptions.fragments.Add(new FragmentY());

                Propheus propheus = new Propheus(dbOptions, Project);

                dbOptions.SaveMS1Peaks       = false;
                dbOptions.SaveMSMSPeaks      = false;
                dbOptions.LoadSpectraIfFound = true;
                propheus.Preload(true);
                propheus.PrepareQueries();

                //First pass (used to optimize parameters and score weights)
                Result firstPass = propheus.SearchVersionAugust2013(propheus.AllQueries, true);

                firstPass.WriteInfoToCsv(true);
                firstPass.Export(0.02, "FirstPass_02_");

                dbOptions.SaveMS1Peaks  = true;
                dbOptions.SaveMSMSPeaks = true;

                //Second search
                propheus.Preload(true);
                propheus.PrepareQueries();
                Result finalRez = propheus.SearchLatestVersion(propheus.AllQueries, false);

                // Export the second-pass result. Previously the first-pass result was exported
                // a second time here and the outcome of the second search was discarded.
                finalRez.Export(0.02, "02_");
            }
            catch (Exception ex)
            {
                dbOptions.ConSole.WriteLine("Error in SettePeptideSample : " + ex.Message);
                dbOptions.ConSole.WriteLine(ex.StackTrace);
            }
        }
        /// <summary>
        /// Searches the given mixed raw files, characterizes the usable precursors, computes the
        /// peptide ratios of every matching mixed precursor and exports the results.
        /// </summary>
        /// <param name="mixedRaws">Raw file entries; one <c>Sample</c> (numbered from 1) is created per entry.</param>
        /// <param name="fastaFile">Forwarded to <c>CreateOptions</c>.</param>
        /// <param name="folderToOutputTo">Forwarded to <c>CreateOptions</c>.</param>
        /// <param name="conSol">Forwarded to <c>CreateOptions</c> and used for a progress message.</param>
        /// <remarks>
        /// Overwrites the members <c>dbOptions</c>, <c>MixedSamples</c>, <c>mixedResult</c>,
        /// <c>characterizedPeptides</c> and <c>mixedPrecursors</c>. A "Results.csv" file holding
        /// three header rows is written under <c>OutputFolder</c>.
        /// </remarks>
        public void Solve(string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
        {
            dbOptions = CreateOptions(fastaFile, folderToOutputTo, conSol);

            MixedSamples = new Samples(dbOptions);
            for (int i = 0; i < mixedRaws.Length; i++)
            {
                MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
            }

            //Precompute Mixed peptide identifications
            mixedResult = Propheus.Start(dbOptions, MixedSamples, false, false, true, false);

            conSol.WriteLine("Computing gradient descents...");

            //Compute all usable spiked peptides
            characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(MixedSamples, mixedResult, dbOptions, nbMinFragments, nbMaxFragments, precision);
            ExportSpikedSampleResult(characterizedPeptides, dbOptions);

            // Header rows of Results.csv. The title row has two leading columns
            // ("Mixed Sample", "Precursor"), so the two other rows start with a label
            // followed by an empty cell to stay aligned with it.
            vsCSVWriter writerCumul        = new vsCSVWriter(OutputFolder + "Results.csv");
            string      titleCombined      = "Mixed Sample,Precursor";
            string      curveStr           = "Polynomial Curve,";
            string      spikedIntensityStr = "Area under the curve,";

            foreach (double precursor in characterizedPeptides.Keys)
            {
                foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                {
                    titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

                    var coefficients = charPrec.eCurve.Coefficients;
                    if (coefficients != null && coefficients.Length == 3)
                    {
                        // Rendered as "a x^2 + b x + c"; the separator before the constant
                        // term used to be missing, which fused it with the linear term.
                        curveStr += "," + coefficients[0] + "x^2 + " + coefficients[1] + "x + " + coefficients[2];
                    }
                    else
                    {
                        curveStr += ",NA";
                    }

                    spikedIntensityStr += "," + charPrec.eCurve.Area;
                }
            }
            writerCumul.AddLine(titleCombined);
            writerCumul.AddLine(curveStr);
            writerCumul.AddLine(spikedIntensityStr);

            mixedPrecursors = new Dictionary <Sample, List <MixedPrecursor> >();

            foreach (Sample mixedSample in MixedSamples)
            {
                mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
            }

            //Get the list of precursors to characterize
            foreach (Sample mixedSample in MixedSamples)
            {
                foreach (double keyMz in characterizedPeptides.Keys)
                {
                    foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
                    {
                        // Exact floating-point match on purpose; presumably both values come
                        // from the same characterized-precursor keys - TODO confirm.
                        if (mPrec.MZ == keyMz)
                        {
                            // Compute Max Flow for this precursor
                            Dictionary <Peptide, ElutionCurve> ratios = GetRatiosNoSpikes(mPrec, precision);

                            ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                        }
                    }

                    // NOTE: per-peptide cumulative-area rows for Results.csv used to be
                    // assembled here; that code is disabled, so only the three header rows
                    // added above are written to the file.
                }
            }
            writerCumul.WriteToFile();
        }