/// <summary>
/// Writes one CSV file per (precursor m/z, sample) pair into the "IndividualNoSpike" sub-folder of the
/// output folder. Each file has one row per query with its scan time, precursor intensity, intensity per
/// millisecond and the observed intensity of every fragment of the characterized precursor.
/// </summary>
/// <param name="characterizedPeptides">Characterized precursors, keyed by m/z and then by sample.</param>
/// <param name="dbOptions">Options object supplying <c>OutputFolder</c>.</param>
private static void ExportSpikedSampleResult(Dictionary <double, Dictionary <Sample, CharacterizedPrecursor> > characterizedPeptides, DBOptions dbOptions)
        {
            foreach (KeyValuePair <double, Dictionary <Sample, CharacterizedPrecursor> > mzEntry in characterizedPeptides)
            {
                foreach (KeyValuePair <Sample, CharacterizedPrecursor> sampleEntry in mzEntry.Value)
                {
                    CharacterizedPrecursor precursor = sampleEntry.Value;
                    vsCSVWriter            csvOut    = new vsCSVWriter(dbOptions.OutputFolder + @"IndividualNoSpike\" + vsCSV.GetFileName_NoExtension(sampleEntry.Key.sSDF) + "_" + mzEntry.Key + "MZ.csv");

                    // Header: three fixed columns followed by one column per fragment.
                    string header = "Scan time,Precursor Intensity,Intensity Per Millisecond";
                    foreach (ProductMatch expected in precursor.AllFragments)
                    {
                        header = header + "," + expected.Fragment.Name + expected.fragmentPos + "^" + expected.charge;
                    }
                    csvOut.AddLine(header);

                    // One row per query; fragments that were not observed are reported as 0.
                    foreach (Query query in precursor.Queries)
                    {
                        string row = query.spectrum.RetentionTimeInMin + "," + query.spectrum.PrecursorIntensity + "," + query.spectrum.PrecursorIntensityPerMilliSecond;
                        foreach (ProductMatch expected in precursor.AllFragments)
                        {
                            double observed = 0.0;
                            foreach (ProductMatch candidate in query.psms[0].AllProductMatches)
                            {
                                bool sameFragment = candidate.charge == expected.charge && candidate.Fragment == expected.Fragment && candidate.fragmentPos == expected.fragmentPos;
                                if (sameFragment)
                                {
                                    // No early exit: when several matches qualify, the last one wins.
                                    observed = candidate.obsIntensity;
                                }
                            }
                            row = row + "," + observed;
                        }
                        csvOut.AddLine(row);
                    }
                    csvOut.WriteToFile();
                }
            }
        }
        /// <summary>
        /// Writes a CSV file for one mixed precursor into the "IndividualNoSpike" sub-folder of the output folder.
        /// The file holds a header, a "Total" row with the curve areas, then one row per time point with the mixed
        /// intensity count and the interpolated intensity of each peptide curve.
        /// </summary>
        /// <param name="ratios">Elution curve computed for each peptide.</param>
        /// <param name="mixedSample">Sample whose file name is used to build the output file name.</param>
        /// <param name="mixedPrecursor">Mixed precursor providing the reference elution curve and first query.</param>
        /// <param name="keyMz">Precursor m/z, used in the output file name.</param>
        /// <param name="dbOptions">Options object supplying <c>OutputFolder</c>.</param>
        private static void ExportMixedSampleResult(Dictionary <Peptide, MaxFlowElutionCurve> ratios, Sample mixedSample, MixedPrecursor mixedPrecursor, double keyMz, DBOptions dbOptions)
        {
            string      outputFile = dbOptions.OutputFolder + @"IndividualNoSpike\" + vsCSV.GetFileName_NoExtension(mixedSample.sSDF) + "_" + keyMz + "MZ_" + mixedPrecursor.Queries[0].spectrum.RetentionTimeInMin + "min.csv";
            vsCSVWriter csvOut     = new vsCSVWriter(outputFile);

            // Header row: one extra column per peptide.
            string header = "Scan time,Total Area";
            foreach (KeyValuePair <Peptide, MaxFlowElutionCurve> entry in ratios)
            {
                header = header + "," + entry.Key.Sequence;
            }
            csvOut.AddLine(header);

            // Summary row: area of the mixed curve followed by the area of each peptide curve.
            string totals = "Total," + mixedPrecursor.eCurveIntensityCount.Area;
            foreach (KeyValuePair <Peptide, MaxFlowElutionCurve> entry in ratios)
            {
                totals = totals + "," + entry.Value.eCurvePerMs.Area;
            }
            csvOut.AddLine(totals);

            // One row per sampled time point of the mixed curve.
            int point = 0;
            while (point < mixedPrecursor.eCurveIntensityCount.intensityCount.Count)
            {
                // The time value is divided by 60000 for display (header calls the column "Scan time").
                string row = mixedPrecursor.eCurveIntensityCount.time[point] / (1000.0 * 60.0) + "," + mixedPrecursor.eCurveIntensityCount.intensityCount[point];
                foreach (KeyValuePair <Peptide, MaxFlowElutionCurve> entry in ratios)
                {
                    row = row + "," + entry.Value.eCurvePerMs.InterpolateIntensity(mixedPrecursor.eCurveIntensityCount.time[point]);
                }
                csvOut.AddLine(row);
                point++;
            }
            csvOut.WriteToFile();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Copies a Mascot CSV report to <paramref name="csvFileOut"/>, appending to every line that has at least
        /// three columns the FASTA header text of the protein named in its third column.
        /// </summary>
        /// <param name="csvMascotFile">Path of the Mascot CSV report to read.</param>
        /// <param name="fastaFile">Path of the FASTA file whose header lines (starting with '&gt;') are scanned.</param>
        /// <param name="csvFileOut">Path of the CSV file to write.</param>
        /// <remarks>
        /// Exceptions raised while reading or writing are printed to the console and swallowed, in which case the
        /// output may not be written. Proteins that are not found in the FASTA file get an empty description.
        /// </remarks>
        public static void AppendProteinDescriptionToMascotReport(string csvMascotFile, string fastaFile, string csvFileOut)
        {
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                // Collect the protein identifiers (third column) before touching the FASTA file, so that the
                // stream is never left open if this step fails.
                Dictionary <string, string> descriptions = new Dictionary <string, string>();
                foreach (string line in csvMascot.LINES_LIST)
                {
                    string[] splits = line.Split(',');
                    if (splits.Length > 2 && !descriptions.ContainsKey(splits[2]))
                    {
                        descriptions.Add(splits[2], "");
                    }
                }

                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Best effort: retry with a more permissive share mode (e.g. file open for writing elsewhere).
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                // Disposing the reader also disposes the underlying stream.
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (line.StartsWith(">", StringComparison.Ordinal))
                        {
                            // The identifier is the text between '>' and the first space.
                            string id = line.Substring(1).Split(' ')[0];
                            if (descriptions.ContainsKey(id))
                            {
                                // Keep everything after the identifier; a later duplicate header overwrites an earlier one.
                                descriptions[id] = line.Substring(id.Length + 1);
                            }
                        }
                    }
                }

                foreach (string line in csvMascot.LINES_LIST)
                {
                    string[] splits      = line.Split(',');
                    string   lineToWrite = line;
                    string   description;
                    if (splits.Length > 2 && descriptions.TryGetValue(splits[2], out description))
                    {
                        lineToWrite += "," + description;
                    }
                    writer.AddLine(lineToWrite);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Splits a FASTA file into two files written next to it: "&lt;name&gt;_ForwardOnly.fasta" and
        /// "&lt;name&gt;_ReverseOnly.fasta". An entry goes to the reverse file when its header starts with
        /// "&gt;REVERSE_", otherwise to the forward file. Lines preceding the first header are dropped.
        /// </summary>
        /// <param name="fastaFile">Path of the FASTA file to split.</param>
        /// <remarks>Exceptions are printed to the console and swallowed.</remarks>
        public static void SeparateForwardAndReverse(string fastaFile)
        {
            try
            {
                FileStream input;
                try
                {
                    input = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Retry with a more permissive share mode.
                    input = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                vsCSVWriter forwardOut = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_ForwardOnly.fasta");
                vsCSVWriter reverseOut = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_ReverseOnly.fasta");

                using (StreamReader reader = new StreamReader(input))
                {
                    // Destination of the entry currently being read; Unknown until the first header is seen.
                    ProteinIdType current = ProteinIdType.Unknown;
                    for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
                    {
                        if (text.StartsWith(">"))
                        {
                            current = text.StartsWith(">REVERSE_") ? ProteinIdType.Reverse : ProteinIdType.Forward;
                        }

                        if (current == ProteinIdType.Forward)
                        {
                            forwardOut.AddLine(text);
                        }
                        else if (current == ProteinIdType.Reverse)
                        {
                            reverseOut.AddLine(text);
                        }
                    }
                }
                input.Close();
                forwardOut.WriteToFile();
                reverseOut.WriteToFile();
            }
            catch (System.Exception error)
            {
                Console.WriteLine(error.Message);
                Console.WriteLine(error.StackTrace);
            }
        }
        /// <summary>
        /// Writes the given peptide matches to a CSV file, one row per match, preceded by a header row.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        /// <param name="peptides">Peptide matches to export.</param>
        public static void Export(string filename, List <PeptideMatch> peptides)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine("Sequence,Variable Modification,Score,Decoy,Precursor Mass Error");

            foreach (PeptideMatch match in peptides)
            {
                // Column order follows the header above.
                string row = match.peptide.BaseSequence + "," + match.peptide.Sequence + "," + match.ProbabilityScore() + "," + match.peptide.Decoy + "," + match.GetPrecursorMassError();
                csvOut.AddLine(row);
            }

            csvOut.WriteToFile();
        }
        /// <summary>
        /// Writes the given protein groups to a CSV file: the <c>ProteinGroupMatch.Header</c> row followed by the
        /// <c>ToString()</c> representation of each group.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        /// <param name="proteins">Protein groups to export.</param>
        public static void Export(string filename, List <ProteinGroupMatch> proteins)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine(ProteinGroupMatch.Header);

            foreach (ProteinGroupMatch proteinGroup in proteins)
            {
                string row = proteinGroup.ToString();
                csvOut.AddLine(row);
            }

            csvOut.WriteToFile();
        }
        /// <summary>
        /// Developer utility with hard-coded input and output paths: loads each listed raw file, groups its tracks
        /// into isotope clusters and writes, per file, the number of MS1 spectra, the number of MS/MS spectra and
        /// the number of clusters found for each charge state from 1 to 14.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        /// <remarks>
        /// The per-charge counts are stored at index <c>charge - 1</c> so that they line up with the
        /// "1 Charge" .. "14 Charge" header columns. Clusters with no assigned charge (0) or a charge above 14
        /// have no column in the report and are not counted.
        /// </remarks>
        public static bool Run()
        {
            const int   maxCharge  = 14;
            string      outputPath = @"C:\_IRIC\DATA\Test\testMhc\Stats\";
            vsCSVWriter writer     = new vsCSVWriter(outputPath + "output.csv");

            // Builds "File,# MS1s,# MSMS,1 Charge,...,14 Charge".
            string header = "File,# MS1s,# MSMS";
            for (int z = 1; z <= maxCharge; z++)
            {
                header += "," + z + " Charge";
            }
            writer.AddLine(header);

            DBOptions options = MhcSample.CreateOptions(outputPath);

            string[] files = new string[] { @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS15.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS30.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS60_MSMS60.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS15.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS30.raw",
                                            @"N:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUL29_2013\Settepeptides_300713_10uL_MS120_MSMS60.raw" };
            foreach (string file in files)
            {
                pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(file);
                Spectra spectra = Spectra.Load(msFile, options, file);
                spectra.Sort(ProductSpectrum.AscendingPrecursorMassComparison);

                // Tracks already assigned to a cluster; only the keys are consulted.
                Dictionary <Track, Precursor> DicOfComputedTracks = new Dictionary <Track, Precursor>();
                int[] charges = new int[maxCharge];
                foreach (Track track in spectra.tracks)
                {
                    if (!DicOfComputedTracks.ContainsKey(track))
                    {
                        DicOfComputedTracks.Add(track, null);
                        int charge = 0;
                        foreach (Precursor precursor in Queries.GetIsotopes(track, options, spectra.tracks, null))
                        {
                            // The last isotope with a positive charge decides the charge of the cluster.
                            if (precursor.Charge > 0)
                            {
                                charge = precursor.Charge;
                            }
                            if (!DicOfComputedTracks.ContainsKey(precursor.Track))
                            {
                                DicOfComputedTracks.Add(precursor.Track, precursor);
                            }
                        }

                        // Index by charge - 1 so that counts match the header; ignore charges without a column.
                        if (charge >= 1 && charge <= maxCharge)
                        {
                            charges[charge - 1]++;
                        }
                    }
                }
                string line = file + "," + spectra.MS1s.Count + "," + spectra.Count;
                for (int i = 0; i < charges.Length; i++)
                {
                    line += "," + charges[i];
                }
                writer.AddLine(line);
            }
            writer.WriteToFile();
            return(true);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Writes every track of this collection to a CSV file: the <c>Track.TITLE</c> header row followed by the
        /// <c>ToString()</c> representation of each track.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        public void Export(string filename)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine(Track.TITLE);

            foreach (Track current in this)
            {
                string row = current.ToString();
                csvOut.AddLine(row);
            }

            csvOut.WriteToFile();
        }
Exemplo n.º 9
0
        /// <summary>
        /// Writes every product spectrum of this collection to a CSV file: the <c>ProductSpectrum.TITLE</c>
        /// header row followed by the <c>ToString()</c> representation of each spectrum.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        public void ExportMSMS(string filename)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine(ProductSpectrum.TITLE);

            foreach (ProductSpectrum current in this)
            {
                string row = current.ToString();
                csvOut.AddLine(row);
            }

            csvOut.WriteToFile();
        }
Exemplo n.º 10
0
        /// <summary>
        /// Writes "&lt;name&gt;_Shuffled.fasta" next to the given FASTA file. Header lines (starting with '&gt;') are
        /// copied unchanged; the sequence lines of each entry are concatenated and replaced by a single line
        /// produced by <c>AminoAcidTools.Shuffle</c>.
        /// </summary>
        /// <param name="fastaFile">Path of the FASTA file to read.</param>
        /// <remarks>Exceptions are printed to the console and swallowed.</remarks>
        public static void ShuffleSequences(string fastaFile)
        {
            try
            {
                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Best effort: retry with a more permissive share mode.
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                vsCSVWriter wrShuffled;

                // The outer using guarantees the stream is released even if creating the writer fails.
                using (fs)
                {
                    wrShuffled = new vsCSVWriter(vsCSV.GetFolder(fastaFile) + vsCSV.GetFileName_NoExtension(fastaFile) + "_Shuffled.fasta");

                    using (StreamReader sr = new StreamReader(fs))
                    {
                        // Accumulates the residues of the entry being read (a builder avoids quadratic concatenation).
                        System.Text.StringBuilder aaSeq = new System.Text.StringBuilder();
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            if (line.StartsWith(">", StringComparison.Ordinal))
                            {
                                // A new header closes the previous entry: emit its shuffled sequence first.
                                if (aaSeq.Length > 0)
                                {
                                    wrShuffled.AddLine(Proteomics.Utilities.Tools.AminoAcidTools.Shuffle(aaSeq.ToString()));
                                }
                                wrShuffled.AddLine(line);
                                aaSeq.Length = 0;
                            }
                            else
                            {
                                aaSeq.Append(line);
                            }
                        }

                        // Flush the last entry, which is not followed by a header.
                        if (aaSeq.Length > 0)
                        {
                            wrShuffled.AddLine(Proteomics.Utilities.Tools.AminoAcidTools.Shuffle(aaSeq.ToString()));
                        }
                    }
                }
                wrShuffled.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Developer utility with hard-coded paths: merges three Mascot CSV exports side by side. Rows are keyed
        /// by columns 1 and 33; for each key the output holds, per input file, columns 13, 14, 18 and 26 of the
        /// matching row, or four empty cells when the file has no such row.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        /// <remarks>Rows with fewer than 34 columns are ignored, since column 33 is part of the key.</remarks>
        public static bool MascotCompare()
        {
            List <string> listMascotFiles = new List <string>();

            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\no MSed.csv");
            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed1.csv");
            listMascotFiles.Add(@"N:\Thibault\Frederic Lamoliatte\Olivier\MSed2.csv");

            Dictionary <string, string[]> dicOfPep = new Dictionary <string, string[]>();

            for (int i = 0; i < listMascotFiles.Count; i++)
            {
                vsCSV csv = new vsCSV(listMascotFiles[i]);
                foreach (string line in csv.LINES_LIST)
                {
                    string[] splits = line.Split(vsCSV._Generic_Separator);

                    // The highest column read below is 33, so shorter rows must be skipped.
                    if (splits.Length > 33)
                    {
                        // Original author's note on the key: raw+scan+seq+mod.
                        string   key = splits[1] + "," + splits[33];
                        string[] perFile;
                        if (!dicOfPep.TryGetValue(key, out perFile))
                        {
                            perFile = new string[listMascotFiles.Count];
                            dicOfPep.Add(key, perFile);
                        }

                        // A later row with the same key in the same file overwrites the earlier one.
                        perFile[i] = "," + splits[13] + "," + splits[14] + "," + splits[18] + "," + splits[26];
                    }
                }
            }

            vsCSVWriter writer = new vsCSVWriter(@"C:\_IRIC\DATA\Sumo\outputCompare.csv");

            foreach (KeyValuePair <string, string[]> entry in dicOfPep)
            {
                string str = entry.Key;
                for (int i = 0; i < listMascotFiles.Count; i++)
                {
                    if (entry.Value[i] != null)
                    {
                        str += entry.Value[i];
                    }
                    else
                    {
                        // Four empty cells keep the columns of the following files aligned.
                        str += ",,,,";
                    }
                }
                writer.AddLine(str);
            }
            writer.WriteToFile();

            return(true);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Writes the given peptide-spectrum matches to a CSV file, one row per match, preceded by a header row.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        /// <param name="psms">Peptide-spectrum matches to export.</param>
        public static void Export(string filename, List <PeptideSpectrumMatch> psms)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine("Mz,Rt,Charge,Sequence,Modifications,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

            foreach (PeptideSpectrumMatch match in psms)
            {
                // The row is assembled in three groups, in header order.
                string location = match.Query.precursor.Track.MZ + "," + match.Query.spectrum.RetentionTimeInMin + "," + match.Query.precursor.Charge;
                string identity = match.Peptide.BaseSequence + "," + match.Peptide.Sequence;
                string scores   = match.PrecursorScore + "," + match.ProductScore + "," + match.IntensityScore + "," + match.ProbabilityScore() + "," +
                                  match.PrecursorMzError + "," + match.Decoy + "," + match.ProteinScore;

                csvOut.AddLine(location + "," + identity + "," + scores);
            }

            csvOut.WriteToFile();
        }
Exemplo n.º 13
0
        /// <summary>
        /// For every FASTA header line (starting with '&gt;') and every row of the prediction CSV whose second
        /// column occurs in that header, writes the CSV row followed by a 13-character excerpt of the header.
        /// </summary>
        /// <param name="fastaFile">Path of the FASTA file whose headers are scanned.</param>
        /// <param name="csvUbiFile">Path of the prediction CSV file to read.</param>
        /// <param name="csvFileOut">Path of the CSV file to write.</param>
        /// <remarks>
        /// CSV rows with fewer than two columns are ignored. Exceptions are printed to the console and swallowed,
        /// in which case the output file is not written.
        /// </remarks>
        public static void AddIPIToUbiPredFile(string fastaFile, string csvUbiFile, string csvFileOut)
        {
            vsCSV       csv    = new vsCSV(csvUbiFile);
            vsCSVWriter writer = new vsCSVWriter(csvFileOut);

            try
            {
                // Split every CSV row once, up front, instead of once per FASTA header.
                // Key = the full row, Value = its second column.
                List <KeyValuePair <string, string> > rows = new List <KeyValuePair <string, string> >();
                foreach (string csvLine in csv.LINES_LIST)
                {
                    string[] splits = csvLine.Split(vsCSV._Generic_Separator);
                    if (splits.Length > 1)
                    {
                        rows.Add(new KeyValuePair <string, string>(csvLine, splits[1]));
                    }
                }

                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Best effort: retry with a more permissive share mode.
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                // Disposing the reader also disposes the underlying stream.
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (!line.StartsWith(">", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        foreach (KeyValuePair <string, string> row in rows)
                        {
                            if (line.Contains(row.Value))
                            {
                                // NOTE(review): the excerpt offset presumably assumes the matched value sits right
                                // after '>' - confirm. Substring throws when the header is too short for the
                                // 13-character excerpt, which aborts the export via the catch block below.
                                writer.AddLine(row.Key + "," + line.Substring(row.Value.Length + 1, 13));
                            }
                        }
                    }
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Copies a Mascot CSV report to <paramref name="csvFileOut"/>, appending to each eligible line columns 2
        /// and 3 of every prediction row whose column 4 contains the Mascot line's column 3 and whose column 2
        /// (parsed as an integer) lies within the inclusive range given by the Mascot line's columns 16 and 17.
        /// </summary>
        /// <param name="csvUbiFile">Path of the prediction CSV file.</param>
        /// <param name="csvMascotFile">Path of the Mascot CSV report.</param>
        /// <param name="csvFileOut">Path of the CSV file to write.</param>
        /// <remarks>
        /// Lines starting with "Search" or having fewer than 18 columns are copied unchanged. A failure while
        /// processing one line (for example a non-numeric index) is printed to the console and the line is
        /// written with whatever was appended before the failure.
        /// </remarks>
        public static void AppendUbiPredToMascotReport(string csvUbiFile, string csvMascotFile, string csvFileOut)
        {
            vsCSV       csvUbi    = new vsCSV(csvUbiFile);
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                foreach (string lineMascot in csvMascot.LINES_LIST)
                {
                    string strToAppend = "";
                    try
                    {
                        string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);

                        // Columns 16 and 17 are both read below, so at least 18 columns are required.
                        if (mSplits.Length >= 18 && !lineMascot.StartsWith("Search"))
                        {
                            int indexStart = int.Parse(mSplits[16]);
                            int indexStop  = int.Parse(mSplits[17]);
                            foreach (string lineUbi in csvUbi.LINES_LIST)
                            {
                                string[] splits   = lineUbi.Split(vsCSV._Generic_Separator);
                                int      indexUbi = int.Parse(splits[2]);
                                if (splits[4].Contains(mSplits[3]) && indexUbi >= indexStart && indexUbi <= indexStop)
                                {
                                    strToAppend += "," + splits[2] + "," + splits[3];
                                }
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        // Per-line failures are reported but do not stop the export.
                        Console.WriteLine(ex.Message);
                        Console.WriteLine(ex.StackTrace);
                    }
                    writer.AddLine(lineMascot + strToAppend);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Writes a CSV file with one row per PSM: its retention time followed by, for every combination of
        /// fragment position (1..peptide length), charge (1..<paramref name="psmCharge"/>) and fragment class of
        /// <c>dbOptions.fragments</c>, the summed observed intensity of the matching product ions.
        /// </summary>
        /// <param name="psms">Peptide-spectrum matches to export.</param>
        /// <param name="peptide">Peptide whose length defines the range of fragment positions.</param>
        /// <param name="psmCharge">Highest fragment charge to report.</param>
        /// <param name="fileName">Path of the CSV file to write.</param>
        public void ExportFragmentIntensitiesForAllPSM(List <PeptideSpectrumMatch> psms, Peptide peptide, int psmCharge, string fileName)
        {
            vsCSVWriter csvOut = new vsCSVWriter(fileName);

            // Header: columns are enumerated position-major, then charge, then fragment class.
            string header = "Retention Time";
            for (int position = 1; position <= peptide.Length; position++)
            {
                for (int z = 1; z <= psmCharge; z++)
                {
                    foreach (FragmentClass fragmentClass in dbOptions.fragments)
                    {
                        header = header + "," + position + fragmentClass.Name + " ^" + z;
                    }
                }
            }
            csvOut.AddLine(header);

            // Rows: same enumeration order as the header.
            foreach (PeptideSpectrumMatch psm in psms)
            {
                string row = psm.Query.spectrum.RetentionTimeInMin.ToString();
                for (int position = 1; position <= peptide.Length; position++)
                {
                    for (int z = 1; z <= psmCharge; z++)
                    {
                        foreach (FragmentClass fragmentClass in dbOptions.fragments)
                        {
                            double total = 0.0;
                            foreach (ProductMatch product in psm.AllProductMatches)
                            {
                                bool isThisColumn = fragmentClass == product.Fragment && product.fragmentPos == position && product.charge == z;
                                if (isThisColumn)
                                {
                                    total += product.obsIntensity;
                                }
                            }
                            row = row + "," + total;
                        }
                    }
                }
                csvOut.AddLine(row);
            }
            csvOut.WriteToFile();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Lists every spectrum of a raw file in a CSV file: 1-based scan number, scan start time in minutes and
        /// MS level.
        /// </summary>
        /// <param name="rawFileName">Path of the raw file, opened through <c>pwiz.CLI.msdata.MSDataFile</c>.</param>
        /// <param name="csvOutFileName">Path of the CSV file to write.</param>
        public static void ToCSV(string rawFileName, string csvOutFileName)
        {
            vsCSVWriter output = new vsCSVWriter(csvOutFileName);
            output.AddLine("Scan Number,Retention Time (min),Ms Level");

            pwiz.CLI.msdata.MSDataFile dataFile = new pwiz.CLI.msdata.MSDataFile(rawFileName);
            int spectrumCount = dataFile.run.spectrumList.size();

            int index = 0;
            while (index < spectrumCount)
            {
                pwiz.CLI.msdata.Spectrum current = dataFile.run.spectrumList.spectrum(index, false);

                // The start time of the first scan is read in seconds and converted to minutes.
                double minutes = current.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;
                string row     = (index + 1) + "," + minutes + "," + current.cvParam(pwiz.CLI.cv.CVID.MS_ms_level).value;
                output.AddLine(row);

                index++;
            }
            output.WriteToFile();
        }
Exemplo n.º 17
0
        /// <summary>
        /// Writes the given precursors to a CSV file, one row per precursor. The columns describing the best
        /// peptide-spectrum match are left out of the row when <c>OptimizedBestPsm()</c> returns null.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        /// <param name="precursors">Precursors to export.</param>
        public static void Export(string filename, List <Precursor> precursors)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine("Index.Mz,Rt,Precursor Mz,Charge,Most Intense Charge,Precursor Mass,Peptide Mass,Sequence,Modified Sequence,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

            foreach (Precursor current in precursors)
            {
                // Precursor-level columns; the trailing comma is always present.
                string precursorPart = current.INDEX + "," + current.Track.RT + "," + current.Track.MZ + "," + current.Charge + "," + current.GetMostIntenseCharge() + "," + current.Mass + ",";

                // Match-level columns, empty when the precursor has no best match.
                string               matchPart = "";
                PeptideSpectrumMatch best      = current.OptimizedBestPsm();
                if (best != null)
                {
                    matchPart = best.Peptide.MonoisotopicMass + "," + best.Peptide.BaseSequence + "," + best.Peptide.Sequence + "," + best.PrecursorScore + "," + best.ProductScore + "," + best.IntensityScore + "," + current.ProbabilityScore(best.Peptide) + "," +
                                best.PrecursorMzError + "," + best.Decoy + "," + best.ProteinScore;
                }

                csvOut.AddLine(precursorPart + matchPart);
            }

            csvOut.WriteToFile();
        }
Exemplo n.º 18
0
        /// <summary>
        /// Writes the given queries to a CSV file, one row per query. The columns describing the best
        /// peptide-spectrum match of the query's precursor are left out when <c>OptimizedBestPsm()</c> returns null.
        /// </summary>
        /// <param name="filename">Path of the CSV file to write.</param>
        /// <param name="queries">Queries to export.</param>
        public static void Export(string filename, IEnumerable <Query> queries)
        {
            vsCSVWriter csvOut = new vsCSVWriter(filename);
            csvOut.AddLine("Spectrum Precursor Mz,Rt,Charge,BaseSequence,Sequence,Precursor Score,Product Score,Intensity Score,Final Score,Precursor Mass Error,Decoy?,Protein Score");

            foreach (Query current in queries)
            {
                // Query-level columns; the trailing comma is always present.
                string queryPart = current.spectrum.PrecursorMZ + "," + current.precursor.Track.RT + "," + current.precursor.Charge + ",";

                // Match-level columns, empty when there is no best match.
                string               matchPart = "";
                PeptideSpectrumMatch best      = current.precursor.OptimizedBestPsm();
                if (best != null)
                {
                    matchPart = best.Peptide.BaseSequence + "," + best.Peptide.Sequence + "," + best.PrecursorScore + "," + best.ProductScore + "," + best.IntensityScore + "," + current.ScoreFct(best.Peptide) + "," +
                                best.PrecursorMzError + "," + best.Decoy + "," + best.ProteinScore;
                }

                csvOut.AddLine(queryPart + matchPart);
            }

            csvOut.WriteToFile();
        }
Exemplo n.º 19
0
        /// <summary>
        /// Writes a table of variables over time to a CSV file. The first row holds the variable names, the second
        /// their correlation values, and each following row holds one date with the value of every variable at
        /// that date (an empty cell when the variable is absent).
        /// </summary>
        /// <param name="fileName">Path of the CSV file to write.</param>
        /// <param name="DicOfTime">Variables recorded at each date.</param>
        /// <param name="DicOfVar">Dictionary whose keys define the exported variable names and their column order.</param>
        /// <param name="DicOfCorr">Correlation value per variable name; must contain every key of <paramref name="DicOfVar"/>.</param>
        public static void ExportAllVariables(string fileName, Dictionary <DateTime, List <Variable> > DicOfTime, Dictionary <string, List <Variable> > DicOfVar, Dictionary <string, double> DicOfCorr)
        {
            vsCSVWriter csvOut = new vsCSVWriter(fileName);

            // Two header rows; the correlation row starts with an empty cell under "Date".
            string nameRow        = "Date";
            string correlationRow = "";
            foreach (string variableName in DicOfVar.Keys)
            {
                nameRow        = nameRow + "," + variableName;
                correlationRow = correlationRow + "," + DicOfCorr[variableName];
            }
            csvOut.AddLine(nameRow);
            csvOut.AddLine(correlationRow);

            foreach (KeyValuePair <DateTime, List <Variable> > snapshot in DicOfTime)
            {
                string row = snapshot.Key.ToString();
                foreach (string variableName in DicOfVar.Keys)
                {
                    // Defaults to an empty cell; the last variable with a matching name wins.
                    string cell = ",";
                    foreach (Variable candidate in snapshot.Value)
                    {
                        if (candidate.name.CompareTo(variableName) != 0)
                        {
                            continue;
                        }

                        // "price" is exported with its raw value, every other variable with its normalized value.
                        cell = candidate.name.CompareTo("price") == 0 ? "," + candidate.value : "," + candidate.normValue;
                    }
                    row = row + cell;
                }
                csvOut.AddLine(row);
            }
            csvOut.WriteToFile();
        }
Exemplo n.º 20
0
        /// <summary>
        /// Provides deconvoluted elution curves of mixed spectra from the provided raw files using the provided synthetic raw file
        /// Exports in CSV files and stores everything in class objects
        /// </summary>
        /// <param name="spikedRaws"></param>
        /// <param name="mixedRaws"></param>
        /// <param name="fastaFile"></param>
        /// <param name="folderToOutputTo"></param>
        /// <param name="conSol"></param>
        public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
        {
            dbOptions     = CreateOptions(fastaFile, folderToOutputTo, precTolPpm, prodTolDa, conSol);
            SpikedSamples = new Samples(dbOptions);
            for (int i = 0; i < spikedRaws.Length; i++)
            {
                SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
            }

            //Precompute Spiked peptide identifications
            SpikedResult = Ace.Start(dbOptions, SpikedSamples, false, false);
            SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

            MixedSamples = new Samples(dbOptions);
            for (int i = 0; i < mixedRaws.Length; i++)
            {
                MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
            }

            //Precompute Mixed peptide identifications
            mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
            if (mixedResult == null)
            {
                conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
            }
            else
            {
                mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");

                conSol.WriteLine("Computing gradient descents...");

                //Compute all usable spiked peptides
                characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments);
                ExportSpikedSampleResult(characterizedPeptides, dbOptions);

                vsCSVWriter writerCumul        = new vsCSVWriter(OutputFolder + "Results.csv");
                string      titleCombined      = "Mixed Sample,Precursor";
                string      curveStr           = "Polynomial Curve,";
                string      spikedIntensityStr = "Area under the curve,";
                foreach (double precursor in characterizedPeptides.Keys)
                {
                    foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                    {
                        titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

                        if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
                        {
                            curveStr += "," + charPrec.eCurveIntensityCount.Coefficients[0] + "x^2 + " + charPrec.eCurveIntensityCount.Coefficients[1] + "x" + charPrec.eCurveIntensityCount.Coefficients[2];
                        }
                        else
                        {
                            curveStr += ",NA";
                        }

                        spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
                    }
                }
                writerCumul.AddLine(titleCombined);
                writerCumul.AddLine(curveStr);
                writerCumul.AddLine(spikedIntensityStr);

                mixedPrecursors = new Dictionary <Sample, List <MixedPrecursor> >();

                foreach (Sample mixedSample in MixedSamples)
                {
                    mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
                }

                //Get the list of precursors to characterize
                foreach (Sample mixedSample in MixedSamples)
                {
                    foreach (double keyMz in characterizedPeptides.Keys)
                    {
                        List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> > listOfRatios = new List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> >();
                        foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
                        {
                            if (mPrec.MZ == keyMz)
                            {
                                // Compute Max Flow for this precursor
                                Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> ratios = GetRatios(characterizedPeptides, mPrec, dbOptions, nbMinFragments, nbMaxFragments);
                                listOfRatios.Add(ratios);

                                ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                            }
                        }

                        bool   isEmpty   = true;
                        string resultStr = vsCSV.GetFileName(mixedSample.sSDF) + "," + keyMz;
                        foreach (double precursor in characterizedPeptides.Keys)
                        {
                            foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                            {
                                double cumulArea = 0.0;
                                foreach (Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> ratios in listOfRatios)
                                {
                                    if (ratios.ContainsKey(charPrec))
                                    {
                                        cumulArea += ratios[charPrec].eCurvePerMs.Area;
                                    }
                                }
                                resultStr += "," + cumulArea;
                                if (cumulArea > 0)
                                {
                                    isEmpty = false;
                                }
                            }
                        }
                        if (!isEmpty)
                        {
                            writerCumul.AddLine(resultStr);
                        }
                    }
                }
                writerCumul.WriteToFile();

                //List Modifications
                Dictionary <Modification, double> dicOfIntensityPerMod = new Dictionary <Modification, double>();
                foreach (Sample sample in mixedPrecursors.Keys)
                {
                    foreach (MixedPrecursor mP in mixedPrecursors[sample])
                    {
                        foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                        {
                            if (cP.Peptide.VariableModifications != null)
                            {
                                foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                                {
                                    if (!dicOfIntensityPerMod.ContainsKey(mod))
                                    {
                                        dicOfIntensityPerMod.Add(mod, 0.0);
                                    }
                                }
                            }
                        }
                    }
                }


                //Compute site occupancy for identical sequences (real positionnal isomers)
                vsCSVWriter    writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
                List <Protein> AllProteins          = Ace.ReadProteomeFromFasta(fastaFile, false, dbOptions);
                foreach (Protein protein in AllProteins)
                {
                    string newTitleProtein = protein.Description.Replace(',', ' ') + "," + protein.Sequence;
                    for (int i = 0; i < protein.Sequence.Length; i++)
                    {
                        newTitleProtein += "," + protein[i].ToString();
                    }
                    writerSitesOccupancy.AddLine(newTitleProtein);

                    foreach (Sample mixedSample in mixedPrecursors.Keys)
                    {
                        string coverage = "Coverage," + mixedSample.Name;
                        for (int i = 0; i < protein.Sequence.Length; i++)
                        {
                            double cumulSite = 0.0;
                            newTitleProtein += "," + protein[i].ToString();
                            foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                            {
                                foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                                {
                                    if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                                    {
                                        cumulSite += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                    }
                                }
                            }
                            coverage += "," + cumulSite;
                        }
                        writerSitesOccupancy.AddLine(coverage);
                    }

                    foreach (Modification mod in dicOfIntensityPerMod.Keys)
                    {
                        Dictionary <Sample, string> dicOfLines = new Dictionary <Sample, string>();
                        for (int i = 0; i < protein.Sequence.Length; i++)
                        {
                            foreach (Sample mixedSample in mixedPrecursors.Keys)
                            {
                                double cumulModArea = 0.0;
                                foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                                {
                                    foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                                    {
                                        if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber &&
                                            cP.Peptide.VariableModifications != null)
                                        {
                                            foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                            {
                                                if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1 && cP.Peptide.VariableModifications[pos] == mod)
                                                {
                                                    cumulModArea += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                                }
                                            }
                                        }
                                    }
                                }
                                if (!dicOfLines.ContainsKey(mixedSample))
                                {
                                    dicOfLines.Add(mixedSample, mod.Description + "," + mixedSample.Name + "," + cumulModArea);
                                }
                                else
                                {
                                    dicOfLines[mixedSample] += "," + cumulModArea;
                                }
                            }
                        }
                        foreach (string line in dicOfLines.Values)
                        {
                            writerSitesOccupancy.AddLine(line);
                        }
                    }
                }
                writerSitesOccupancy.WriteToFile();
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Batch run over the hard-coded project files below: runs the solver, recomputes the
        /// spiked/mixed identifications and ratios, then writes one summary row per mixed-sample
        /// condition to <c>stats_Area.csv</c>.
        /// </summary>
        /// <remarks>
        /// Each row holds, for every characterized precursor, the replicate-averaged area
        /// (by count and per millisecond) followed by the standard deviation of the
        /// per-replicate values when more than one replicate contributed.
        /// </remarks>
        /// <param name="console">Console object forwarded to <c>PositionnalIsomerSolver.CreateOptions</c>.</param>
        public static void ProjectMerge(PeptidAce.Utilities.Interfaces.IConSol console)
        {
            string    mixedProjectFile = @"C:\_IRIC\Data\NB\ProjectFile_EverythingReplicates_Oct.csv";
            string    synthProjectFile = @"C:\_IRIC\Data\NB\ProjectTest_AllAce_Spiked_19Oct.csv";
            string    fastaFile        = @"C:\_IRIC\Data\NB\peptide.fasta";
            DBOptions options          = PositionnalIsomerSolver.CreateOptions(fastaFile, @"C:\_IRIC\Data\NB\Units\", 8, 0.05, console);
            Samples   samplesMixed     = new Samples(mixedProjectFile, 0, options);
            Samples   samplesSynth     = new Samples(synthProjectFile, 0, options);

            PositionnalIsomerSolver solver = new PositionnalIsomerSolver
            {
                precTolPpm     = 15,
                prodTolDa      = 0.05,
                nbMinFragments = 5,
                nbMaxFragments = 5
            };

            // Flatten both sample collections into plain arrays of their sSDF paths.
            string[] synthFiles = new string[samplesSynth.Count];
            for (int s = 0; s < synthFiles.Length; s++)
            {
                synthFiles[s] = samplesSynth[s].sSDF;
            }

            string[] mixedFiles = new string[samplesMixed.Count];
            for (int m = 0; m < mixedFiles.Length; m++)
            {
                mixedFiles[m] = samplesMixed[m].sSDF;
            }

            solver.Solve(synthFiles, mixedFiles, fastaFile, Utilities.vsCSV.GetFolder(mixedFiles[0]), options.ConSole);

            // Identifications for the spiked (synthetic) samples first, then for the mixed samples.
            Result SpikedResult = Ace.Start(options, samplesSynth, false, false);
            Result mixedResult  = Ace.Start(options, samplesMixed, false, false);

            // All usable spiked peptides, keyed by precursor m/z and then by sample.
            Dictionary <double, Dictionary <Sample, CharacterizedPrecursor> > characterizedPeptides =
                CharacterizedPrecursor.GetSpikedPrecursors(samplesSynth, SpikedResult, options, solver.nbMinFragments, solver.nbMaxFragments);

            var mixedPrecursors = new Dictionary <Sample, List <MixedPrecursor> >();
            foreach (Sample mixedSample in samplesMixed)
            {
                mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, options, characterizedPeptides));
            }

            // For every mixed sample, compute ratios for each mixed precursor whose m/z equals a
            // characterized key. A sample only gets an entry once it has at least one match.
            var results = new Dictionary <Sample, List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> > >();
            foreach (Sample mixedSample in samplesMixed)
            {
                foreach (double keyMz in characterizedPeptides.Keys)
                {
                    foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
                    {
                        if (!(mPrec.MZ == keyMz))
                        {
                            continue;
                        }

                        // Compute Max Flow for this precursor
                        Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> ratios =
                            PositionnalIsomerSolver.GetRatios(characterizedPeptides, mPrec, options, solver.nbMinFragments, solver.nbMaxFragments);

                        List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> > perSample;
                        if (!results.TryGetValue(mixedSample, out perSample))
                        {
                            perSample = new List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> >();
                            results.Add(mixedSample, perSample);
                        }
                        perSample.Add(ratios);
                    }
                }
            }

            // Flat list of characterized precursors; its order fixes the column order of the report.
            var precursors = new List <CharacterizedPrecursor>();
            foreach (Dictionary <Sample, CharacterizedPrecursor> bySample in characterizedPeptides.Values)
            {
                precursors.AddRange(bySample.Values);
            }

            // Average of each characterized peptide plus standard deviation.
            vsCSVWriter writerArea = new vsCSVWriter(@"C:\_IRIC\Data\NB\Merge\stats_Area.csv");

            // The same run of peptide-sequence columns is repeated under each of the four section titles.
            string sequenceColumns = "";
            foreach (CharacterizedPrecursor cP in precursors)
            {
                sequenceColumns += cP.Peptide.Sequence + ",";
            }
            writerArea.AddLine("Count," + sequenceColumns +
                               "Intensity per ms," + sequenceColumns +
                               "Standard Deviation Count," + sequenceColumns +
                               "Standard Deviation per ms," + sequenceColumns);

            foreach (int cond in samplesMixed.GetConditions())
            {
                // Per precursor and replicate number: the curve with the largest count-area seen
                // among the samples of this condition (zero-area curves are ignored).
                var    deconvoluted = new Dictionary <CharacterizedPrecursor, Dictionary <int, MaxFlowElutionCurve> >();
                string sampleName   = "";
                foreach (KeyValuePair <Sample, List <Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> > > sampleEntry in results)
                {
                    Sample mixedSample = sampleEntry.Key;
                    if (!(mixedSample.PROJECT.CONDITION == cond))
                    {
                        continue;
                    }

                    sampleName = vsCSV.GetFileName_NoExtension(mixedSample.sSDF);
                    int replicate = mixedSample.PROJECT.REPLICATE;
                    foreach (Dictionary <CharacterizedPrecursor, MaxFlowElutionCurve> ratio in sampleEntry.Value)
                    {
                        foreach (KeyValuePair <CharacterizedPrecursor, MaxFlowElutionCurve> candidate in ratio)
                        {
                            MaxFlowElutionCurve curve = candidate.Value;
                            if (!(curve.eCurveCount.Area > 0))
                            {
                                continue;
                            }

                            Dictionary <int, MaxFlowElutionCurve> byReplicate;
                            if (!deconvoluted.TryGetValue(candidate.Key, out byReplicate))
                            {
                                byReplicate = new Dictionary <int, MaxFlowElutionCurve>();
                                deconvoluted.Add(candidate.Key, byReplicate);
                            }

                            MaxFlowElutionCurve kept;
                            if (!byReplicate.TryGetValue(replicate, out kept))
                            {
                                byReplicate.Add(replicate, curve);
                            }
                            else if (kept.eCurveCount.Area < curve.eCurveCount.Area)
                            {
                                byReplicate[replicate] = curve;
                            }
                        }
                    }
                }

                // Summed area per replicate over all precursors; used below as the normalisation denominator.
                var totalIntensityCount = new Dictionary <int, double>();
                var totalIntensityPerMs = new Dictionary <int, double>();
                foreach (CharacterizedPrecursor cP in precursors)
                {
                    Dictionary <int, MaxFlowElutionCurve> byReplicate;
                    if (!deconvoluted.TryGetValue(cP, out byReplicate))
                    {
                        continue;
                    }

                    foreach (KeyValuePair <int, MaxFlowElutionCurve> entry in byReplicate)
                    {
                        // TryGetValue leaves the running sum at 0.0 when the replicate is new.
                        double countSoFar;
                        double perMsSoFar;
                        totalIntensityCount.TryGetValue(entry.Key, out countSoFar);
                        totalIntensityPerMs.TryGetValue(entry.Key, out perMsSoFar);
                        totalIntensityCount[entry.Key] = countSoFar + entry.Value.eCurveCount.Area;
                        totalIntensityPerMs[entry.Key] = perMsSoFar + entry.Value.eCurvePerMs.Area;
                    }
                }

                // Four column groups of the output row, each cell terminated by a comma.
                var lineArea    = new System.Text.StringBuilder(sampleName + ",");
                var lineMS      = new System.Text.StringBuilder(",");
                var stdDevCount = new System.Text.StringBuilder();
                var stdDevMS    = new System.Text.StringBuilder();

                //1) Compute an average out of the replicates
                foreach (CharacterizedPrecursor cP in precursors)
                {
                    double sumCount = 0;
                    double sumPerMs = 0;
                    Dictionary <int, MaxFlowElutionCurve> byReplicate;
                    if (deconvoluted.TryGetValue(cP, out byReplicate))
                    {
                        foreach (MaxFlowElutionCurve curve in byReplicate.Values)
                        {
                            sumCount += curve.eCurveCount.Area;
                            sumPerMs += curve.eCurvePerMs.Area;
                        }
                    }

                    // Nothing deconvoluted for this precursor in this condition: leave all four cells blank.
                    if (!(sumCount > 0))
                    {
                        lineArea.Append(",");
                        lineMS.Append(",");
                        stdDevCount.Append(",");
                        stdDevMS.Append(",");
                        continue;
                    }

                    double averageAreaCount = (sumCount / ((double)byReplicate.Count));
                    double averageAreaMS    = (sumPerMs / ((double)byReplicate.Count));

                    // Each replicate's share of its own replicate total, scaled by the mean area.
                    double        deNormSumCount = 0.0;
                    double        deNormSumPerMs = 0.0;
                    List <double> repAreaCount   = new List <double>();
                    List <double> repAreaMS      = new List <double>();
                    foreach (KeyValuePair <int, MaxFlowElutionCurve> entry in byReplicate)
                    {
                        double scaledCount = (entry.Value.eCurveCount.Area / totalIntensityCount[entry.Key]) * averageAreaCount;
                        deNormSumCount += scaledCount;
                        repAreaCount.Add(scaledCount);

                        double scaledPerMs = (entry.Value.eCurvePerMs.Area / totalIntensityPerMs[entry.Key]) * averageAreaMS;
                        deNormSumPerMs += scaledPerMs;
                        repAreaMS.Add(scaledPerMs);
                    }

                    lineArea.Append(deNormSumCount / ((double)repAreaCount.Count)).Append(",");
                    lineMS.Append(deNormSumPerMs / ((double)repAreaMS.Count)).Append(",");

                    // A standard deviation is only written when at least two replicates contributed.
                    if (repAreaCount.Count > 1)
                    {
                        stdDevCount.Append(MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaCount.ToArray())).Append(",");
                        stdDevMS.Append(MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaMS.ToArray())).Append(",");
                    }
                    else
                    {
                        stdDevCount.Append(",");
                        stdDevMS.Append(",");
                    }
                }
                writerArea.AddLine(lineArea.ToString() + lineMS.ToString() + "," + stdDevCount.ToString() + "," + stdDevMS.ToString());

                //2) Add replicates results (to use for standard deviation)
            }
            writerArea.WriteToFile();
        }
Exemplo n.º 22
0
        /// <summary>
        /// Writes a CSV table of observed product intensities summed over the given PSMs.
        /// </summary>
        /// <remarks>
        /// There is one row per fragment position from 1 to <c>peptide.Length</c>. Columns are laid
        /// out as: every fragment class in <c>dbOptions.fragments</c> for each charge from 1 to
        /// <paramref name="psmCharge"/>, followed by the same charge sweep restricted to the
        /// fragment classes that <c>ComputeFragments</c> yields for this peptide.
        /// </remarks>
        /// <param name="psms">Peptide-spectrum matches whose product matches are accumulated.</param>
        /// <param name="peptide">Peptide that sets the number of rows and is passed to <c>ComputeFragments</c>.</param>
        /// <param name="psmCharge">Highest charge state to emit columns for (inclusive).</param>
        /// <param name="fileName">Path handed to the CSV writer.</param>
        public void ExportFragmentIntensities(List <PeptideSpectrumMatch> psms, Peptide peptide, int psmCharge, string fileName)
        {
            vsCSVWriter writer = new vsCSVWriter(fileName);

            // Keep only the fragment classes for which ComputeFragments yields at least one match.
            List <FragmentClass> producedClasses = new List <FragmentClass>();
            foreach (FragmentClass candidate in dbOptions.fragments)
            {
                foreach (ProductMatch theoretical in dbOptions.fragments.ComputeFragments(peptide, psmCharge, dbOptions))
                {
                    if (candidate == theoretical.Fragment)
                    {
                        producedClasses.Add(candidate);
                        break;
                    }
                }
            }

            // Fix the column layout once as (fragment class, charge) pairs: all configured classes
            // per charge first, then the produced classes per charge.
            List <KeyValuePair <FragmentClass, int> > columns = new List <KeyValuePair <FragmentClass, int> >();
            for (int z = 1; z <= psmCharge; z++)
            {
                foreach (FragmentClass fragmentClass in dbOptions.fragments)
                {
                    columns.Add(new KeyValuePair <FragmentClass, int>(fragmentClass, z));
                }
            }
            for (int z = 1; z <= psmCharge; z++)
            {
                foreach (FragmentClass fragmentClass in producedClasses)
                {
                    columns.Add(new KeyValuePair <FragmentClass, int>(fragmentClass, z));
                }
            }

            string header = "Cumulated Product Intensities";
            foreach (KeyValuePair <FragmentClass, int> column in columns)
            {
                header += "," + column.Key.Name + " ^" + column.Value;
            }
            writer.AddLine(header);

            for (int position = 1; position <= peptide.Length; position++)
            {
                string row = position.ToString();
                foreach (KeyValuePair <FragmentClass, int> column in columns)
                {
                    // Sum the observed intensity of every product match for this class, position and charge.
                    double summedIntensity = 0.0;
                    foreach (PeptideSpectrumMatch psm in psms)
                    {
                        foreach (ProductMatch observed in psm.AllProductMatches)
                        {
                            if (column.Key == observed.Fragment && observed.fragmentPos == position && observed.charge == column.Value)
                            {
                                summedIntensity += observed.obsIntensity;
                            }
                        }
                    }
                    row += "," + summedIntensity;
                }
                writer.AddLine(row);
            }
            writer.WriteToFile();
        }
Exemplo n.º 23
0
        public void ExportFragments(PeptideSpectrumMatch psm)
        {
            vsCSVWriter          writer    = new vsCSVWriter(dbOptions.OutputFolder + psm.Peptide.Sequence + "_" + vsCSV.GetFileName_NoExtension(psm.Query.sample.sSDF) + "_" + psm.Query.precursor.Track.RT + ".csv");
            List <FragmentClass> fragments = new List <FragmentClass>();

            foreach (FragmentClass fragment in dbOptions.fragments)
            //foreach (string fragment in FragmentDictionary.Fragments.Keys)
            {
                bool found = false;
                foreach (ProductMatch match in dbOptions.fragments.ComputeFragments(psm.Peptide, psm.Query.precursor.Charge, dbOptions))
                {
                    if (fragment == match.Fragment)
                    {
                        found = true;
                        break;
                    }
                }
                if (found)
                {
                    fragments.Add(fragment);
                }
            }

            string title = "Theoretical Fragments";

            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in dbOptions.fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            writer.AddLine(title);

            for (int i = 1; i <= psm.Peptide.Length; i++)
            {
                string line = i.ToString();
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in dbOptions.fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in dbOptions.fragments.ComputeFragments(psm.Peptide, psm.Query.precursor.Charge, dbOptions))
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.theoMz;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in dbOptions.fragments.ComputeFragments(psm.Peptide, psm.Query.precursor.Charge, dbOptions))
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.theoMz;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                writer.AddLine(line);
            }

            title = "Observed Fragments Intensities";
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in dbOptions.fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            writer.AddLine(title);

            for (int i = 1; i <= psm.Peptide.Length; i++)
            {
                string line = i.ToString();
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in dbOptions.fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.obsIntensity;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.obsIntensity;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                writer.AddLine(line);
            }

            title = "Observed Fragments Mz";
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in dbOptions.fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            writer.AddLine(title);

            for (int i = 1; i <= psm.Peptide.Length; i++)
            {
                string line = i.ToString();
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in dbOptions.fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.obsMz;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.obsMz;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                writer.AddLine(line);
            }

            title = "Error on Fragments";
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in dbOptions.fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
            {
                foreach (FragmentClass fragment in fragments)
                {
                    title += "," + fragment.Name + " ^" + charge;
                }
            }
            writer.AddLine(title);

            for (int i = 1; i <= psm.Peptide.Length; i++)
            {
                string line = i.ToString();
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in dbOptions.fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.mass_diff;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                for (int charge = 1; charge <= psm.Query.precursor.Charge; charge++)
                {
                    foreach (FragmentClass fragment in fragments)
                    {
                        bool found = false;
                        foreach (ProductMatch match in psm.AllProductMatches)
                        {
                            if (fragment == match.Fragment && match.fragmentPos == i && match.charge == charge)
                            {
                                line += "," + match.mass_diff;
                                found = true;
                                break;
                            }
                        }
                        if (!found)
                        {
                            line += ",";
                        }
                    }
                }
                writer.AddLine(line);
            }
            writer.WriteToFile();
        }
Exemplo n.º 24
0
        /// <summary>
        /// Writes fragment-matching statistics, gathered from the optimized best PSM of every matched
        /// precursor, to "FragmentStats_Targets.csv" or "FragmentStats_Decoy.csv" in the output folder.
        /// </summary>
        /// <param name="target">Only PSMs whose <c>Target</c> value equals this flag are counted.</param>
        public void WriteFragmentation(bool target)
        {
            // The file name says "Decoy" while the banner says "Decoys"; both spellings are emitted as-is.
            string fileTag   = target ? "Targets" : "Decoy";
            string bannerTag = target ? "Targets" : "Decoys";

            vsCSVWriter writer = new vsCSVWriter(dbOptions.OutputFolder + "FragmentStats_" + fileTag + ".csv");
            writer.AddLine("  === Fragmentation of " + bannerTag + " ===");

            // Section 1: per fragment type, the match count, the summed observed intensity and a
            // "position:count" histogram of where along the peptide the matches were found.
            foreach (FragmentClass fragment in dbOptions.fragments)
            {
                int    matchCount   = 0;
                double intensitySum = 0;
                Dictionary<int, int> hitsByPosition = new Dictionary<int, int>();

                foreach (Precursor precursor in matchedPrecursors)
                {
                    PeptideSpectrumMatch best = precursor.OptimizedBestPsm();
                    if (best.Target == target)
                    {
                        foreach (ProductMatch match in best.AllProductMatches)
                        {
                            if (fragment == match.Fragment)
                            {
                                // TryGetValue leaves 'previous' at 0 for a position seen for the first time.
                                int previous;
                                hitsByPosition.TryGetValue(match.fragmentPos, out previous);
                                hitsByPosition[match.fragmentPos] = previous + 1;

                                matchCount++;
                                intensitySum += match.obsIntensity;
                            }
                        }
                    }
                }

                string positionSummary = "";
                foreach (KeyValuePair<int, int> hit in hitsByPosition)
                {
                    positionSummary += "|" + hit.Key + ":" + hit.Value;
                }
                if (positionSummary.Length > 0)
                {
                    // Drop the leading '|' separator.
                    positionSummary = positionSummary.Substring(1);
                }

                writer.AddLine("    " + fragment.Name + ", Number of fragments = , " + matchCount + ",   Intensity = ," + intensitySum + ", fragment matched [" + positionSummary + "]");
            }

            // Sections 2 and 3: the same count/intensity summary is emitted twice more over
            // dbOptions.fragments. The original loops carried commented-out headers iterating
            // FragmentDictionary.Fragments.Keys and FragmentDictionary.AAFragments.Keys respectively.
            for (int pass = 0; pass < 2; pass++)
            {
                foreach (FragmentClass fragment in dbOptions.fragments)
                {
                    int    matchCount   = 0;
                    double intensitySum = 0;

                    foreach (Precursor precursor in matchedPrecursors)
                    {
                        PeptideSpectrumMatch best = precursor.OptimizedBestPsm();
                        if (best.Target == target)
                        {
                            foreach (ProductMatch match in best.AllProductMatches)
                            {
                                if (fragment == match.Fragment)
                                {
                                    matchCount++;
                                    intensitySum += match.obsIntensity;
                                }
                            }
                        }
                    }

                    // Note: this line prints the fragment object itself (its ToString()), not fragment.Name.
                    writer.AddLine("    " + fragment + ", Number of fragments = ," + matchCount + ",   Intensity = ," + intensitySum);
                }
            }

            writer.WriteToFile();
        }
        /// <summary>
        /// Copies a Mascot report CSV to <paramref name="outputCSV"/>, overwriting the "Pep Elution Time"
        /// column of each content row with a value read from the "*_RetentionTimes.csv" file in
        /// <paramref name="folder"/> whose path contains the row's file name.
        /// </summary>
        /// <param name="mascotReportCSVFile">Path of the Mascot report to rewrite.</param>
        /// <param name="folder">Folder searched for "*_RetentionTimes.csv" files.</param>
        /// <param name="outputCSV">Path of the CSV file to write.</param>
        public static void FromFolderWithRetentionTimeCSV(string mascotReportCSVFile, string folder, string outputCSV)
        {
            string[]    rtPaths  = Directory.GetFiles(folder, "*_RetentionTimes.csv");
            List<vsCSV> rtTables = new List<vsCSV>();
            for (int f = 0; f < rtPaths.Length; f++)
            {
                rtTables.Add(new vsCSV(rtPaths[f]));
            }

            vsCSVWriter writer = new vsCSVWriter(outputCSV);
            vsCSV       report = new vsCSV(mascotReportCSVFile);

            // Column positions stay at -1 until a line mentioning the column title has been seen.
            int  scanCol     = -1;
            int  fileCol     = -1;
            int  rtCol       = -1;
            bool pastHeader  = false;

            foreach (string rawLine in report.LINES_LIST)
            {
                string   line   = rawLine;
                string[] fields = line.Split(vsCSV._Generic_Separator);

                // The column lookups run on every line, not only on the first header found.
                if (line.Contains("Scan Number"))
                {
                    scanCol = vsCSV.GetColumnIndex(fields, "Scan Number");
                }
                if (line.Contains("FileName"))
                {
                    fileCol = vsCSV.GetColumnIndex(fields, "FileName");
                }
                if (line.Contains("Pep Elution Time"))
                {
                    rtCol = vsCSV.GetColumnIndex(fields, "Pep Elution Time");
                }

                if (pastHeader)
                {
                    // The scan field may hold several '-'-separated integers; they are summed to
                    // form the line index used in the retention-time table.
                    int scanIndex = 0;
                    foreach (string part in fields[scanCol].Split('-'))
                    {
                        scanIndex += int.Parse(part);
                    }

                    string runName       = vsCSV.GetFileName_NoExtension(fields[fileCol]);
                    string retentionTime = "";
                    int    candidate     = 0;
                    while (candidate < rtPaths.Length)
                    {
                        if (rtPaths[candidate].Contains(runName))
                        {
                            // Second field of the line at 'scanIndex' in the first matching table.
                            retentionTime = rtTables[candidate].LINES_LIST[scanIndex].Split(vsCSV._Generic_Separator)[1];
                            break;
                        }
                        candidate++;
                    }

                    // When no table matches, the column is overwritten with an empty string.
                    fields[rtCol] = retentionTime;
                    line          = vsCSV.Concatenate(fields, ",");
                }

                // Rows are treated as content from the line AFTER the first one on which all three
                // column indices are valid for the current field count.
                int  width     = fields.Length;
                bool scanKnown = 0 <= scanCol && scanCol < width;
                bool fileKnown = 0 <= fileCol && fileCol < width;
                bool rtKnown   = 0 <= rtCol && rtCol < width;
                if (scanKnown && fileKnown && rtKnown)
                {
                    pastHeader = true;
                }

                writer.AddLine(line);
            }

            writer.WriteToFile();
        }
Exemplo n.º 26
0
        /// <summary>
        /// Counts how often each non-header line of a FASTA file occurs (optionally together with its
        /// reversed form) and writes a histogram "occurrence count, number of distinct sequences" to a CSV.
        /// </summary>
        /// <param name="fastaFile">FASTA file to read; lines starting with '&gt;' are skipped.</param>
        /// <param name="addReverse">When true, the reverse of every sequence line is counted as well.</param>
        /// <param name="csvFileOut">Path of the histogram CSV to write.</param>
        /// <remarks>
        /// Any exception is reported on the console (message and stack trace) and not rethrown.
        /// Occurrence counts 1 to 40 are always present in the output, even when zero.
        /// </remarks>
        public static void ComputeSequenceFROverlap(string fastaFile, bool addReverse, string csvFileOut)
        {
            try
            {
                FileStream fs;
                try
                {
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.Read);
                }
                catch (System.Exception)
                {
                    // Retry with a more permissive share mode in case another process holds the file open for writing.
                    fs = new FileStream(fastaFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                }

                // One dictionary for all sequences. The previous per-first-letter buckets filed a
                // reversed sequence under the FORWARD string's first letter, so it could never meet an
                // identical forward sequence starting with another letter, and they threw on blank
                // lines or on first characters outside 'A'..'Z'.
                Dictionary<string, long> occurrences = new Dictionary<string, long>();

                using (fs)
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (line.Length == 0 || line.StartsWith(">"))
                        {
                            continue;
                        }

                        long seen;
                        occurrences.TryGetValue(line, out seen);
                        occurrences[line] = seen + 1;

                        if (addReverse)
                        {
                            string rev = Reverse(line);

                            occurrences.TryGetValue(rev, out seen);
                            occurrences[rev] = seen + 1;
                        }
                    }
                }

                // Histogram: occurrence count -> number of distinct sequences seen that many times.
                Dictionary<long, long> DicOfNb = new Dictionary<long, long>();
                for (long i = 1; i <= 40; i++)
                {
                    DicOfNb.Add(i, 0);
                }

                foreach (long val in occurrences.Values)
                {
                    long sequencesWithCount;
                    DicOfNb.TryGetValue(val, out sequencesWithCount);
                    DicOfNb[val] = sequencesWithCount + 1;
                }

                vsCSVWriter writer = new vsCSVWriter(csvFileOut);
                foreach (long key in DicOfNb.Keys)
                {
                    writer.AddLine(key + "," + DicOfNb[key]);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 27
0
        /// <summary>
        /// For every peptide row of a fixed input CSV, lists the entries of two nucleotide FASTA databases
        /// (translated in three reading frames, forward and backward) that contain the peptide sequence,
        /// and writes the rows with that list appended to a fixed output CSV.
        /// </summary>
        /// <remarks>
        /// All paths are hard-coded. The peptide sequence is read from the fifth column (index 4).
        /// "Zut" is printed on the console for each peptide that matches no entry.
        /// </remarks>
        public static void YangLiuPeptidesWithAllProteins()
        {
            //vsCSV csvPeptides = new vsCSV(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\Identifications.csv");
            vsCSV       csvPeptides = new vsCSV(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\Cluster_Intensity_peptides_NormP.csv");
            vsCSVWriter writer      = new vsCSVWriter(@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\DEC18_2012\DMatton\Clustering_186716\ProteinsPerPeptidesFromDatabases_AllReadingFrames.csv");

            NucleicAcid.InitHash();

            // The database streams are disposed as soon as their content has been copied into a list;
            // previously both file handles were left open.
            List<Protein> proteins1;
            using (FileStream protein_fasta_database1 = new FileStream(@"G:\Thibault\Olivier\Databases\DMatton\Matton_Illumina_Anthesis_WithReverse.fasta", FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                proteins1 = new List<Protein>(ProteinFastaReader.ReadProteins(protein_fasta_database1, false));
            }
            Dictionary<string, List<string>> protein1AAs = new Dictionary<string, List<string>>();

            foreach (Protein prot in proteins1)
            {
                for (int shift = 0; shift < 3; shift++)
                {
                    protein1AAs.Add(prot.Description + " | Reading Frame " + shift + " | Forward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, false));
                    protein1AAs.Add(prot.Description + " | Reading Frame " + shift + " | Backward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, true));
                }
            }

            List<Protein> proteins2;
            using (FileStream protein_fasta_database2 = new FileStream(@"G:\Thibault\Olivier\Databases\DMatton\mattond_20110418_WithReverse_EditedJuly2013.fasta", FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                proteins2 = new List<Protein>(ProteinFastaReader.ReadProteins(protein_fasta_database2, false));
            }
            Dictionary<string, List<string>> protein2AAs = new Dictionary<string, List<string>>();

            foreach (Protein prot in proteins2)
            {
                for (int shift = 0; shift < 3; shift++)
                {
                    protein2AAs.Add(prot.Description + " | Reading Frame " + shift + " | Forward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, false));
                    protein2AAs.Add(prot.Description + " | Reading Frame " + shift + " | Backward", NucleicAcid.ConvertNA3ToAAs(prot.BaseSequence, shift, true));
                }
            }

            // The header row is copied unchanged.
            writer.AddLine(csvPeptides.LINES_LIST[0]);

            for (int i = 1; i < csvPeptides.LINES_LIST.Count; i++)
            {
                string[] splits = csvPeptides.LINES_LIST[i].Split(vsCSV._Generic_Separator);
                string   seq    = splits[4];
                //string seq = splits[13];

                // Collect the key of every translated entry in which at least one sequence contains the peptide.
                StringBuilder sb = new StringBuilder();
                foreach (string key in protein1AAs.Keys)
                {
                    foreach (string protSeq in protein1AAs[key])
                    {
                        if (protSeq.Contains(seq))
                        {
                            sb.Append(key + ";");
                            break;
                        }
                    }
                }

                foreach (string key in protein2AAs.Keys)
                {
                    foreach (string protSeq in protein2AAs[key])
                    {
                        if (protSeq.Contains(seq))
                        {
                            sb.Append(key + ";");
                            break;
                        }
                    }
                }

                if (sb.Length == 0)
                {
                    Console.WriteLine("Zut");
                }
                writer.AddLine(csvPeptides.LINES_LIST[i] + "," + sb.ToString().Trim());
            }
            writer.WriteToFile();
        }
Exemplo n.º 28
0
        /// <summary>
        /// Cross-references a tab-separated per-residue matrix with an annotated site list and writes
        /// three CSV reports: the annotated lysine rows joined with their matrix rows, the mean value per
        /// amino acid (plus a randomly sampled lysine mean), and the raw per-category value columns.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        /// <remarks>
        /// All input and output paths are hard-coded. From each matrix row, column 1 is used as a protein
        /// identifier, column 2 as a position, column 3 as the amino acid and column 5 as the numeric
        /// value; rows whose column 5 does not parse as a double contribute nothing.
        /// </remarks>
        public static bool LysineConservation()
        {
            List<string> zincIDs = GetZincProteinsENSP();
            Dictionary<string, List<int>> dicOfSites = ReadZNF(@"C:\_IRIC\DATA\Sumo\ZNF.csv");
            string csvMatrix          = @"C:\_IRIC\DATA\Sumo\matrix_human.tsv";
            string csvToAnnotate      = @"C:\_IRIC\DATA\Sumo\liste des sites SUMO.csv";
            string output             = @"C:\_IRIC\DATA\Sumo\outputL.csv";
            string outputConservation = @"C:\_IRIC\DATA\Sumo\outputConservation.csv";
            string outputAll          = @"C:\_IRIC\DATA\Sumo\outputConservationAll_b.csv";
            vsCSV  csvM = new vsCSV(csvMatrix);
            vsCSV  csvA = new vsCSV(csvToAnnotate);

            // Annotated sites keyed by (column 0 + column 5) -> line index in the annotation file.
            Dictionary<string, int> dicOfAnnotates = new Dictionary<string, int>();

            for (int i = 1; i < csvA.LINES_LIST.Count; i++)
            {
                string[] items = csvA.LINES_LIST[i].Split(vsCSV._Generic_Separator);
                dicOfAnnotates.Add(items[0] + items[5], i);
            }

            vsCSVWriter writer = new vsCSVWriter(output);

            writer.AddLine(csvA.getFirstLine());

            // One value column per letter 'A'..'Z' plus three special categories.
            Dictionary<string, List<double>> dicOfAllAA = new Dictionary<string, List<double>>();
            vsCSVWriter   writerConsAll = new vsCSVWriter(outputAll);
            StringBuilder sb            = new StringBuilder();

            for (char aa = 'A'; aa <= 'Z'; aa++)
            {
                dicOfAllAA.Add(aa.ToString(), new List<double>());
                sb.Append(aa + ",");
            }
            sb.Append("SpecialK,ZincEveryWhere,ZincSumo");
            dicOfAllAA.Add("SpecialK", new List<double>());
            dicOfAllAA.Add("ZincEveryWhere", new List<double>());
            dicOfAllAA.Add("ZincSumo", new List<double>());
            writerConsAll.AddLine(sb.ToString());

            writer.AddLine("AminoAcid,Some Number");
            Dictionary<string, double> dicOfAA       = new Dictionary<string, double>();
            Dictionary<string, int>    dicOfAANumber = new Dictionary<string, int>();

            int          nbK = 0;
            List<double> echantillonageNoZero = new List<double>();

            for (int j = 0; j < csvM.LINES_LIST.Count; j++)
            {
                string[] splitsJ = csvM.LINES_LIST[j].Split('\t');
                string   aa      = splitsJ[3];
                if (!dicOfAA.ContainsKey(aa))
                {
                    dicOfAANumber.Add(aa, 0);
                    dicOfAA.Add(aa, 0);
                }

                // TryParse always assigns its out argument, so no pre-initialisation is needed.
                double value;
                if (double.TryParse(splitsJ[5], out value))
                {
                    dicOfAA[aa] += value;
                    dicOfAANumber[aa]++;

                    dicOfAllAA[aa].Add(value);
                }

                //string ensbJ = splitsJ[1];
                //int positionJ = int.Parse(splitsJ[2]);
                if ("K".CompareTo(aa) == 0)
                {
                    if (double.TryParse(splitsJ[5], out value))
                    {
                        bool found = false;
                        foreach (string id in zincIDs)
                        {
                            if (id.CompareTo(splitsJ[1]) == 0)
                            {
                                found = true;
                                break; // one hit is enough; no need to scan the rest of the list
                            }
                        }
                        if (found)
                        {
                            dicOfAllAA["ZincEveryWhere"].Add(value);
                        }

                        if (dicOfSites.ContainsKey(splitsJ[1] + "|" + splitsJ[2]))
                        {
                            dicOfAllAA["ZincSumo"].Add(value);
                        }

                        // Every parsable lysine value is a candidate for the random sampling below.
                        echantillonageNoZero.Add(value);

                        if (dicOfAnnotates.ContainsKey(splitsJ[1] + splitsJ[2]))
                        {
                            if (!dicOfAA.ContainsKey("SpecialK"))
                            {
                                dicOfAANumber.Add("SpecialK", 0);
                                dicOfAA.Add("SpecialK", 0);
                            }
                            dicOfAA["SpecialK"] += value;
                            dicOfAANumber["SpecialK"]++;
                            dicOfAllAA["SpecialK"].Add(value);
                            nbK++;
                            writer.AddLine(csvA.LINES_LIST[dicOfAnnotates[splitsJ[1] + splitsJ[2]]] + "," + csvM.LINES_LIST[j].Replace('\t', ','));
                        }
                    }
                }
            }
            writer.WriteToFile();

            vsCSVWriter writerCons = new vsCSVWriter(outputConservation);

            foreach (string key in dicOfAA.Keys)
            {
                writerCons.AddLine(key + "," + dicOfAA[key] / (double)dicOfAANumber[key]);
            }

            // Draw nbK lysine values at random (with replacement) and average them.
            // Random.Next(n) is uniform over 0..n-1; the previous Floor(NextDouble() * (Count - 1))
            // could never select the last element of the list.
            double meanNoZero = 0;
            Random r          = new Random();

            for (int i = 0; i < nbK; i++)
            {
                int index = r.Next(echantillonageNoZero.Count);
                meanNoZero += echantillonageNoZero[index];
            }
            writerCons.AddLine("Echantillonnage K," + meanNoZero / (double)nbK);
            writerCons.WriteToFile();

            // Emit the per-category value lists as columns, one row per list index, until a row
            // in which every list is exhausted has been written.
            int  lineIndex = 0;
            bool keepGoing = true;

            while (keepGoing)
            {
                StringBuilder sb2 = new StringBuilder();
                keepGoing = false;
                foreach (string key in dicOfAllAA.Keys)
                {
                    if (lineIndex < dicOfAllAA[key].Count)
                    {
                        sb2.Append(dicOfAllAA[key][lineIndex] + ",");
                        keepGoing = true;
                    }
                    else
                    {
                        sb2.Append(",");
                    }
                }
                writerConsAll.AddLine(sb2.ToString());
                lineIndex++;
            }
            writerConsAll.WriteToFile();
            return(true);
        }
Exemplo n.º 29
0
//            Proteomics.Utilities.Fasta.FastaRead.AppendProteinDescriptionToMascotReport(@"C:\Users\caronlio\Downloads\filtered peptides.csv",
//                                                                                        @"C:\_IRIC\DATA\Tariq\peptideDb-minOcc60_WithReverse.fasta",
//                                                                                        @"C:\Users\caronlio\Downloads\filtered peptides_WithProteinDescriptions.csv");
//
        /// <summary>
        /// Copies a Mascot report CSV to <paramref name="csvFileOut"/>, appending to each data row the
        /// position (field 0) and field 5 of every entry in the hCKSAAP text file that belongs to the
        /// row's ">IPI:" section and whose position lies within the row's start/stop range.
        /// </summary>
        /// <param name="txtHCKSAAPFile">Tab-separated hCKSAAP file with ">IPI:" section headers.</param>
        /// <param name="csvMascotFile">Mascot report; column 3 is matched against the section header,
        /// columns 16 and 17 give the inclusive start and stop positions.</param>
        /// <param name="csvFileOut">Path of the CSV file to write.</param>
        /// <remarks>
        /// Rows that fail to parse are written unchanged and the exception is reported on the console.
        /// </remarks>
        public static void AppendhCKSAAPToMascotReport(string txtHCKSAAPFile, string csvMascotFile, string csvFileOut)
        {
            vsCSV       csvUbi    = new vsCSV(txtHCKSAAPFile);
            vsCSV       csvMascot = new vsCSV(csvMascotFile);
            vsCSVWriter writer    = new vsCSVWriter(csvFileOut);

            try
            {
                foreach (string lineMascot in csvMascot.LINES_LIST)
                {
                    string strToAppend = "";
                    try
                    {
                        string[] mSplits = lineMascot.Split(vsCSV._Generic_Separator);
                        // Index 17 is read below, so at least 18 columns are required
                        // (the previous ">= 17" check let 17-column rows through to an out-of-range read).
                        if (mSplits.Length >= 18 && !lineMascot.StartsWith("Search"))
                        {
                            int  indexStart = int.Parse(mSplits[16]);
                            int  indexStop  = int.Parse(mSplits[17]);
                            bool inIPI      = false;
                            foreach (string lineUbi in csvUbi.LINES_LIST)
                            {
                                if (lineUbi.StartsWith(">"))
                                {
                                    // A new header after the matching section means that section is finished.
                                    if (inIPI)
                                    {
                                        break;
                                    }

                                    inIPI = false;
                                    if (lineUbi.StartsWith(">IPI:") && lineUbi.Contains(mSplits[3]))
                                    {
                                        inIPI = true;
                                    }
                                }
                                if (inIPI)
                                {
                                    string[] splits   = lineUbi.Split('\t');
                                    int      indexPos = -1;
                                    // Header lines and short lines fail this test and are skipped.
                                    if (splits.Length > 6 && int.TryParse(splits[0], out indexPos))
                                    {
                                        if (indexPos >= indexStart && indexPos <= indexStop)
                                        {
                                            strToAppend += "," + splits[0] + "," + splits[5];
                                        }
                                    }
                                }
                            }
                        }
                    }
                    catch (System.Exception ex)
                    {
                        // Best effort: report the problem and still emit the original row below.
                        Console.WriteLine(ex.Message);
                        Console.WriteLine(ex.StackTrace);
                    }
                    writer.AddLine(lineMascot + strToAppend);
                }
                writer.WriteToFile();
            }
            catch (System.Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
Exemplo n.º 30
0
        /// <summary>
        /// Loads a fixed CSV of timed variables, interpolates and normalizes them, computes each
        /// variable's Pearson correlation with the "price" variable, writes a correlation-weighted
        /// prediction per time point to a fixed CSV and exports all variables to another fixed CSV.
        /// </summary>
        /// <param name="console">Not used by this method.</param>
        public static void Launch(IConSol console)
        {
            vsCSV source = new vsCSV(@"C:\Users\caronlio\Downloads\Via.Science.Pre.Interview.Assignment.Data.2013.10.18.csv");
            Dictionary<DateTime, List<Variable>> byTime = new Dictionary<DateTime, List<Variable>>();
            Dictionary<string, List<Variable>>   byName = new Dictionary<string, List<Variable>>();

            // Index every data row (the first line is skipped) by time stamp and by variable name.
            for (int row = 1; row < source.LINES_LIST.Count; row++)
            {
                Variable parsed = new Variable(source.LINES_LIST[row]);

                List<Variable> sameTime;
                if (!byTime.TryGetValue(parsed.time, out sameTime))
                {
                    sameTime = new List<Variable>();
                    byTime.Add(parsed.time, sameTime);
                }
                sameTime.Add(parsed);

                List<Variable> sameName;
                if (!byName.TryGetValue(parsed.name, out sameName))
                {
                    sameName = new List<Variable>();
                    byName.Add(parsed.name, sameName);
                }
                sameName.Add(parsed);
            }

            foreach (string variableName in byName.Keys)
            {
                InterpolateMissingValues(variableName, byTime, byName);
            }

            // Rebuild the by-name index from the by-time index after interpolation.
            byName.Clear();
            foreach (List<Variable> group in byTime.Values)
            {
                foreach (Variable item in group)
                {
                    List<Variable> bucket;
                    if (!byName.TryGetValue(item.name, out bucket))
                    {
                        bucket = new List<Variable>();
                        byName.Add(item.name, bucket);
                    }
                    bucket.Add(item);
                }
            }

            NormalizeVariables(byName);

            // Correlate every variable (including "price" itself) with the normalized price series.
            List<double> priceSeries = GetArrayofNormed("price", byTime, byName);
            Dictionary<string, double> correlationByName = new Dictionary<string, double>();

            foreach (string variableName in byName.Keys)
            {
                List<double> series      = GetArrayofNormed(variableName, byTime, byName);
                double       correlation = MathNet.Numerics.Statistics.Correlation.Pearson(priceSeries, series);
                if (variableName.CompareTo("price") == 0)
                {
                    Console.WriteLine("test");
                }
                correlationByName.Add(variableName, correlation);
            }

            // Prediction: sum of correlation * normalized value over all non-price variables, scaled by 100000.
            // Note: the header names three columns but each row carries only the prediction value.
            vsCSVWriter predictions = new vsCSVWriter(@"C:\_IRIC\predictions.csv");
            predictions.AddLine("Time,Price,Prediction");

            foreach (KeyValuePair<DateTime, List<Variable>> slot in byTime)
            {
                double estimate = 0;
                foreach (KeyValuePair<string, double> weight in correlationByName)
                {
                    if (weight.Key.CompareTo("price") == 0)
                    {
                        continue;
                    }

                    foreach (Variable observed in slot.Value)
                    {
                        if (observed.name.CompareTo(weight.Key) == 0)
                        {
                            estimate += weight.Value * observed.normValue;
                        }
                    }
                }
                estimate *= 100000;
                predictions.AddLine(estimate.ToString());
            }
            predictions.WriteToFile();

            // Export a CSV of the variables.
            ExportAllVariables(@"C:\_IRIC\assignOut.csv", byTime, byName, correlationByName);

            Console.WriteLine("Done!");
        }