/// <summary>
/// Creates the service with an empty protein dictionary and an empty
/// projects-not-found table, and remembers the supplied downloader.
/// </summary>
/// <param name="prefs">
/// Preference source. May be null; when it is non-null and exposes a non-empty
/// application data folder path, <c>FileName</c> is set to the project info
/// file name combined with that folder.
/// </param>
/// <param name="downloader">Project summary downloader stored for later use.</param>
public ProteinService(IPreferenceSet prefs, IProjectSummaryDownloader downloader)
{
    _projectsNotFound = new Dictionary<Int32, DateTime>();
    _dictionary = new ProteinDictionary();
    _downloader = downloader;

    // Without a usable data folder there is nothing to derive FileName from.
    if (prefs == null || String.IsNullOrEmpty(prefs.ApplicationDataFolderPath))
    {
        return;
    }

    FileName = System.IO.Path.Combine(
        prefs.ApplicationDataFolderPath,
        Constants.ProjectInfoFileName);
}
/// <summary>
/// Creates the service with an empty protein dictionary and an empty
/// projects-not-found table, and remembers the supplied downloader.
/// </summary>
/// <param name="prefs">
/// Preference source. May be null; when it is non-null its
/// <c>Preference.ApplicationDataFolderPath</c> value is looked up, and a
/// non-empty result is combined with the project info file name to set
/// <c>FileName</c>.
/// </param>
/// <param name="downloader">Project summary downloader stored for later use.</param>
public ProteinService(IPreferenceSet prefs, IProjectSummaryDownloader downloader)
{
    _dictionary = new ProteinDictionary();
    _downloader = downloader;
    _projectsNotFound = new Dictionary<int, DateTime>();

    string dataFolder = null;
    if (prefs != null)
    {
        dataFolder = prefs.Get<string>(Preference.ApplicationDataFolderPath);
    }

    // Leave FileName untouched when no data folder is configured.
    if (String.IsNullOrEmpty(dataFolder))
    {
        return;
    }

    FileName = Path.Combine(dataFolder, Constants.ProjectInfoFileName);
}
/// <summary>
/// Downloads the current project summary, rebuilds the protein dictionary from it,
/// and reports how the dictionary changed.
/// </summary>
/// <param name="progress">The object used to report download progress.</param>
/// <returns>
/// The change records produced while building the new dictionary from the existing one.
/// </returns>
public IReadOnlyCollection<ProteinDictionaryChange> Refresh(IProgress<ProgressInfo> progress)
{
    IReadOnlyCollection<ProteinDictionaryChange> changes;

    using (var buffer = new MemoryStream())
    {
        Logger.Info("Downloading new project data from Stanford...");
        _downloader.Download(buffer, progress);

        // Rewind so the deserializer reads what the downloader just wrote.
        buffer.Position = 0;

        var deserializer = new ProjectSummaryJsonDeserializer();
        var refreshed = ProteinDictionary.CreateFromExisting(_dictionary, deserializer.Deserialize(buffer));
        changes = refreshed.Changes;
        _dictionary = refreshed;
    }

    // Only entries that actually changed are worth a log line.
    foreach (var change in changes)
    {
        if (change.Result != ProteinDictionaryChangeResult.NoChange)
        {
            Logger.Info(change.ToString());
        }
    }

    var refreshTime = DateTime.UtcNow;

    // Iterate over a snapshot of the keys because the table is modified inside the loop.
    foreach (var projectNumber in _projectsNotFound.Keys.ToList())
    {
        if (!_dictionary.ContainsKey(projectNumber))
        {
            // Still missing after this refresh: stamp it with the refresh time.
            _projectsNotFound[projectNumber] = refreshTime;
        }
        else
        {
            // The project is now known, so it no longer belongs in the not-found table.
            _projectsNotFound.Remove(projectNumber);
        }
    }

    _lastRefreshTime = refreshTime;
    Write();

    return changes;
}
/// <summary>
/// Checks that GetProteinOrDownload returns a known protein for a project that was
/// added to the dictionary and an unknown protein for a project that was not,
/// including on a repeated request for the same missing project.
/// </summary>
public void GetProteinOrDownloadTest1()
{
    // Arrange
    var preferences = MockRepository.GenerateStub<IPreferenceSet>();
    var summaryDownloader = MockRepository.GenerateMock<IProjectSummaryDownloader>();
    summaryDownloader.Expect(x => x.DownloadFromStanford());
    summaryDownloader.Expect(x => x.DownloadFilePath).Return("..\\..\\..\\HFM.Proteins.Tests\\TestFiles\\psummary.html");
    // Disabled alternative download source, kept for reference:
    //summaryDownloader.Expect(x => x.DownloadFromHfmWeb());
    //summaryDownloader.Expect(x => x.DownloadFilePath).Return("..\\..\\TestFiles\\ProjectInfo.xml");

    var dictionary = new ProteinDictionary(preferences, summaryDownloader);
    var knownProtein = CreateValidProtein(2483);
    dictionary.Add(knownProtein.ProjectNumber, knownProtein);

    // Act
    Protein found = dictionary.GetProteinOrDownload(2483);
    Assert.AreEqual(false, found.IsUnknown());

    Protein missing = dictionary.GetProteinOrDownload(2482);
    Assert.AreEqual(true, missing.IsUnknown());

    // Ask again for the same missing project to exercise the projects not found list
    Protein missingAgain = dictionary.GetProteinOrDownload(2482);
    Assert.AreEqual(true, missingAgain.IsUnknown());

    // Assert
    summaryDownloader.VerifyAllExpectations();
}
/// <summary>
/// Replaces <c>_dictionary</c> with a newly constructed <c>ProteinDictionary</c>
/// created through its parameterless constructor.
/// </summary>
public void Init()
{
    _dictionary = new ProteinDictionary();
}
/// <summary>
/// Builds the sequence window centred on one phosphosite and increments its count in
/// <paramref name="motifs"/>.
/// </summary>
/// <remarks>
/// The window is the protein residues before the site, the site character taken from
/// <paramref name="peptideSequence"/>, and the protein residues after the site. Each flank
/// holds at most <c>(motifXWindowSize - 1) / 2</c> residues and is truncated (not padded)
/// where it would run past either end of the protein. A site on the first or last residue
/// of the protein therefore gets an empty flank on that side; the site residue itself is
/// never copied into a flank.
/// </remarks>
/// <param name="motifs">Window sequence to occurrence count; updated in place.</param>
/// <param name="proteins">Lookup that yields the protein sequence for a description.</param>
/// <param name="proteinDescription">Key used to fetch the protein sequence from <paramref name="proteins"/>.</param>
/// <param name="peptideSequence">Peptide sequence from which the site character is taken.</param>
/// <param name="startResidueNumber">
/// Position of the peptide's first residue within the protein; one is subtracted before
/// indexing, so it is treated as 1-based.
/// </param>
/// <param name="phosphositeIndex">Zero-based index of the site within <paramref name="peptideSequence"/>.</param>
private void ExtractMotifs(Dictionary<string, int> motifs, ProteinDictionary proteins, string proteinDescription, string peptideSequence, int startResidueNumber, int phosphositeIndex)
{
    string protein_sequence = proteins[proteinDescription];
    int half_window_size = (motifXWindowSize - 1) / 2;

    // Zero-based index of the site within the protein sequence.
    int site_index = (startResidueNumber - 1) + phosphositeIndex;

    // N-terminal flank: residues [site_index - half_window_size, site_index - 1], clipped at 0.
    // Working with a length (rather than a clamped stop index) keeps the flank empty when the
    // site is the first residue instead of duplicating the site.
    int left_start = Math.Max(site_index - half_window_size, 0);
    int left_length = Math.Max(site_index - left_start, 0);
    string left_flank = protein_sequence.Substring(left_start, left_length);

    // C-terminal flank: residues [site_index + 1, site_index + half_window_size], clipped at the
    // last residue. The start index is deliberately NOT clamped: clamping it to Length - 1 made
    // a site on the last residue reappear in the flank.
    int right_start = site_index + 1;
    int right_stop = Math.Min(site_index + half_window_size, protein_sequence.Length - 1);
    int right_length = Math.Max(right_stop - right_start + 1, 0);
    string right_flank = right_length > 0
        ? protein_sequence.Substring(right_start, right_length)
        : string.Empty;

    string phosphosequence = left_flank + peptideSequence[phosphositeIndex] + right_flank;

    // A missing key leaves occurrences at 0, so new windows start at a count of 1.
    int occurrences;
    motifs.TryGetValue(phosphosequence, out occurrences);
    motifs[phosphosequence] = occurrences + 1;
}
public void Phosphinate() { StreamWriter log = null; IXRawfile2 raw = null; StreamReader csv = null; StreamWriter non_phospho_output = null; StreamWriter localized_phospho_output = null; StreamWriter unlocalized_phospho_output = null; StreamWriter motifX = null; try { onStarting(new EventArgs()); onUpdateProgress(new ProgressEventArgs(0)); StringBuilder fixed_modifications_sb = new StringBuilder(); foreach (Modification modification in fixedModifications) { fixed_modifications_sb.Append(modification.Name + ", "); } if (fixed_modifications_sb.Length > 0) { fixed_modifications_sb = fixed_modifications_sb.Remove(fixed_modifications_sb.Length - 2, 2); } string fixed_modifications = fixed_modifications_sb.ToString(); if (!Directory.Exists(outputFolder)) { Directory.CreateDirectory(outputFolder); } log = new StreamWriter(Path.Combine(outputFolder, "Phosphinator_log.txt")); log.AutoFlush = true; log.WriteLine("Phosphinator PARAMETERS"); log.WriteLine("Fixed Modifications: " + fixed_modifications); log.WriteLine("Fragment Intensity Threshold: " + intensityThreshold.ToString() + " (" + intensityThresholdType.ToString() + ')'); log.WriteLine("Fragment m/z Tolerance (Th): " + mzTolerance.ToString()); log.WriteLine("Ambiguity Score Threshold: " + ambiguityScoreThreshold.ToString()); log.WriteLine("Eliminate Precursor Interference: " + eliminatePrecursorInterference.ToString()); if (eliminatePrecursorInterference) { log.WriteLine("Precursor Interference Threshold: " + precursorInterferenceThreshold.ToString()); } if (motifXOutput) { log.WriteLine("Motif-X Fasta Protein Database Filepath: " + motifXFastaProteinDatabaseFilepath); log.WriteLine("Motif-X Window Size: " + motifXWindowSize.ToString()); } log.WriteLine(); ProteinSiteCounter identified_sites_by_protein = new ProteinSiteCounter(); ProteinSiteCounter localized_sites_by_protein = new ProteinSiteCounter(); ProteinSiteCounter unlocalized_sites_by_protein = new ProteinSiteCounter(); Dictionary<string, 
Dictionary<KeyValuePair<int, string>, List<string>>> localized = new Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>>(); Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>> unlocalized = new Dictionary<string, Dictionary<KeyValuePair<int, string>, List<string>>>(); non_phospho_output = new StreamWriter(Path.Combine(outputFolder, "non_phospho.csv")); localized_phospho_output = new StreamWriter(Path.Combine(outputFolder, "localized_phospho.csv")); unlocalized_phospho_output = new StreamWriter(Path.Combine(outputFolder, "unlocalized_phospho.csv")); ProteinDictionary proteins = null; Dictionary<string, int> motifs = null; if (motifXOutput) { proteins = new ProteinDictionary(motifXFastaProteinDatabaseFilepath); motifs = new Dictionary<string, int>(); motifX = new StreamWriter(Path.Combine(outputFolder, "motif-x.txt")); } raw = (IXRawfile2) new MSFileReader_XRawfile(); string header_line = null; string[] headers = null; bool quant = false; foreach (string csv_filepath in csvFilepaths) { onStartingFile(new FilepathEventArgs(csv_filepath)); csv = new StreamReader(csv_filepath); using (CsvReader reader = new CsvReader(csv, true)) { headers = reader.GetFieldHeaders(); header_line = string.Join(",", headers); quant = headers.Contains("Channels Detected"); string[] lineData = new string[headers.Length]; //header_line = csv.ReadLine(); //quant = header_line.Contains("TQ_"); non_phospho_output.WriteLine(header_line); localized_phospho_output.WriteLine(header_line + ", Number of Theoretical Fragments, Identified Phosphoisoform, Identified Phosphoisoform Number of Matching Fragments, Best Phosphoisoforms, Best Phosphoisoform, Best Phosphoisoform Number of Matching Fragments, Second-Best Phosphoisoform, Second-Best Phosphoisoform Number of Matching Fragments, Identified Phosphoisoform Correct?, Preliminary Localization of All Phosphorylations?, Peptide Phosphorylation Sites, Probability of Spurious Fragment Match, Number of Theoretical 
Site-Determining Fragment Ions, Number of Matching Site-Determining Fragment Ions, Matching Site-Determining Fragment Ions, Probability Values, Ambiguity Scores, Phosphorylation Sites Localized?, All Phosphorylation Sites Localized?"); unlocalized_phospho_output.WriteLine(header_line + ", Number of Theoretical Fragments, Identified Phosphoisoform, Identified Phosphoisoform Number of Matching Fragments, Best Phosphoisoforms, Best Phosphoisoform, Best Phosphoisoform Number of Matching Fragments, Second-Best Phosphoisoform, Second-Best Phosphoisoform Number of Matching Fragments, Identified Phosphoisoform Correct?, Preliminary Localization of All Phosphorylations?, Peptide Phosphorylation Sites, Probability of Spurious Fragment Match, Number of Theoretical Site-Determining Fragment Ions, Number of Matching Site-Determining Fragment Ions, Matching Site-Determining Fragment Ions, Probability Values, Ambiguity Scores, Phosphorylation Sites Localized?, All Phosphorylation Sites Localized?"); while (reader.ReadNextRecord()) { //string line = csv.ReadLine(); //string[] fields = Regex.Split(line, // @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 string sequence = reader["Peptide"]; string dynamic_modifications = reader["Mods"]; if (!dynamic_modifications.Contains("phosphorylation")) { //non_phospho_output.WriteLine(line); } else { Peptide identified_phosphopeptide = new Peptide(sequence, fixedModifications, dynamic_modifications); int start_residue = int.Parse(reader["Start"]); int stop_residue = int.Parse(reader["Stop"]); string protein_description = reader["Defline"].Trim('"'); StringBuilder sb = new StringBuilder(); reader.CopyCurrentRecordTo(lineData); foreach (string datum in lineData) { if (datum.Contains(',')) sb.Append("\"" + datum + "\""); else sb.Append(datum); sb.Append(','); } sb.Remove(sb.Length - 
1, 1); string line = sb.ToString(); if (!identified_sites_by_protein.ContainsKey(protein_description)) { identified_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } foreach (KeyValuePair<int, string> kvp in identified_phosphopeptide.DynamicModifications) { if (kvp.Value.Contains("phosphorylation")) { string site = sequence[kvp.Key - 1] + (start_residue + kvp.Key).ToString(); if (!identified_sites_by_protein[protein_description].ContainsKey(site)) { identified_sites_by_protein[protein_description].Add(site, 0); } identified_sites_by_protein[protein_description][site]++; } } int scan_number = int.Parse(reader["Spectrum number"]); string filenameID = reader["Filename/id"]; FragmentType[] fragment_types = null; if (filenameID.Contains(".ETD.") || filenameID.Contains(".ECD.")) { fragment_types = new FragmentType[] {FragmentType.c, FragmentType.zdot}; } else { fragment_types = new FragmentType[] {FragmentType.b, FragmentType.y}; } string raw_filename = filenameID.Substring(0, filenameID.IndexOf('.')) + ".raw"; int charge = int.Parse(reader["Charge"]); string current_raw_filename = null; raw.GetFileName(ref current_raw_filename); if (current_raw_filename == null || !raw_filename.Equals(Path.GetFileName(current_raw_filename), StringComparison.InvariantCultureIgnoreCase)) { raw.Close(); string[] raw_filepaths = null; if (!string.IsNullOrEmpty(rawFolder) && Directory.Exists(rawFolder)) { raw_filepaths = Directory.GetFiles(rawFolder, raw_filename, SearchOption.AllDirectories); } else { raw_filepaths = Directory.GetFiles(Path.GetDirectoryName(csv_filepath), raw_filename, SearchOption.AllDirectories); } if (raw_filepaths.Length == 0) { throw new FileNotFoundException("No corresponding .raw file found for " + csv_filepath); } if (raw_filepaths.Length > 1) { throw new Exception("Multiple corresponding .raw files found for " + csv_filepath); } raw.Open(raw_filepaths[0]); raw.SetCurrentController(0, 1); } string scan_filter = null; 
raw.GetFilterForScanNum(scan_number, ref scan_filter); string low_mz_scan_filter = scan_filter.Substring(scan_filter.IndexOf('[') + 1); double low_mz = double.Parse(low_mz_scan_filter.Substring(0, low_mz_scan_filter.IndexOf('-'))); string high_mz_scan_filter = scan_filter.Substring(scan_filter.LastIndexOf('-') + 1); double high_mz = double.Parse(high_mz_scan_filter.Substring(0, high_mz_scan_filter.IndexOf(']'))); double[,] spectrum = null; if (scan_filter.Contains("FTMS")) { object labels_obj = null; object flags_obj = null; raw.GetLabelData(ref labels_obj, ref flags_obj, ref scan_number); spectrum = (double[,]) labels_obj; } else { double centroid_width = double.NaN; object spectrum_obj = null; object flags = null; int size = -1; raw.GetMassListFromScanNum(ref scan_number, null, 0, -1, 0, 1, ref centroid_width, ref spectrum_obj, ref flags, ref size); spectrum = (double[,]) spectrum_obj; } double base_peak_mz = double.NaN; double base_peak_intensity = double.NaN; for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { if (double.IsNaN(base_peak_mz) || spectrum[(int) RawLabelDataColumn.Intensity, i] > base_peak_intensity) { base_peak_mz = spectrum[(int) RawLabelDataColumn.MZ, i]; base_peak_intensity = spectrum[(int) RawLabelDataColumn.Intensity, i]; } } double intensity_threshold = intensityThreshold; if (intensityThresholdType == IntensityThresholdType.Relative) { intensity_threshold = (intensityThreshold/100.0)*base_peak_intensity; } double[] parameters = new double[4]; if (!scan_filter.Contains("FTMS") && intensityThresholdType == IntensityThresholdType.SignalToNoiseRatio) { List<double> relative_intensities = new List<double>(); for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { relative_intensities.Add(spectrum[(int) RawLabelDataColumn.Intensity, i]/ base_peak_intensity); } double bin_width = 0.001; int bins = 101; double[][] relative_intensity_histogram = new double[2][]; relative_intensity_histogram[0] = new 
double[bins]; relative_intensity_histogram[1] = new double[bins]; for (int i = relative_intensity_histogram[0].GetLowerBound(0); i <= relative_intensity_histogram[0].GetUpperBound(0); i++) { relative_intensity_histogram[0][i] = i*bin_width; } foreach (double relative_intensity in relative_intensities) { int bin_number = (int) Math.Floor(relative_intensity/bin_width); if (bin_number < bins) { relative_intensity_histogram[1][bin_number]++; } } parameters[0] = 0.0; parameters[1] = 100.0; parameters[2] = 0.0; parameters[3] = 0.001; double[] weights = new double[relative_intensity_histogram[1].Length]; for (int i = weights.GetLowerBound(0); i <= weights.GetUpperBound(0); i++) { weights[i] = 1.0; } LMA lma = new LMA(new GaussianFunctionWithPartials(), parameters, relative_intensity_histogram, weights, new DotNetMatrix.GeneralMatrix(4, 4), 0.001, 5000); lma.Fit(); } List<Peptide> peptides = GetAlternativePhosphoisoformPeptides( identified_phosphopeptide, fixedModifications); List<PhosphopeptideStatistics> all_phosphopeptide_stats = new List<PhosphopeptideStatistics>(peptides.Count); PhosphopeptideStatistics identified_phosphoisoform = null; List<double> ms2_mz_peaks = new List<double>(spectrum.GetLength(1)); for (int i = spectrum.GetLowerBound(1); i <= spectrum.GetUpperBound(1); i++) { double signal_to_noise = scan_filter.Contains("FTMS") ? 
(spectrum[(int) RawLabelDataColumn.Intensity, i] - spectrum[(int) RawLabelDataColumn.NoiseBaseline, i])/ spectrum[(int) RawLabelDataColumn.NoiseLevel, i] : ((spectrum[(int) RawLabelDataColumn.Intensity, i]/base_peak_intensity) - parameters[2])/parameters[3]; if ((intensityThresholdType == IntensityThresholdType.SignalToNoiseRatio && signal_to_noise >= intensity_threshold) || (intensityThresholdType != IntensityThresholdType.SignalToNoiseRatio && spectrum[(int) RawLabelDataColumn.Intensity, i] >= intensity_threshold)) { ms2_mz_peaks.Add(spectrum[(int) RawLabelDataColumn.MZ, i]); } } double mz_range = high_mz - low_mz; Dictionary<double, bool> searched_fragment_mzs = new Dictionary<double, bool>(); foreach (Peptide peptide in peptides) { PhosphopeptideStatistics phosphopeptide_stats = new PhosphopeptideStatistics(peptide); if (peptide.Sequence == identified_phosphopeptide.Sequence) { identified_phosphoisoform = phosphopeptide_stats; } FragmentDictionary fragments = peptide.CalculateFragments(fragment_types); foreach (KeyValuePair<string, Fragment> kvp in fragments) { phosphopeptide_stats.Fragments.Add(kvp.Key, new Dictionary<int, bool>()); for (int fragment_charge = 1; fragment_charge <= (charge >= 3 ? 
2 : 1); fragment_charge++) { if (fragment_charge > 1 && fragment_charge > (double) kvp.Value.Number/peptide.Sequence.Length*charge) { break; } double mz = MZFromMassAndCharge(kvp.Value.Mass, fragment_charge); if (mz < low_mz || mz > high_mz) { continue; } if (!searched_fragment_mzs.ContainsKey(mz)) { bool found = false; foreach (double ms2_mz_peak in ms2_mz_peaks) { if (Math.Abs(ms2_mz_peak - mz) <= mzTolerance) { found = true; break; } else if (ms2_mz_peak > mz + mzTolerance) { break; } } searched_fragment_mzs.Add(mz, found); } phosphopeptide_stats.Fragments[kvp.Key].Add(fragment_charge, searched_fragment_mzs[mz]); } } all_phosphopeptide_stats.Add(phosphopeptide_stats); } all_phosphopeptide_stats.Sort(ComparePhosphopeptidesByDescendingMatchingFragments); PhosphopeptideStatistics best_phosphoisoform = all_phosphopeptide_stats[0]; PhosphopeptideStatistics second_best_phosphoisoform = all_phosphopeptide_stats.Count > 1 ? all_phosphopeptide_stats[1] : null; List<string> best_sequences = new List<string>(); foreach (PhosphopeptideStatistics phosphopeptide_stats in all_phosphopeptide_stats) { if (phosphopeptide_stats.NumberOfMatchingFragments == best_phosphoisoform.NumberOfMatchingFragments) { best_sequences.Add(phosphopeptide_stats.Peptide.Sequence); } else { break; } } bool preliminary_localization = second_best_phosphoisoform == null || best_phosphoisoform.NumberOfMatchingFragments > second_best_phosphoisoform.NumberOfMatchingFragments; bool all_sites_localized = preliminary_localization; Dictionary<string, bool> peptide_sites = new Dictionary<string, bool>(); Dictionary<string, bool> protein_sites = new Dictionary<string, bool>(); string best_sequence = best_phosphoisoform.Peptide.Sequence; for (int i = 0; i < best_sequence.Length; i++) { if (char.IsLower(best_sequence[i])) { if (preliminary_localization && second_best_phosphoisoform == null) { peptide_sites.Add(best_sequence[i] + (i + 1).ToString(), true); protein_sites.Add(best_sequence[i] + (start_residue + 
i).ToString(), true); if (!localized_sites_by_protein.ContainsKey(protein_description)) { localized_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } string site = best_sequence[i] + (start_residue + i).ToString(); if (!localized_sites_by_protein[protein_description].ContainsKey(site)) { localized_sites_by_protein[protein_description].Add(site, 0); } localized_sites_by_protein[protein_description][site]++; if (motifXOutput) { ExtractMotifs(motifs, proteins, protein_description, best_sequence, start_residue, i); } } else { peptide_sites.Add(best_sequence[i] + (i + 1).ToString(), false); protein_sites.Add(best_sequence[i] + (start_residue + i).ToString(), false); } } } double probability_of_success = double.NaN; List<string> theoretical_site_determining_fragment_ions = new List<string>(); List<string> matching_site_determining_fragment_ions = new List<string>(); List<string> left_site_determining_fragments = new List<string>(); List<string> right_site_determining_fragments = new List<string>(); List<string> site_determining_fragments = new List<string>(); List<string> p_values = new List<string>(); List<string> a_scores = new List<string>(); List<string> sites_localized = new List<string>(); if (preliminary_localization && second_best_phosphoisoform != null) { probability_of_success = (ms2_mz_peaks.Count*2*mzTolerance)/mz_range; for (int i = 0; i < best_sequence.Length; i++) { if (char.IsLower(best_sequence[i])) { int first_phosphorylatable_residue = i - 1; while (first_phosphorylatable_residue >= 0) { if (best_sequence[first_phosphorylatable_residue] == 'S' || best_sequence[first_phosphorylatable_residue] == 'T' || best_sequence[first_phosphorylatable_residue] == 'Y') { break; } else { first_phosphorylatable_residue--; } } int? num_left_theoretical_site_determining_fragment_ions = null; int? 
num_left_matching_site_determining_fragment_ions = null; double left_p_value = double.NaN; double left_a_score = double.NaN; if (first_phosphorylatable_residue >= 0) { num_left_theoretical_site_determining_fragment_ions = 0; num_left_matching_site_determining_fragment_ions = 0; for (int j = first_phosphorylatable_residue + 1; j <= i; j++) { string n_terminal_fragment = fragment_types[0].ToString() + j.ToString(); if (best_phosphoisoform.Fragments.ContainsKey(n_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[n_terminal_fragment]) { num_left_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_left_matching_site_determining_fragment_ions++; string n_terminal_fragment_string = n_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !left_site_determining_fragments.Contains( n_terminal_fragment_string)) { left_site_determining_fragments.Add( n_terminal_fragment_string); } } } } string c_terminal_fragment = fragment_types[1].ToString() + (best_sequence.Length - j).ToString(); if (best_phosphoisoform.Fragments.ContainsKey(c_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[c_terminal_fragment]) { num_left_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_left_matching_site_determining_fragment_ions++; string c_terminal_fragment_string = c_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !left_site_determining_fragments.Contains( c_terminal_fragment_string)) { left_site_determining_fragments.Add( c_terminal_fragment_string); } } } } } left_p_value = alglib.binomialdistr.binomialcdistribution( num_left_matching_site_determining_fragment_ions.Value - 1, num_left_theoretical_site_determining_fragment_ions.Value, probability_of_success); left_a_score = -10*Math.Log10(left_p_value); } int last_phosphorylatable_residue = i + 1; while (last_phosphorylatable_residue < best_sequence.Length) { if (best_sequence[last_phosphorylatable_residue] == 'S' || 
best_sequence[last_phosphorylatable_residue] == 'T' || best_sequence[last_phosphorylatable_residue] == 'Y') { break; } else { last_phosphorylatable_residue++; } } int? num_right_theoretical_site_determining_fragment_ions = null; int? num_right_matching_site_determining_fragment_ions = null; double right_p_value = double.NaN; double right_a_score = double.NaN; if (last_phosphorylatable_residue < best_sequence.Length) { num_right_theoretical_site_determining_fragment_ions = 0; num_right_matching_site_determining_fragment_ions = 0; for (int j = last_phosphorylatable_residue; j > i; j--) { string n_terminal_fragment = fragment_types[0].ToString() + j.ToString(); if (best_phosphoisoform.Fragments.ContainsKey(n_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[n_terminal_fragment]) { num_right_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_right_matching_site_determining_fragment_ions++; string n_terminal_fragment_string = n_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !right_site_determining_fragments.Contains( n_terminal_fragment_string)) { right_site_determining_fragments.Add( n_terminal_fragment_string); } } } } string c_terminal_fragment = fragment_types[1].ToString() + (best_sequence.Length - j).ToString(); if (best_phosphoisoform.Fragments.ContainsKey(c_terminal_fragment)) { foreach ( KeyValuePair<int, bool> kvp in best_phosphoisoform.Fragments[c_terminal_fragment]) { num_right_theoretical_site_determining_fragment_ions++; if (kvp.Value) { num_right_matching_site_determining_fragment_ions++; string c_terminal_fragment_string = c_terminal_fragment + "(+" + kvp.Key.ToString() + ')'; if ( !right_site_determining_fragments.Contains( c_terminal_fragment_string)) { right_site_determining_fragments.Add( c_terminal_fragment_string); } } } } } right_p_value = alglib.binomialdistr.binomialcdistribution( num_right_matching_site_determining_fragment_ions.Value - 1, 
num_right_theoretical_site_determining_fragment_ions.Value, probability_of_success); right_a_score = -10*Math.Log10(right_p_value); } theoretical_site_determining_fragment_ions.Add( (num_left_theoretical_site_determining_fragment_ions.HasValue ? num_left_theoretical_site_determining_fragment_ions.ToString() : "n/a") + " | " + (num_right_theoretical_site_determining_fragment_ions.HasValue ? num_right_theoretical_site_determining_fragment_ions.ToString() : "n/a")); matching_site_determining_fragment_ions.Add( (num_left_matching_site_determining_fragment_ions.HasValue ? num_left_matching_site_determining_fragment_ions.ToString() : "n/a") + " | " + (num_right_matching_site_determining_fragment_ions.HasValue ? num_right_matching_site_determining_fragment_ions.ToString() : "n/a")); site_determining_fragments.Add((left_site_determining_fragments.Count > 0 ? string.Join(",", left_site_determining_fragments.ToArray()) : "n/a") + " | " + (right_site_determining_fragments.Count > 0 ? string.Join(",", right_site_determining_fragments .ToArray()) : "n/a")); p_values.Add((double.IsNaN(left_p_value) ? "n/a" : left_p_value.ToString()) + " | " + (double.IsNaN(right_p_value) ? "n/a" : right_p_value.ToString())); a_scores.Add((double.IsNaN(left_a_score) ? "n/a" : left_a_score.ToString()) + " | " + (double.IsNaN(right_a_score) ? 
"n/a" : right_a_score.ToString())); bool site_localized = (double.IsNaN(left_a_score) || left_a_score >= ambiguityScoreThreshold) && (double.IsNaN(right_a_score) || right_a_score >= ambiguityScoreThreshold); sites_localized.Add(site_localized.ToString().ToUpper()); if (site_localized) { peptide_sites[best_sequence[i] + (i + 1).ToString()] = true; protein_sites[best_sequence[i] + (start_residue + i).ToString()] = true; if (!localized_sites_by_protein.ContainsKey(protein_description)) { localized_sites_by_protein.Add(protein_description, new Dictionary<string, int>()); } string site = best_sequence[i] + (start_residue + i).ToString(); if (!localized_sites_by_protein[protein_description].ContainsKey(site)) { localized_sites_by_protein[protein_description].Add(site, 0); } localized_sites_by_protein[protein_description][site]++; if (motifXOutput) { ExtractMotifs(motifs, proteins, protein_description, best_sequence, start_residue, i); } } if (!site_localized) { all_sites_localized = false; } } } } int phosphorylations = 0; foreach ( string dynamic_modification in best_phosphoisoform.Peptide.DynamicModifications.Values) { if (dynamic_modification.Contains("phosphorylation")) { phosphorylations++; } } string isoform = null; if (all_sites_localized) { foreach (KeyValuePair<string, bool> kvp in protein_sites) { isoform += kvp.Key + ','; } isoform = isoform.Substring(0, isoform.Length - 1); KeyValuePair<int, string> isoform_kvp = new KeyValuePair<int, string>(phosphorylations, isoform); if (!localized.ContainsKey(protein_description)) { localized.Add(protein_description, new Dictionary<KeyValuePair<int, string>, List<string>>()); } if (!localized[protein_description].ContainsKey(isoform_kvp)) { localized[protein_description].Add(isoform_kvp, new List<string>()); } localized[protein_description][isoform_kvp].Add(line); } else { if (preliminary_localization) { foreach (KeyValuePair<string, bool> kvp in protein_sites) { isoform += kvp.Key; if (!kvp.Value) { isoform += '?'; } 
isoform += ','; } } else { for (int i = 0; i < best_sequence.Length; i++) { bool phospho = false; for (int j = 0; j < all_phosphopeptide_stats.Count; j++) { if (all_phosphopeptide_stats[j].NumberOfMatchingFragments < best_phosphoisoform.NumberOfMatchingFragments) { break; } if (char.IsLower(all_phosphopeptide_stats[j].Peptide.Sequence[i])) { phospho = true; } } if (phospho) { isoform += char.ToLower(best_sequence[i]) + (start_residue + i).ToString() + "?,"; } } } isoform = isoform.Substring(0, isoform.Length - 1); KeyValuePair<int, string> isoform_kvp = new KeyValuePair<int, string>(phosphorylations, isoform); if (!unlocalized.ContainsKey(protein_description)) { unlocalized.Add(protein_description, new Dictionary<KeyValuePair<int, string>, List<string>>()); } if (!unlocalized[protein_description].ContainsKey(isoform_kvp)) { unlocalized[protein_description].Add(isoform_kvp, new List<string>()); } unlocalized[protein_description][isoform_kvp].Add(line); } StreamWriter output = all_sites_localized ? 
localized_phospho_output : unlocalized_phospho_output; output.Write(line + ','); output.Write(identified_phosphoisoform.NumberOfTotalFragments.ToString() + ','); output.Write(identified_phosphoisoform.Peptide.Sequence + ','); output.Write(identified_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); for (int s = 0; s < best_sequences.Count; s++) { output.Write(best_sequences[s]); if (s < best_sequences.Count - 1) { output.Write('/'); } } output.Write(','); output.Write(best_phosphoisoform.Peptide.Sequence + ','); output.Write(best_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); if (second_best_phosphoisoform != null) { output.Write(second_best_phosphoisoform.Peptide.Sequence + ','); output.Write(second_best_phosphoisoform.NumberOfMatchingFragments.ToString() + ','); } else { output.Write("n/a,n/a,"); } output.Write( (identified_phosphoisoform.NumberOfMatchingFragments == best_phosphoisoform.NumberOfMatchingFragments).ToString() + ','); output.Write(preliminary_localization.ToString() + ','); string[] peptide_sites_array = new string[peptide_sites.Count]; peptide_sites.Keys.CopyTo(peptide_sites_array, 0); string peptide_sites_array_string = string.Join("; ", peptide_sites_array); AppendFieldToCsv(peptide_sites_array_string, output); output.Write((!double.IsNaN(probability_of_success) ? 
probability_of_success.ToString() : string.Empty) + ','); string theoretical_site_determining_fragment_ions_string = string.Join("; ", theoretical_site_determining_fragment_ions.ToArray()); AppendFieldToCsv(theoretical_site_determining_fragment_ions_string, output); string matching_site_determining_fragment_ions_string = string.Join("; ", matching_site_determining_fragment_ions.ToArray()); AppendFieldToCsv(matching_site_determining_fragment_ions_string, output); string site_determining_fragments_string = string.Join("; ", site_determining_fragments.ToArray()); AppendFieldToCsv(site_determining_fragments_string, output); string p_values_string = string.Join("; ", p_values.ToArray()); AppendFieldToCsv(p_values_string, output); string a_scores_string = string.Join("; ", a_scores.ToArray()); AppendFieldToCsv(a_scores_string, output); string sites_localized_string = string.Join("; ", sites_localized.ToArray()); AppendFieldToCsv(sites_localized_string, output); output.Write(all_sites_localized.ToString().ToUpper()); output.WriteLine(); } double progress = (double) csv.BaseStream.Position/csv.BaseStream.Length; onUpdateProgress(new ProgressEventArgs((int) Math.Round(progress*100.0))); } } csv.Close(); onFinishedFile(new EventArgs()); } raw.Close(); non_phospho_output.Close(); localized_phospho_output.Close(); unlocalized_phospho_output.Close(); log.WriteLine("Identified Phosphoproteins: " + identified_sites_by_protein.Proteins.ToString()); log.WriteLine("Identified Phosphosites: " + identified_sites_by_protein.Sites.ToString()); log.WriteLine(); log.WriteLine("Localized Phosphoproteins: " + localized_sites_by_protein.Proteins.ToString()); log.WriteLine("Localized Phosphosites: " + localized_sites_by_protein.Sites.ToString()); log.WriteLine(); int localized_phosphoisoforms = 0; foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in localized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { 
localized_phosphoisoforms++; } } int unlocalized_phosphoisoforms = 0; foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in unlocalized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { unlocalized_phosphoisoforms++; } } log.WriteLine("Localized Phosphoisoforms: " + localized_phosphoisoforms.ToString()); log.WriteLine("Unlocalized Phosphoisoforms: " + unlocalized_phosphoisoforms.ToString()); log.Close(); using (StreamWriter protein_sites = new StreamWriter(Path.Combine(outputFolder, "localized_protein_phosphosites.csv"))) { protein_sites.WriteLine("Protein Description, Number of Localized Phosphosites"); protein_sites.WriteLine(", Localized Phosphosite"); foreach (KeyValuePair<string, Dictionary<string, int>> kvp in localized_sites_by_protein) { protein_sites.WriteLine((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ',' + kvp.Value.Count.ToString()); foreach (KeyValuePair<string, int> kvp2 in kvp.Value) { protein_sites.WriteLine(',' + kvp2.Key); } } } using (StreamWriter full_localized_output = new StreamWriter(Path.Combine(outputFolder, "full_localized_phosphoisoforms.csv"))) { //int interference_index = -1; int first_quant_index = -1; int last_quant_index = -1; if (!quant) { full_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites, PSMs Identified, Peptides Identified"); } else { full_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = 0; i < headers.Length; i++) { if (headers[i].EndsWith("NL)")) { if (first_quant_index < 0) { first_quant_index = i; } } if (first_quant_index >= 0) full_localized_output.Write(' ' + headers[i] + ','); if (headers[i].Equals("Channels Detected")) { last_quant_index = i; } } full_localized_output.Write(" Phosphoisoform Quantified?"); } full_localized_output.WriteLine(); 
full_localized_output.WriteLine(", " + header_line); using (StreamWriter reduced_localized_output = new StreamWriter(Path.Combine(outputFolder, "reduced_localized_phosphoisoforms.csv"))) { if (!quant) { reduced_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides , PSMs Identified, Peptides Identified"); } else { reduced_localized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides , PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = first_quant_index; i <= last_quant_index; i++) { reduced_localized_output.Write(' ' + headers[i] + ','); } reduced_localized_output.Write(" Phosphoisoform Quantified?"); } reduced_localized_output.WriteLine(); foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in localized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { full_localized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); reduced_localized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); full_localized_output.Write((kvp2.Key.Value.Contains(",") ? '"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); reduced_localized_output.Write((kvp2.Key.Value.Contains(",") ? 
'"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); full_localized_output.Write(kvp2.Key.Key.ToString() + ','); reduced_localized_output.Write(kvp2.Key.Key.ToString() + ','); double[] isoform_quantitation = new double[last_quant_index - first_quant_index + 1]; isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_NAN); int spectra_identified = 0; int spectra_quantified = 0; Dictionary<string, int> unique_peptides_identified = new Dictionary<string, int>(); Dictionary<string, int> unique_peptides_quantified = new Dictionary<string, int>(); StringBuilder peptides = new StringBuilder(); foreach (string line in kvp2.Value) { string[] fields = Regex.Split(line, @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 spectra_identified++; string peptide_sequence = fields[2]; peptides.Append(peptide_sequence + " "); if (!unique_peptides_identified.ContainsKey(peptide_sequence)) { unique_peptides_identified.Add(peptide_sequence, 0); } unique_peptides_identified[peptide_sequence]++; if (quant) { spectra_quantified++; if (double.IsNaN(isoform_quantitation[0])) { isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_ZERO); } if (!unique_peptides_quantified.ContainsKey(peptide_sequence)) { unique_peptides_quantified.Add(peptide_sequence, 0); } unique_peptides_quantified[peptide_sequence]++; for (int i = first_quant_index; i <= last_quant_index; i++) { double val = 0; double.TryParse(fields[i], out val); isoform_quantitation[i - first_quant_index] += val; } } } full_localized_output.Write(peptides.ToString() + ','); reduced_localized_output.Write(peptides.ToString() + ','); full_localized_output.Write(spectra_identified.ToString() + ','); reduced_localized_output.Write(spectra_identified.ToString() + ','); if (quant) { 
full_localized_output.Write(spectra_quantified.ToString() + ','); reduced_localized_output.Write(spectra_quantified.ToString() + ','); } full_localized_output.Write(unique_peptides_identified.Count.ToString() + ','); reduced_localized_output.Write(unique_peptides_identified.Count.ToString() + ','); if (quant) { full_localized_output.Write(unique_peptides_quantified.Count.ToString() + ','); reduced_localized_output.Write(unique_peptides_quantified.Count.ToString() + ','); for (int i = isoform_quantitation.GetLowerBound(0); i <= isoform_quantitation.GetUpperBound(0); i++) { full_localized_output.Write(isoform_quantitation[i].ToString() + ','); reduced_localized_output.Write(isoform_quantitation[i].ToString() + ','); } full_localized_output.Write((spectra_quantified > 0).ToString()); reduced_localized_output.Write((spectra_quantified > 0).ToString()); } full_localized_output.WriteLine(); reduced_localized_output.WriteLine(); foreach (string line in kvp2.Value) { full_localized_output.WriteLine(',' + line); } } } } } using (StreamWriter full_unlocalized_output = new StreamWriter(Path.Combine(outputFolder, "full_unlocalized_phosphoisoforms.csv"))) { int first_quant_index = -1; int last_quant_index = -1; if (!quant) { full_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, Peptides Identified"); } else { full_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = 0; i < headers.Length; i++) { if (headers[i].EndsWith("NL)")) { if (first_quant_index < 0) { first_quant_index = i; } } if (first_quant_index >= 0) full_unlocalized_output.Write(' ' + headers[i] + ','); if (headers[i].Equals("Channels Detected")) { last_quant_index = i; } } full_unlocalized_output.Write(" Phosphoisoform Quantified?"); } full_unlocalized_output.WriteLine(); full_unlocalized_output.WriteLine(", " + 
header_line); using (StreamWriter reduced_unlocalized_output = new StreamWriter(Path.Combine(outputFolder, "reduced_unlocalized_phosphoisoforms.csv"))) { if (!quant) { reduced_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, Peptides Identified"); } else { reduced_unlocalized_output.Write("Protein Description, Phosphoisoform, Phosphoisoform Sites,Peptides, PSMs Identified, PSMs Quantified, Peptides Identified, Peptides Quantified,"); for (int i = first_quant_index; i <= last_quant_index; i++) { reduced_unlocalized_output.Write(' ' + headers[i] + ','); } reduced_unlocalized_output.Write(" Phosphoisoform Quantified?"); } reduced_unlocalized_output.WriteLine(); foreach (KeyValuePair<string, Dictionary<KeyValuePair<int, string>, List<string>>> kvp in unlocalized) { foreach (KeyValuePair<KeyValuePair<int, string>, List<string>> kvp2 in kvp.Value) { full_unlocalized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); reduced_unlocalized_output.Write((kvp.Key.Contains(",") ? '"' + kvp.Key + '"' : kvp.Key) + ','); full_unlocalized_output.Write((kvp2.Key.Value.Contains(",") ? '"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); reduced_unlocalized_output.Write((kvp2.Key.Value.Contains(",") ? 
'"' + kvp2.Key.Value + '"' : kvp2.Key.Value) + ','); full_unlocalized_output.Write(kvp2.Key.Key.ToString() + ','); reduced_unlocalized_output.Write(kvp2.Key.Key.ToString() + ','); double[] isoform_quantitation = new double[last_quant_index - first_quant_index + 1]; isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_NAN); int spectra_identified = 0; int spectra_quantified = 0; Dictionary<string, int> unique_peptides_identified = new Dictionary<string, int>(); Dictionary<string, int> unique_peptides_quantified = new Dictionary<string, int>(); StringBuilder peptides = new StringBuilder(); foreach (string line in kvp2.Value) { string[] fields = Regex.Split(line, @",(?!(?<=(?:^|,)\s*\x22(?:[^\x22]|\x22\x22|\\\x22)*,)(?:[^\x22]|\x22\x22|\\\x22)*\x22\s*(?:,|$))"); // crazy regex to parse CSV with internal double quotes from http://regexlib.com/REDetails.aspx?regexp_id=621 spectra_identified++; string peptide_sequence = fields[2]; peptides.Append(peptide_sequence + " "); if (!unique_peptides_identified.ContainsKey(peptide_sequence)) { unique_peptides_identified.Add(peptide_sequence, 0); } unique_peptides_identified[peptide_sequence]++; if (quant) { spectra_quantified++; if (double.IsNaN(isoform_quantitation[0])) { isoform_quantitation = Array.ConvertAll<double, double>(isoform_quantitation, SET_DOUBLE_VALUE_TO_ZERO); } if (!unique_peptides_quantified.ContainsKey(peptide_sequence)) { unique_peptides_quantified.Add(peptide_sequence, 0); } unique_peptides_quantified[peptide_sequence]++; for (int i = first_quant_index; i <= last_quant_index; i++) { double val = 0; double.TryParse(fields[i], out val); isoform_quantitation[i - first_quant_index] += val; } } } full_unlocalized_output.Write(peptides.ToString() + ','); reduced_unlocalized_output.Write(peptides.ToString() + ','); full_unlocalized_output.Write(spectra_identified.ToString() + ','); reduced_unlocalized_output.Write(spectra_identified.ToString() + ','); if (quant) { 
full_unlocalized_output.Write(spectra_quantified.ToString() + ','); reduced_unlocalized_output.Write(spectra_quantified.ToString() + ','); } full_unlocalized_output.Write(unique_peptides_identified.Count.ToString() + ','); reduced_unlocalized_output.Write(unique_peptides_identified.Count.ToString() + ','); if (quant) { full_unlocalized_output.Write(unique_peptides_quantified.Count.ToString() + ','); reduced_unlocalized_output.Write(unique_peptides_quantified.Count.ToString() + ','); for (int i = isoform_quantitation.GetLowerBound(0); i <= isoform_quantitation.GetUpperBound(0); i++) { full_unlocalized_output.Write(isoform_quantitation[i].ToString() + ','); reduced_unlocalized_output.Write(isoform_quantitation[i].ToString() + ','); } full_unlocalized_output.Write((spectra_quantified > 0).ToString()); reduced_unlocalized_output.Write((spectra_quantified > 0).ToString()); } full_unlocalized_output.WriteLine(); reduced_unlocalized_output.WriteLine(); foreach (string line in kvp2.Value) { full_unlocalized_output.WriteLine(',' + line); } } } } } if (motifXOutput) { foreach (string motif in motifs.Keys) { motifX.WriteLine(motif); } motifX.Close(); using (StreamWriter motif_fasta = new StreamWriter(Path.Combine(outputFolder, "motif-x.fasta"))) { foreach (KeyValuePair<string, string> kvp in proteins) { if (!kvp.Key.Contains("DECOY") && !kvp.Key.Contains("REVERSED")) { motif_fasta.WriteLine('>' + kvp.Key); motif_fasta.WriteLine(kvp.Value); } } } } onFinished(new EventArgs()); //} //catch(Exception ex) //{ // onThrowException(new ExceptionEventArgs(ex)); //} //finally //{ if (log != null) { log.Close(); } if (raw != null) { raw.Close(); } if (csv != null) { csv.Close(); } if (non_phospho_output != null) { non_phospho_output.Close(); } if (localized_phospho_output != null) { localized_phospho_output.Close(); } if (unlocalized_phospho_output != null) { unlocalized_phospho_output.Close(); } if (motifX != null) { motifX.Close(); } } catch (Exception e) { MessageBox.Show(e.Message 
+ " " + e.StackTrace); } }
/// <summary>
/// Builds a <see cref="ProteinDictionary"/> populated with four fixed
/// <see cref="Protein"/> entries (projects 6600, 5797, 8011 and 6903),
/// each stored under its own project number.
/// </summary>
/// <returns>The populated dictionary.</returns>
private static IProteinDictionary CreateProteinDictionary()
{
    var dictionary = new ProteinDictionary();

    // Project 6600: GROGPU2 core with a zero k-factor.
    var project6600 = new Protein
    {
        ProjectNumber = 6600,
        WorkUnitName = "WorkUnitName",
        Core = "GROGPU2",
        Credit = 450,
        KFactor = 0,
        Frames = 100,
        NumberOfAtoms = 5000,
        PreferredDays = 2,
        MaximumDays = 3
    };
    dictionary.Add(project6600.ProjectNumber, project6600);

    // Project 5797: GROGPU2 core with a non-zero k-factor.
    var project5797 = new Protein
    {
        ProjectNumber = 5797,
        WorkUnitName = "WorkUnitName2",
        Core = "GROGPU2",
        Credit = 675,
        KFactor = 2.3,
        Frames = 100,
        NumberOfAtoms = 7000,
        PreferredDays = 2,
        MaximumDays = 3
    };
    dictionary.Add(project5797.ProjectNumber, project5797);

    // Project 8011: GRO-A4 core with fractional credit and deadline values.
    var project8011 = new Protein
    {
        ProjectNumber = 8011,
        WorkUnitName = "WorkUnitName3",
        Core = "GRO-A4",
        Credit = 106.6,
        KFactor = 0.75,
        Frames = 100,
        NumberOfAtoms = 9000,
        PreferredDays = 2.13,
        MaximumDays = 4.62
    };
    dictionary.Add(project8011.ProjectNumber, project8011);

    // Project 6903: GRO-A5 core with the largest credit and k-factor of the set.
    var project6903 = new Protein
    {
        ProjectNumber = 6903,
        WorkUnitName = "WorkUnitName4",
        Core = "GRO-A5",
        Credit = 22706,
        KFactor = 38.05,
        Frames = 100,
        NumberOfAtoms = 11000,
        PreferredDays = 5,
        MaximumDays = 12
    };
    dictionary.Add(project6903.ProjectNumber, project6903);

    return dictionary;
}