/// <summary>
/// Decides whether an AMT tag passes the observation count, NET range, and mass range filters.
/// </summary>
/// <param name="item">Mass tag to evaluate.</param>
/// <param name="minimumObservationCount">Lowest observation count a tag may have and still pass.</param>
/// <returns>True when the tag passes every filter that is enabled; otherwise false.</returns>
private bool AMTTagPassesFilter(MassTagLight item, int minimumObservationCount)
{
    if (item.ObservationCount < minimumObservationCount)
    {
        return false;
    }

    // A range filter is only applied when its maximum is strictly greater than its minimum.
    var netFilterEnabled = _options.AMTTagFilterNETMax > _options.AMTTagFilterNETMin;
    if (netFilterEnabled &&
        (item.Net < _options.AMTTagFilterNETMin || item.Net > _options.AMTTagFilterNETMax))
    {
        return false;
    }

    var massFilterEnabled = _options.AMTTagFilterMassMax > _options.AMTTagFilterMassMin;
    if (massFilterEnabled &&
        (item.MassMonoisotopic < _options.AMTTagFilterMassMin || item.MassMonoisotopic > _options.AMTTagFilterMassMax))
    {
        return false;
    }

    return true;
}
/// <summary>
/// Loads mass tags from the tab-delimited text file at DatabasePath.
/// </summary>
/// <remarks>
/// The first line of the file is skipped (presumably a header row). Every other line must contain at
/// least five non-empty, tab-separated columns, read in this order: ID, peptide sequence, monoisotopic
/// mass, NET, and drift time. Lines with fewer columns (for example blank lines) are ignored rather
/// than being added as empty mass tags.
/// </remarks>
/// <returns>A mass tag database holding the parsed mass tags and an empty protein map.</returns>
public MassTagDatabase LoadDatabase()
{
    const int requiredColumnCount = 5;

    var database = new MassTagDatabase();
    var massTags = new List<MassTagLight>();
    var masstagMap = new Dictionary<int, List<Protein>>();

    //TODO: Put into base table file reader
    var lines = File.ReadAllLines(DatabasePath);
    var delimiters = new[] { "\t" };

    // Start at index 1 so the first line is never parsed as data.
    for (var i = 1; i < lines.Length; i++)
    {
        var data = lines[i].Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
        if (data.Length < requiredColumnCount)
        {
            // Blank or truncated line: adding a default-constructed tag here would put
            // a bogus entry with Id 0 into the database.
            continue;
        }

        var tag = new MassTagLight
        {
            Id = Convert.ToInt32(data[0]),
            PeptideSequence = data[1],
            MassMonoisotopic = Convert.ToDouble(data[2]),
            Net = Convert.ToDouble(data[3]),
            DriftTime = Convert.ToDouble(data[4])
        };
        massTags.Add(tag);
    }

    database.AddMassTagsAndProteins(massTags, masstagMap);
    return database;
}
/// <summary>
/// Reads the tab-delimited mass tag file at DatabasePath into a mass tag database.
/// </summary>
/// <remarks>
/// The first line is skipped (presumably a header row). Each remaining line is split on tabs with empty
/// entries removed, and needs at least five columns: ID, peptide sequence, monoisotopic mass, NET,
/// and drift time. Lines that do not have five columns are skipped so that no empty mass tag
/// is registered for them.
/// </remarks>
/// <returns>The database populated with the parsed mass tags; no proteins are mapped.</returns>
public MassTagDatabase LoadDatabase()
{
    const int minimumColumns = 5;

    var database   = new MassTagDatabase();
    var massTags   = new List<MassTagLight>();
    var masstagMap = new Dictionary<int, List<Protein>>();

    //TODO: Put into base table file reader
    var lines      = File.ReadAllLines(DatabasePath);
    var delimiters = new[] { "\t" };

    // Index 0 is intentionally skipped; parsing starts on the second line.
    for (var i = 1; i < lines.Length; i++)
    {
        var data = lines[i].Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

        // Only rows carrying every expected column produce a mass tag; blank or
        // truncated rows would otherwise yield a default tag with Id 0.
        if (data.Length >= minimumColumns)
        {
            var tag = new MassTagLight();
            tag.Id               = Convert.ToInt32(data[0]);
            tag.PeptideSequence  = data[1];
            tag.MassMonoisotopic = Convert.ToDouble(data[2]);
            tag.Net              = Convert.ToDouble(data[3]);
            tag.DriftTime        = Convert.ToDouble(data[4]);
            massTags.Add(tag);
        }
    }

    database.AddMassTagsAndProteins(massTags, masstagMap);
    return database;
}
/// <summary>
/// Runs the mass tag query and builds the list of mass tags that pass the loading filters.
/// </summary>
/// <remarks>
/// A row produces a mass tag when it has a Mass_Tag_ID, its observation count is at least
/// Options.MinimumObservationCountFilter, its NET is not the -1 default used for a missing value,
/// and, when Options.OnlyLoadTagsWithDriftTime is set, its drift time is greater than zero.
/// Columns that are DBNull keep the defaults declared at the top of the loop body.
/// </remarks>
/// <returns>The mass tags that passed the filters.</returns>
protected virtual List<MassTagLight> LoadMassTags()
{
    var massTags = new List<MassTagLight>();

    // The using blocks dispose the connection, command, and reader on every exit path,
    // so no explicit Close calls or catch/rethrow are needed.
    using (var connection = CreateConnection(CreateConnectionString()))
    {
        connection.Open();
        using (var command = connection.CreateCommand())
        {
            SetupMassTagCommand(command);

            using (var reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    if (reader["Mass_Tag_ID"] == DBNull.Value)
                    {
                        continue;
                    }

                    var id = Convert.ToInt32(reader["Mass_Tag_ID"]);
                    var peptide = "";
                    float ganet = -1;
                    float xcorr_max = 0;
                    float stdNet = 0;
                    var monoMass = 0.0;
                    // No column is read for the discriminant; it stays at zero.
                    float highDiscriminant = 0;
                    var numObservations = 0;
                    var modification = "";
                    var modCount = 0;
                    short cleavageState = 2;
                    float driftTime = 0;
                    var charge = 0;
                    var conformerID = 0;
                    float highPeptideProphetProbability = 0;
                    double msgf = 0;

                    if (reader["Peptide"] != DBNull.Value)
                    {
                        peptide = reader["Peptide"].ToString();
                    }
                    if (reader["Net_Value_to_Use"] != DBNull.Value)
                    {
                        ganet = Convert.ToSingle(reader["Net_Value_to_Use"]);
                    }
                    if (reader["High_Normalized_Score"] != DBNull.Value)
                    {
                        xcorr_max = Convert.ToSingle(reader["High_Normalized_Score"]);
                    }
                    if (reader["StD_GANET"] != DBNull.Value)
                    {
                        stdNet = Convert.ToSingle(reader["StD_GANET"]);
                    }
                    if (reader["Monoisotopic_Mass"] != DBNull.Value)
                    {
                        monoMass = Convert.ToDouble(reader["Monoisotopic_Mass"]);
                    }
                    if (reader["Min_MSGF_SpecProb"] != DBNull.Value)
                    {
                        msgf = Convert.ToDouble(reader["Min_MSGF_SpecProb"]);
                    }
                    if (reader["Peptide_Obs_Count_Passing_Filter"] != DBNull.Value)
                    {
                        numObservations = Convert.ToInt32(reader["Peptide_Obs_Count_Passing_Filter"]);
                    }
                    if (reader["Mod_Count"] != DBNull.Value)
                    {
                        modCount = Convert.ToInt32(reader["Mod_Count"]);
                    }
                    if (reader["Mod_Description"] != DBNull.Value)
                    {
                        modification = reader["Mod_Description"].ToString();
                    }
                    if (reader["High_Peptide_Prophet_Probability"] != DBNull.Value)
                    {
                        highPeptideProphetProbability = Convert.ToSingle(reader["High_Peptide_Prophet_Probability"]);
                    }
                    if (reader["Cleavage_State"] != DBNull.Value)
                    {
                        cleavageState = Convert.ToInt16(reader["Cleavage_State"]);
                    }
                    if (reader["Drift_Time_Avg"] != DBNull.Value)
                    {
                        driftTime = Convert.ToSingle(reader["Drift_Time_Avg"]);
                    }
                    if (reader["Conformer_Charge"] != DBNull.Value)
                    {
                        charge = Convert.ToInt32(reader["Conformer_Charge"]);
                    }
                    if (reader["Conformer_ID"] != DBNull.Value)
                    {
                        conformerID = Convert.ToInt32(reader["Conformer_ID"]);
                    }

                    // Make sure the mass tag has been seen enough times
                    if (numObservations < Options.MinimumObservationCountFilter)
                    {
                        continue;
                    }

                    var massTag = new MassTagLight
                    {
                        Id = id,
                        Molecule = new Molecule { Name = peptide },
                        Net = ganet,
                        NetAverage = ganet,
                        XCorr = xcorr_max,
                        DiscriminantMax = highDiscriminant,
                        MassMonoisotopic = monoMass,
                        ConformationId = conformerID,
                        NetStandardDeviation = stdNet,
                        ObservationCount = numObservations,
                        DriftTime = driftTime,
                        PriorProbability = highPeptideProphetProbability,
                        CleavageState = cleavageState,
                        ModificationCount = modCount,
                        // Previously the description was read but never stored on the tag.
                        Modifications = modification,
                        MsgfSpecProbMax = msgf,
                        PeptideSequence = peptide,
                        ChargeState = charge
                    };

                    // -1 is the default assigned when Net_Value_to_Use is DBNull; skip such tags.
                    if (massTag.NetAverage == -1)
                    {
                        continue;
                    }

                    // If we are using drift time, then we should only
                    // use mass tags that have drift time.
                    var shouldAdd = !Options.OnlyLoadTagsWithDriftTime || driftTime > 0;
                    if (shouldAdd)
                    {
                        massTags.Add(massTag);
                    }
                }
            }
        }
    }

    return massTags;
}
/// <summary>
/// Reads the MTDB Framework database at m_path through SqLiteTargetDatabaseReader and converts its
/// consensus targets into mass tags with their associated proteins.
/// </summary>
/// <returns>
/// The populated mass tag database, or an empty one when the reader returns null.
/// </returns>
public MassTagDatabase LoadDatabase()
{
    var result = new MassTagDatabase();

    //Implement the loading / reading of the MTDB Framework objects.
    var sourceDatabase = new SqLiteTargetDatabaseReader().ReadDb(m_path);
    if (sourceDatabase == null)
    {
        return result;
    }

    var tags = new List<MassTagLight>();

    // Proteins are shared between mass tags: one Protein instance per protein ID, and each
    // mass tag ID maps to the list of proteins of its consensus target.
    var proteinsById = new Dictionary<int, Protein>();
    var proteinsByTagId = new Dictionary<int, List<Protein>>();

    foreach (var target in sourceDatabase.ConsensusTargets)
    {
        // Copy the consensus data into a mass tag light.
        var tag = new MassTagLight();
        tag.Id = target.Id;
        tag.MassMonoisotopic = target.TheoreticalMonoIsotopicMass;
        tag.NetAverage = target.AverageNet;
        tag.NetPredicted = target.PredictedNet;
        tag.PeptideSequence = target.Sequence;
        tag.NetStandardDeviation = target.StdevNet;

        var tagProteins = new List<Protein>();
        foreach (var sourceProtein in target.Proteins)
        {
            Protein protein;
            if (!proteinsById.TryGetValue(sourceProtein.Id, out protein))
            {
                // First time this protein ID is seen: create it from the consensus target protein.
                protein = new Protein
                {
                    Id = sourceProtein.Id,
                    Name = sourceProtein.ProteinName,
                    ResidueStartPosition = sourceProtein.ResidueStart,
                    ResidueEndPosition = sourceProtein.ResidueEnd
                };
                //TODO: Do something about the cleavage state and terminus state of a protein loaded from MTDBCreator database.
                //protein.CleavageState = ??
                //protein.TerminusState = ??
                proteinsById.Add(protein.Id, protein);
            }

            tagProteins.Add(protein);
        }

        proteinsByTagId.Add(tag.Id, tagProteins);
        tags.Add(tag);
    }

    result.AddMassTagsAndProteins(tags, proteinsByTagId);
    return result;
}
/// <summary>
/// Converts one incoming data row into a mass tag and appends it to MassTags.
/// </summary>
/// <remarks>
/// The row is silently skipped when its field array is null, when it has fewer than 11 fields, or
/// when any of the columns Mass, NET, Dataset_Member_Count, Cluster_ID, Drift_Time, Charge, or Score
/// is missing from the column mapping.
/// </remarks>
/// <param name="sender">Source of the event; not used.</param>
/// <param name="args">Event data carrying the field values of the row.</param>
/// <exception cref="NullReferenceException">Thrown when <paramref name="args"/> is null.</exception>
public void HandleDataRow(object sender, MageDataEventArgs args)
{
    const int minimumFieldCount = 11;

    if (args == null)
    {
        // Exception type is kept unchanged so callers that already catch it keep working.
        throw new NullReferenceException("The mass tags are invalid.");
    }

    // Invalid rows are skipped on purpose (best effort) rather than raising an error.
    if (args.Fields == null || args.Fields.Length < minimumFieldCount)
    {
        return;
    }

    var tag = new MassTagLight();
    int column;

    if (!m_columnMapping.TryGetValue("Mass", out column))
    {
        return;
    }
    tag.MassMonoisotopic = Convert.ToDouble(args.Fields[column]);

    if (!m_columnMapping.TryGetValue("NET", out column))
    {
        return;
    }
    tag.NetAverage = Convert.ToDouble(args.Fields[column]);
    tag.Net = tag.NetAverage;

    if (!m_columnMapping.TryGetValue("Dataset_Member_Count", out column))
    {
        return;
    }
    tag.ObservationCount = Convert.ToInt32(args.Fields[column]);

    if (!m_columnMapping.TryGetValue("Cluster_ID", out column))
    {
        return;
    }
    tag.Id = Convert.ToInt32(args.Fields[column]);

    if (!m_columnMapping.TryGetValue("Drift_Time", out column))
    {
        return;
    }
    tag.DriftTime = Convert.ToDouble(args.Fields[column]);

    if (!m_columnMapping.TryGetValue("Charge", out column))
    {
        return;
    }
    tag.ChargeState = Convert.ToInt32(args.Fields[column]);

    if (!m_columnMapping.TryGetValue("Score", out column))
    {
        return;
    }
    tag.Score = Convert.ToDouble(args.Fields[column]);

    MassTags.Add(tag);
}
/// <summary>
/// Performs STAC against the mass tag database.
/// </summary>
/// <remarks>
/// The clusters and mass tags are copied into the light-weight objects handed to the feature matcher.
/// The returned matches reference the original cluster and mass tag instances, looked up by cluster
/// ID and by mass tag ID plus conformation ID. The matcher that was run is stored in Matcher.
/// </remarks>
/// <param name="clusters">Clusters to match; each cluster ID must be unique.</param>
/// <param name="database">Database supplying the mass tags to match against.</param>
/// <returns>One entry per match, carrying the STAC score and the STAC specificity.</returns>
public List<MultiAlignCore.Data.MassTags.FeatureMatchLight<T, MassTagLight>> PerformPeakMatching(List<T> clusters, MassTagDatabase database)
{
    var clusterMap = new Dictionary<int, T>();
    var tagMap = new Dictionary<int, Dictionary<int, MassTagLight>>();
    var massTags = new List<MassTagLight>();

    var i = 0;
    foreach (var tag in database.MassTags)
    {
        var mt = new MassTagLight
        {
            Abundance = Convert.ToInt32(tag.Abundance),
            ChargeState = tag.ChargeState,
            CleavageState = tag.CleavageState,
            ConformationId = tag.ConformationId,
            ConformationObservationCount = tag.ConformationObservationCount,
            DiscriminantMax = tag.DiscriminantMax,
            DriftTime = Convert.ToSingle(tag.DriftTime),
            DriftTimePredicted = tag.DriftTimePredicted,
            Id = tag.Id,
            MassMonoisotopic = tag.MassMonoisotopic,
            ModificationCount = tag.ModificationCount,
            Modifications = tag.Modifications,
            Molecule = tag.Molecule,
            MsgfSpecProbMax = tag.MsgfSpecProbMax,
            Net = tag.NetAverage,
            NetAverage = tag.NetAverage,
            NetPredicted = tag.NetPredicted,
            NetStandardDeviation = tag.NetStandardDeviation,
            ObservationCount = tag.ObservationCount,
            PeptideSequence = tag.PeptideSequence,
            PeptideSequenceEx = tag.PeptideSequenceEx,
            PriorProbability = tag.PriorProbability,
            QualityScore = tag.QualityScore,
            XCorr = tag.XCorr
        };
        mt.Index = i++;
        massTags.Add(mt);

        Dictionary<int, MassTagLight> conformations;
        if (!tagMap.TryGetValue(tag.Id, out conformations))
        {
            conformations = new Dictionary<int, MassTagLight>();
            tagMap.Add(tag.Id, conformations);
        }

        // The map keeps the original tag (not the copy given to the matcher) so that
        // the returned matches point at the database's own objects.
        conformations.Add(tag.ConformationId, tag);
    }

    // convert data needed by the algorithm.
    var features = new List<UMCClusterLight>();
    foreach (var cluster in clusters)
    {
        var feature = new UMCClusterLight
        {
            Id = cluster.Id,
            MassMonoisotopicAligned = cluster.MassMonoisotopic,
            MassMonoisotopic = cluster.MassMonoisotopic
        };
        feature.Net = cluster.Net;
        feature.NetAligned = cluster.Net;
        feature.ChargeState = cluster.ChargeState;
        feature.DriftTime = Convert.ToSingle(cluster.DriftTime);
        feature.DriftTimeAligned = Convert.ToDouble(cluster.DriftTime);
        features.Add(feature);

        clusterMap.Add(cluster.Id, cluster);
    }

    // create a stac manager and run.
    var matcher = new FeatureMatcher<UMCClusterLight, MassTagLight>(features, massTags, Options);
    matcher.MessageEvent += StatusHandler;
    matcher.ProcessingCompleteEvent += StatusHandler;

    try
    {
        matcher.MatchFeatures();
        Matcher = matcher;
        Matcher.PopulateStacfdrTable(matcher.MatchList);

        var matches = new List<MultiAlignCore.Data.MassTags.FeatureMatchLight<T, MassTagLight>>();
        foreach (var match in matcher.MatchList)
        {
            var matched = new MultiAlignCore.Data.MassTags.FeatureMatchLight<T, MassTagLight>
            {
                Observed = clusterMap[match.ObservedFeature.Id],
                Target = tagMap[match.TargetFeature.Id][match.TargetFeature.ConformationId],
                Confidence = match.STACScore,
                Uniqueness = match.STACSpecificity
            };
            matches.Add(matched);
        }

        return matches;
    }
    finally
    {
        // Always detach the status handlers, even when matching throws, so the matcher
        // does not keep a subscription to this object.
        matcher.MessageEvent -= StatusHandler;
        matcher.ProcessingCompleteEvent -= StatusHandler;
    }
}
/// <summary>
/// Creates the key used for mapping and caching a mass tag, combining its ID and its conformation ID.
/// </summary>
/// <param name="tag">Mass tag to create the key for.</param>
/// <returns>A string of the form "Id-ConformationId", returned as an object.</returns>
public static object BuildId(this MassTagLight tag)
{
    var tagId = tag.Id;
    var conformationId = tag.ConformationId;

    return string.Concat(tagId, "-", conformationId);
}
/// <summary>
/// Loads the MTDB Framework database found at m_path and converts every consensus target into a
/// mass tag, together with the proteins attached to that target.
/// </summary>
/// <returns>
/// A mass tag database filled with the converted tags and proteins; it is returned empty when the
/// underlying reader yields null.
/// </returns>
public MassTagDatabase LoadDatabase()
{
    var massTagDatabase = new MassTagDatabase();

    //Implement the loading / reading of the MTDB Framework objects.
    var targetReader = new SqLiteTargetDatabaseReader();
    var loaded = targetReader.ReadDb(m_path);
    if (loaded == null)
    {
        return massTagDatabase;
    }

    var convertedTags = new List<MassTagLight>();

    // uniqueProteins guarantees a single Protein object per protein ID across all mass tags;
    // tagToProteins records which proteins belong to which mass tag ID.
    var uniqueProteins = new Dictionary<int, Protein>();
    var tagToProteins = new Dictionary<int, List<Protein>>();

    foreach (var consensusTarget in loaded.ConsensusTargets)
    {
        // Copy the consensus data into a mass tag light.
        var massTag = new MassTagLight
        {
            Id = consensusTarget.Id,
            MassMonoisotopic = consensusTarget.TheoreticalMonoIsotopicMass,
            NetAverage = consensusTarget.AverageNet,
            NetPredicted = consensusTarget.PredictedNet,
            PeptideSequence = consensusTarget.Sequence,
            NetStandardDeviation = consensusTarget.StdevNet
        };

        var linkedProteins = new List<Protein>();
        foreach (var info in consensusTarget.Proteins)
        {
            Protein existing;
            if (uniqueProteins.TryGetValue(info.Id, out existing))
            {
                linkedProteins.Add(existing);
                continue;
            }

            // Unknown protein ID: build the protein from the consensus target's protein information.
            var created = new Protein();
            created.Id = info.Id;
            created.Name = info.ProteinName;
            created.ResidueStartPosition = info.ResidueStart;
            created.ResidueEndPosition = info.ResidueEnd;
            //TODO: Do something about the cleavage state and terminus state of a protein loaded from MTDBCreator database.
            //protein.CleavageState = ??
            //protein.TerminusState = ??
            uniqueProteins.Add(created.Id, created);
            linkedProteins.Add(created);
        }

        tagToProteins.Add(massTag.Id, linkedProteins);
        convertedTags.Add(massTag);
    }

    massTagDatabase.AddMassTagsAndProteins(convertedTags, tagToProteins);
    return massTagDatabase;
}
/// <summary>
/// Runs the mass tag query and builds the list of mass tags that pass the loading filters.
/// </summary>
/// <remarks>
/// A row produces a mass tag when all of the following hold: it has a Mass_Tag_ID; its observation
/// count is at least Options.MinimumObservationCountFilter; Net_Value_to_Use is not DBNull; its NET
/// lies within [Options.MinimumNet, Options.MaximumNet]; its monoisotopic mass lies within
/// [Options.MinimumMass, Options.MaximumMass]; and, when Options.OnlyLoadTagsWithDriftTime is set,
/// its drift time is greater than zero. Columns that are DBNull keep the defaults declared at the
/// top of the loop body.
/// </remarks>
/// <returns>The mass tags that passed the filters.</returns>
protected virtual List<MassTagLight> LoadMassTags()
{
    var massTags = new List<MassTagLight>();

    // The using blocks dispose the connection, command, and reader on every exit path,
    // so no explicit Close calls or catch/rethrow are needed.
    using (var connection = CreateConnection(CreateConnectionString()))
    {
        connection.Open();
        using (var command = connection.CreateCommand())
        {
            SetupMassTagCommand(command);

            using (var reader = command.ExecuteReader())
            {
                while (reader.Read())
                {
                    if (reader["Mass_Tag_ID"] == DBNull.Value)
                    {
                        continue;
                    }

                    var id = Convert.ToInt32(reader["Mass_Tag_ID"]);
                    var peptide = string.Empty;
                    float ganet = -1;
                    var netDefined = false;
                    float xcorr_max = 0;
                    float stdNet = 0;
                    var monoMass = 0.0;
                    // No column is read for the discriminant; it stays at zero.
                    float highDiscriminant = 0;
                    var numObservations = 0;
                    var modCount = 0;
                    var modDescription = string.Empty;
                    short cleavageState = 2;
                    float driftTime = 0;
                    var charge = 0;
                    var conformerID = 0;
                    float highPeptideProphetProbability = 0;
                    double msgf = 0;

                    if (reader["Peptide"] != DBNull.Value)
                    {
                        peptide = reader["Peptide"].ToString();
                    }

                    if (reader["Net_Value_to_Use"] != DBNull.Value)
                    {
                        ganet = Convert.ToSingle(reader["Net_Value_to_Use"]);
                        netDefined = true;
                    }

                    if (reader["High_Normalized_Score"] != DBNull.Value)
                    {
                        xcorr_max = Convert.ToSingle(reader["High_Normalized_Score"]);
                    }

                    // The NET standard deviation is only read when a NET value exists.
                    if (netDefined && reader["StD_GANET"] != DBNull.Value)
                    {
                        stdNet = Convert.ToSingle(reader["StD_GANET"]);
                    }

                    if (reader["Monoisotopic_Mass"] != DBNull.Value)
                    {
                        monoMass = Convert.ToDouble(reader["Monoisotopic_Mass"]);
                    }

                    if (reader["Min_MSGF_SpecProb"] != DBNull.Value)
                    {
                        msgf = Convert.ToDouble(reader["Min_MSGF_SpecProb"]);
                    }

                    if (reader["Peptide_Obs_Count_Passing_Filter"] != DBNull.Value)
                    {
                        numObservations = Convert.ToInt32(reader["Peptide_Obs_Count_Passing_Filter"]);
                    }

                    if (reader["Mod_Count"] != DBNull.Value)
                    {
                        modCount = Convert.ToInt32(reader["Mod_Count"]);
                    }

                    if (reader["Mod_Description"] != DBNull.Value)
                    {
                        modDescription = reader["Mod_Description"].ToString();
                    }

                    // Note: for AMT tags from MSGF+ identifications, this value is actually 1 minus MSGFPlus_PValue
                    // In other words, if the PValue reported by MSGF+ is 2.469814E-07 the Peptide_Prophet_Probability is 0.9999998
                    if (reader["High_Peptide_Prophet_Probability"] != DBNull.Value)
                    {
                        highPeptideProphetProbability = Convert.ToSingle(reader["High_Peptide_Prophet_Probability"]);
                    }

                    if (reader["Cleavage_State"] != DBNull.Value)
                    {
                        cleavageState = Convert.ToInt16(reader["Cleavage_State"]);
                    }

                    if (reader["Drift_Time_Avg"] != DBNull.Value)
                    {
                        driftTime = Convert.ToSingle(reader["Drift_Time_Avg"]);
                    }

                    if (reader["Conformer_Charge"] != DBNull.Value)
                    {
                        charge = Convert.ToInt32(reader["Conformer_Charge"]);
                    }

                    if (reader["Conformer_ID"] != DBNull.Value)
                    {
                        conformerID = Convert.ToInt32(reader["Conformer_ID"]);
                    }

                    // Make sure the mass tag has been seen enough times
                    if (numObservations < Options.MinimumObservationCountFilter)
                    {
                        continue;
                    }

                    // Tags without a NET value are never loaded.
                    if (!netDefined)
                    {
                        continue;
                    }

                    var massTag = new MassTagLight
                    {
                        Id = id,
                        Molecule = new Molecule { Name = peptide },
                        Net = ganet,
                        NetAverage = ganet,
                        NetStandardDeviation = stdNet,
                        XCorr = xcorr_max,
                        DiscriminantMax = highDiscriminant,
                        MassMonoisotopic = monoMass,
                        ConformationId = conformerID,
                        ObservationCount = numObservations,
                        DriftTime = driftTime,
                        PriorProbability = highPeptideProphetProbability,
                        CleavageState = cleavageState,
                        ModificationCount = modCount,
                        Modifications = modDescription,
                        MsgfSpecProbMax = msgf,
                        PeptideSequence = peptide,
                        ChargeState = charge
                    };

                    // Keep only tags inside the configured NET and mass windows (bounds inclusive).
                    var withinRanges = massTag.Net >= Options.MinimumNet &&
                                       massTag.Net <= Options.MaximumNet &&
                                       massTag.MassMonoisotopic >= Options.MinimumMass &&
                                       massTag.MassMonoisotopic <= Options.MaximumMass;
                    if (!withinRanges)
                    {
                        continue;
                    }

                    // If we are using drift time, then we should only
                    // use mass tags that have drift time.
                    if (!Options.OnlyLoadTagsWithDriftTime || driftTime > 0)
                    {
                        massTags.Add(massTag);
                    }
                }
            }
        }
    }

    return massTags;
}