/// <summary>
/// Builds a peak around <paramref name="base_relation"/>: registers itself as that relation's peak,
/// takes the next instance id, groups nearby relations, decides acceptance and collects candidate PTM sets.
/// </summary>
/// <param name="base_relation">Relation whose RelationType and DeltaMass are copied onto this peak.</param>
/// <param name="relations_to_group">Relations handed to find_nearby_relations to populate grouped_relations.</param>
public DeltaMassPeak(ProteoformRelation base_relation, List<ProteoformRelation> relations_to_group)
{
    lock (base_relation)
    {
        base_relation.peak = this;
    }

    // Read the id inside the same critical section that bumps the counter. Reading it after
    // the lock is released could observe an increment made by another thread in between,
    // giving two peaks the same InstanceId.
    lock (SaveState.lollipop)
    {
        instance_counter += 1;
        InstanceId = instance_counter;
    }

    RelationType = base_relation.RelationType;
    DeltaMass = base_relation.DeltaMass;
    grouped_relations = find_nearby_relations(relations_to_group);

    // The ET threshold applies only when the group is non-empty and its first relation is ET;
    // every other case (including a null or empty group) is compared against the EE threshold.
    bool is_et_group = grouped_relations != null
        && grouped_relations.Count > 0
        && grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical;
    Accepted = is_et_group
        ? peak_relation_group_count >= SaveState.lollipop.min_peak_count_et
        : peak_relation_group_count >= SaveState.lollipop.min_peak_count_ee;

    possiblePeakAssignments = new List<PtmSet>();
    if (SaveState.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(Math.Round(DeltaMass, 1), out List<PtmSet> candidates))
    {
        // ET and ED peaks compare signed masses; all other relation types compare magnitudes.
        bool signed_comparison = RelationType == ProteoformComparison.ExperimentalTheoretical
            || RelationType == ProteoformComparison.ExperimentalDecoy;
        possiblePeakAssignments = candidates
            .Where(c => signed_comparison
                ? Math.Abs(DeltaMass - c.mass) <= 0.05
                : Math.Abs(Math.Abs(DeltaMass) - Math.Abs(c.mass)) <= 0.05)
            .ToList();
    }

    // One "[a;b]" group per distinct PTM set, using the unlocalized id when one is registered.
    possiblePeakAssignments_string = "[" + String.Join("][", possiblePeakAssignments
        .Select(ptmset => String.Join(";", ptmset.ptm_combination
            .Select(ptm => SaveState.lollipop.theoretical_database.unlocalized_lookup.TryGetValue(ptm.modification, out UnlocalizedModification x)
                ? x.id
                : ptm.modification.id)))
        .Distinct()) + "]";
}
/// <summary>
/// Compares this relation with <paramref name="obj"/>. They are equal when <paramref name="obj"/> is a
/// ProteoformRelation that has the same InstanceId or connects the same two proteoforms in either order.
/// </summary>
/// <param name="obj">Object to compare against; anything that is not a ProteoformRelation is unequal.</param>
/// <returns>True when the relations match by id or by connected pair.</returns>
public override bool Equals(object obj)
{
    ProteoformRelation other = obj as ProteoformRelation;
    if (other != null)
    {
        if (InstanceId == other.InstanceId)
        {
            return true;
        }

        // Same pair, opposite order.
        if (connected_proteoforms[0] == other.connected_proteoforms[1]
            && connected_proteoforms[1] == other.connected_proteoforms[0])
        {
            return true;
        }

        // Same pair, same order.
        return connected_proteoforms[0] == other.connected_proteoforms[0]
            && connected_proteoforms[1] == other.connected_proteoforms[1];
    }

    return false;
}
/// <summary>
/// Creates an ambiguous identification by storing each argument in the member of the same name.
/// </summary>
/// <param name="begin">Stored in <c>begin</c>.</param>
/// <param name="end">Stored in <c>end</c>.</param>
/// <param name="ptm_set">Stored in <c>ptm_set</c>.</param>
/// <param name="relation">Stored in <c>relation</c>.</param>
/// <param name="theoretical_base">Stored in <c>theoretical_base</c>.</param>
/// <param name="linked_proteoform_references">Stored in <c>linked_proteoform_references</c>; the list is not copied.</param>
public AmbiguousIdentification(int begin, int end, PtmSet ptm_set, ProteoformRelation relation, TheoreticalProteoform theoretical_base, List<Proteoform> linked_proteoform_references)
{
    // Range
    this.begin = begin;
    this.end = end;

    // Modifications
    this.ptm_set = ptm_set;

    // Where the identification came from
    this.theoretical_base = theoretical_base;
    this.relation = relation;
    this.linked_proteoform_references = linked_proteoform_references;
}
/// <summary>
/// Finds the allowed relatives in <paramref name="pfs2"/> for every proteoform in <paramref name="pfs1"/>,
/// builds one ProteoformRelation per pair, and returns them after count_nearby_relations has processed them.
/// </summary>
/// <param name="pfs1">Experimental proteoforms to relate from; filtered to accepted ones when <paramref name="accepted_only"/> is true.</param>
/// <param name="pfs2">Candidate partners; for EE/EF with <paramref name="accepted_only"/> only accepted experimental proteoforms are kept.</param>
/// <param name="relation_type">Kind of comparison being made.</param>
/// <param name="accepted_only">Whether to restrict the inputs to accepted proteoforms.</param>
/// <param name="current_directory">Passed to every ProteoformRelation constructed here.</param>
/// <param name="limit_et_relations">For ET/ED, keep only the single best-scoring relative per experimental proteoform.</param>
/// <returns>The result of count_nearby_relations over the constructed relations.</returns>
public List<ProteoformRelation> relate(ExperimentalProteoform[] pfs1, Proteoform[] pfs2, ProteoformComparison relation_type, bool accepted_only, string current_directory, bool limit_et_relations)
{
    if (accepted_only)
    {
        pfs1 = pfs1.Where(pf1 => pf1.accepted).ToArray();
    }

    if (accepted_only && (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse))
    {
        pfs2 = pfs2.OfType<ExperimentalProteoform>().Where(pf2 => pf2.accepted).ToArray();
    }

    Parallel.ForEach(pfs1, pf1 =>
    {
        lock (pf1)
        {
            pf1.candidate_relatives = pfs2.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();

            if (relation_type == ProteoformComparison.ExperimentalExperimental)
            {
                pf1.ptm_set = null;
                pf1.linked_proteoform_references = null;
                pf1.gene_name = null;
            }

            if (limit_et_relations && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy))
            {
                ProteoformRelation best_relation = pf1.candidate_relatives
                    .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory))
                    .Where(r => r.candidate_ptmset != null) // don't consider unassignable relations for ET
                    .OrderBy(r => r.candidate_ptmset.ptm_rank_sum + Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) * 10E-6) // rank sum first, mass error as tie breaker
                    .FirstOrDefault();

                pf1.candidate_relatives = best_relation != null
                    ? new List<Proteoform> { best_relation.connected_proteoforms[1] }
                    : new List<Proteoform>();
            }
        }
    });

    // Use the caller-supplied directory here as well, so the returned relations are built
    // with the same directory as the ones scored above instead of the process-wide
    // Environment.CurrentDirectory.
    List<ProteoformRelation> relations =
        (from pf1 in pfs1
         from pf2 in pf1.candidate_relatives
         select new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory)).ToList();

    return count_nearby_relations(relations); //putative counts include no-mans land
}
/// <summary>
/// Empties <c>families</c> and resets the family and identification members of every experimental
/// proteoform; theoretical proteoforms only have their family cleared.
/// </summary>
public void clear_families()
{
    families.Clear();

    ProteoformRelation no_relation = null;
    foreach (ExperimentalProteoform experimental in experimental_proteoforms)
    {
        experimental.family = null;
        experimental.ptm_set = new PtmSet(new List<Ptm>()); // empty set rather than null
        experimental.linked_proteoform_references = null;
        experimental.relation_to_id = no_relation;
        experimental.ambiguous_identifications.Clear();
        experimental.gene_name = null;
        experimental.novel_mods = false;
        experimental.uniprot_mods = "";
    }

    foreach (Proteoform theoretical in theoretical_proteoforms)
    {
        theoretical.family = null;
    }
}
/// <summary>
/// Forms delta-mass peaks from the relations flagged <c>outside_no_mans_land</c>, records the peak and its
/// acceptance on every grouped relation, appends the peaks to the ET or EE peak list on
/// <c>SaveState.lollipop</c>, and finally clears <c>nearby_relations</c> on all supplied relations.
/// </summary>
/// <param name="relations">Relations to form peaks from.</param>
/// <param name="decoy_relations">Decoy relations; only used at the end, where their nearby_relations are cleared too.</param>
/// <returns>The peaks created by this call.</returns>
/// <exception cref="ArgumentException">A root relation is neither ExperimentalExperimental nor ExperimentalTheoretical.</exception>
public List <DeltaMassPeak> accept_deltaMass_peaks(List <ProteoformRelation> relations, Dictionary <string, List <ProteoformRelation> > decoy_relations)
{
    // Analyze only relations outside of no-man's-land, ordered by descending nearby_relations_count
    // and then by descending agg_intensity of the first connected (experimental) proteoform.
    remaining_relations_outside_no_mans = relations.Where(r => r.outside_no_mans_land).OrderByDescending(r => r.nearby_relations_count).ThenByDescending(r => ((ExperimentalProteoform)r.connected_proteoforms[0]).agg_intensity).ToList(); // Group count is the primary sort
    List <DeltaMassPeak> peaks = new List <DeltaMassPeak>();
    ProteoformRelation root = remaining_relations_outside_no_mans.FirstOrDefault();
    List <ProteoformRelation> running = new List <ProteoformRelation>();
    List <Thread> active = new List <Thread>();

    // Each pass starts peak generation for the current root(s), waits for it, then removes the grouped relations.
    while (remaining_relations_outside_no_mans.FirstOrDefault() != null || active.Count > 0)
    {
        while (root != null && active.Count < 1) // Use Environment.ProcessorCount instead of 1 to enable parallelization
        {
            if (root.RelationType != ProteoformComparison.ExperimentalExperimental && root.RelationType != ProteoformComparison.ExperimentalTheoretical)
            {
                throw new ArgumentException("Only EE and ET peaks can be accepted");
            }

            // generate_peak runs on its own thread; the root is tracked in `running` so its peak can be read after Join.
            Thread t = new Thread(new ThreadStart(root.generate_peak));
            t.Start();
            running.Add(root);
            active.Add(t);
            root = find_next_root(remaining_relations_outside_no_mans, running);
        }

        // Wait for every started thread before reading r.peak below.
        foreach (Thread t in active)
        {
            t.Join();
        }

        foreach (DeltaMassPeak peak in running.Select(r => r.peak))
        {
            peaks.Add(peak);
            // Propagate the peak and its acceptance to each relation it grouped.
            Parallel.ForEach(peak.grouped_relations, relation =>
            {
                lock (relation)
                {
                    relation.peak = peak;
                    relation.Accepted = peak.Accepted;
                }
            });
        }

        // Relations that were grouped into a peak are no longer candidates for future roots.
        List <ProteoformRelation> mass_differences_in_peaks = running.SelectMany(r => r.peak.grouped_relations).ToList();
        remaining_relations_outside_no_mans = remaining_relations_outside_no_mans.Except(mass_differences_in_peaks).ToList();
        running.Clear();
        active.Clear();
        root = find_next_root(remaining_relations_outside_no_mans, running);
    }

    // The first peak's type decides the destination list; with no peaks the (empty) list goes to ee_peaks.
    if (peaks.Count > 0 && peaks.First().RelationType == ProteoformComparison.ExperimentalTheoretical)
    {
        SaveState.lollipop.et_peaks.AddRange(peaks);
    }
    else
    {
        SaveState.lollipop.ee_peaks.AddRange(peaks);
    }

    //Nearby relations are no longer needed after counting them
    Parallel.ForEach(decoy_relations.SelectMany(kv => kv.Value).Concat(relations).ToList(), r => { lock (r) r.nearby_relations.Clear(); });
    return(peaks);
}
/// <summary>
/// Determines the candidate relatives in <paramref name="pfs2"/> for each proteoform in <paramref name="pfs1"/>,
/// builds one ProteoformRelation per remaining pair, and returns them after count_nearby_relations.
/// </summary>
/// <param name="pfs1">Experimental proteoforms to relate from; each is locked while its candidates are computed.</param>
/// <param name="pfs2">Candidate partners; reduced to ExperimentalProteoform instances for EE and EF comparisons.</param>
/// <param name="relation_type">Kind of comparison being made.</param>
/// <param name="current_directory">Passed to every ProteoformRelation constructed here.</param>
/// <param name="limit_et_relations">For ET/ED: true keeps the single best relative, false keeps the best relative per gene name.</param>
/// <returns>The result of count_nearby_relations over the constructed relations.</returns>
public List <ProteoformRelation> relate(ExperimentalProteoform[] pfs1, Proteoform[] pfs2, ProteoformComparison relation_type, string current_directory, bool limit_et_relations)
{
    if (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse)
    {
        pfs2 = pfs2.OfType <ExperimentalProteoform>().ToArray();
    }

    // For NeuCode-labeled data, index pfs2 by lysine count so candidates can be looked up per count.
    // The dictionary is fully built here and only read inside the parallel loop below.
    Dictionary <int, List <Proteoform> > pfs2_lysine_lookup = new Dictionary <int, List <Proteoform> >();
    if (Sweet.lollipop.neucode_labeled)
    {
        foreach (Proteoform pf2 in pfs2)
        {
            if (!pfs2_lysine_lookup.TryGetValue(pf2.lysine_count, out List <Proteoform> same_lysine_ct))
            {
                pfs2_lysine_lookup.Add(pf2.lysine_count, new List <Proteoform> { pf2 });
            }
            else
            {
                same_lysine_ct.Add(pf2);
            }
        }
    }

    Parallel.ForEach(pfs1, pf1 =>
    {
        lock (pf1)
        {
            // Step 1: pick the pool of candidates.
            if (Sweet.lollipop.neucode_labeled && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy || relation_type == ProteoformComparison.ExperimentalExperimental))
            {
                // Only partners with exactly the same lysine count are considered.
                pfs2_lysine_lookup.TryGetValue(pf1.lysine_count, out List <Proteoform> pfs2_same_lysine_count);
                pf1.candidate_relatives = pfs2_same_lysine_count != null ?
                    pfs2_same_lysine_count.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList() :
                    new List <Proteoform>();
            }
            else if (Sweet.lollipop.neucode_labeled && relation_type == ProteoformComparison.ExperimentalFalse)
            {
                // False relations use partners whose lysine count differs by more than maximum_missed_lysines.
                List <Proteoform> pfs2_lysines_outside_tolerance = pfs2_lysine_lookup
                    .Where(kv => Math.Abs(pf1.lysine_count - kv.Key) > Sweet.lollipop.maximum_missed_lysines)
                    .SelectMany(kv => kv.Value).ToList();
                pf1.candidate_relatives = pfs2_lysines_outside_tolerance
                    .Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();
            }
            else if (!Sweet.lollipop.neucode_labeled)
            {
                pf1.candidate_relatives = pfs2.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();
            }
            // NOTE(review): when neucode_labeled is true and relation_type is none of the four types above,
            // candidate_relatives is left untouched — confirm that no such type is ever passed.

            // Step 2: EE comparison resets the identification state of pf1.
            if (relation_type == ProteoformComparison.ExperimentalExperimental)
            {
                pf1.ptm_set = null;
                pf1.linked_proteoform_references = null;
                pf1.ambiguous_identifications.Clear();
                pf1.gene_name = null;
                // pf1.relation_to_id = null;
            }

            // Step 3: for ET/ED, narrow the candidates to the best-scoring ones.
            if (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy)
            {
                if (limit_et_relations)
                {
                    ProteoformRelation best_relation = pf1.candidate_relatives
                        .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory))
                        .Where(r => r.candidate_ptmset != null && topdown_bottomup_comparison(pf1, r.connected_proteoforms[1] as TheoreticalProteoform) ) // don't consider unassignable relations for ET
                        .OrderBy(r => r.candidate_ptmset.ptm_rank_sum + Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) * 10E-6) // get the best explanation for the experimental observation
                        .FirstOrDefault();
                    pf1.candidate_relatives = best_relation != null ?
                        new List <Proteoform> { best_relation.connected_proteoforms[1] } :
                        new List <Proteoform>();
                }
                else //candidate relatives will be best T from each gene (won't get -42, etc)
                {
                    List <ProteoformRelation> best_relatives_for_each_gene_name = new List <ProteoformRelation>();
                    var gene_names = pf1.candidate_relatives.Select(r => (r as TheoreticalProteoform).gene_name.get_prefered_name(Lollipop .preferred_gene_label))
                        .Distinct();
                    foreach (var gene_name in gene_names)
                    {
                        // May add null when no candidate for this gene has a candidate_ptmset; nulls are filtered below.
                        best_relatives_for_each_gene_name.Add(pf1.candidate_relatives
                            .Where(p => (p as TheoreticalProteoform).gene_name.get_prefered_name( Lollipop.preferred_gene_label) == gene_name)
                            .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory))
                            .Where(r => r.candidate_ptmset != null) // don't consider unassignable relations for ET
                            .OrderBy(r => r.candidate_ptmset.ptm_rank_sum + Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) * 10E-6) // get the best explanation for the experimental observation
                            .FirstOrDefault());
                    }
                    // The list was created above, so the null check always takes the first branch.
                    pf1.candidate_relatives = best_relatives_for_each_gene_name != null ? best_relatives_for_each_gene_name.Where(r => r != null && topdown_bottomup_comparison(pf1, r.connected_proteoforms[1] as TheoreticalProteoform)).Select(r => r.connected_proteoforms[1]).ToList() : new List <Proteoform>();
                }
            }
        }
    });

    // Build the final relations from whatever candidates survived for each pf1.
    List <ProteoformRelation> relations = (from pf1 in pfs1 from pf2 in pf1.candidate_relatives select new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory)).ToList();
    if (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse)
    {
        if (Sweet.lollipop.ee_use_notch)
        {
            // With the EE notch option, relations without a candidate PTM set are dropped.
            relations = relations.Where(r => r.candidate_ptmset != null).ToList();
        }
    }
    return(count_nearby_relations(relations)); //putative counts include no-mans land
}
/// <summary>
/// Records <paramref name="change"/> on relation <paramref name="r"/> when it has no represented PTM set yet,
/// and gives <paramref name="e"/> its linked references, PTM set and gene name when those are still unset.
/// </summary>
/// <param name="e">Experimental proteoform receiving the identity.</param>
/// <param name="theoretical_reference">Not read by this method.</param>
/// <param name="set">PTM set assigned to <paramref name="e"/> when it has no linked references yet.</param>
/// <param name="r">Relation whose represented_ptmset (and, for EE, DeltaMass sign) may be updated.</param>
/// <param name="sign">Multiplier applied to <c>r.DeltaMass</c> for EE relations.</param>
/// <param name="change">PTM set stored as <c>r.represented_ptmset</c>.</param>
private void assign_pf_identity(ExperimentalProteoform e, Proteoform theoretical_reference, PtmSet set, ProteoformRelation r, int sign, PtmSet change)
{
    bool relation_unassigned = r.represented_ptmset == null;
    if (relation_unassigned)
    {
        r.represented_ptmset = change;
        if (r.RelationType == ProteoformComparison.ExperimentalExperimental)
        {
            r.DeltaMass = r.DeltaMass * sign;
        }
    }

    bool proteoform_unlinked = e.linked_proteoform_references == null;
    if (proteoform_unlinked)
    {
        // Copy this proteoform's reference chain, then append this proteoform itself.
        e.linked_proteoform_references = new List<Proteoform>(this.linked_proteoform_references);
        e.linked_proteoform_references.Add(this);
        e.ptm_set = set;
    }

    if (e.gene_name != null)
    {
        // NOTE(review): the result of Concat is discarded; if this is LINQ's Enumerable.Concat,
        // the statement does not change e.gene_name.gene_names — confirm whether a merge was intended.
        e.gene_name.gene_names.Concat(this.gene_name.gene_names);
    }
    else
    {
        e.gene_name = this.gene_name;
    }
}
/// <summary>
/// Builds a peak around <paramref name="base_relation"/>: registers itself as that relation's peak, takes the
/// next instance id, groups relations (by nearby delta mass, or by candidate PTM-set mass when the ET notch
/// option applies), registers the grouped relations on their proteoforms, and decides acceptance.
/// </summary>
/// <param name="base_relation">Relation whose RelationType and DeltaMass seed this peak.</param>
/// <param name="relations_to_group">Pool of relations from which grouped_relations is drawn.</param>
public DeltaMassPeak(ProteoformRelation base_relation, HashSet<ProteoformRelation> relations_to_group)
{
    lock (base_relation)
    {
        base_relation.peak = this;
    }

    // Read the id inside the same critical section that bumps the counter. Reading it after
    // the lock is released could observe an increment made by another thread in between,
    // giving two peaks the same InstanceId.
    lock (Sweet.lollipop)
    {
        IncrementInstanceCounter();
        InstanceId = instance_counter;
    }

    RelationType = base_relation.RelationType;
    DeltaMass = base_relation.DeltaMass;

    if (RelationType == ProteoformComparison.ExperimentalExperimental || !Sweet.lollipop.et_use_notch)
    {
        grouped_relations = find_nearby_relations(relations_to_group);

        bool are_positive_candidates = Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(
            Math.Round(DeltaMass, 1), out List<PtmSet> positive_candidates);

        // Candidates around the negated delta mass are only relevant for EE and EF relations.
        bool are_negative_candidates = Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(
                Math.Round(-DeltaMass, 1), out List<PtmSet> negative_candidates)
            && (RelationType == ProteoformComparison.ExperimentalExperimental || RelationType == ProteoformComparison.ExperimentalFalse);

        if (are_positive_candidates || are_negative_candidates)
        {
            List<PtmSet> candidates = (are_positive_candidates ? positive_candidates : new List<PtmSet>())
                .Concat(are_negative_candidates ? negative_candidates : new List<PtmSet>())
                .ToList();

            possiblePeakAssignments = candidates.Where(c =>
            {
                // ET and ED peaks compare signed masses; other relation types compare magnitudes.
                bool within_width;
                if (RelationType == ProteoformComparison.ExperimentalTheoretical || RelationType == ProteoformComparison.ExperimentalDecoy)
                {
                    within_width = Math.Abs(DeltaMass - c.mass)
                        <= (grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical
                            ? Sweet.lollipop.peak_width_base_et
                            : Sweet.lollipop.peak_width_base_ee);
                }
                else
                {
                    within_width = Math.Abs(Math.Abs(DeltaMass) - Math.Abs(c.mass))
                        <= (grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical
                            ? Sweet.lollipop.peak_width_base_et
                            : Sweet.lollipop.peak_width_base_ee);
                }
                return within_width;
            }).ToList();
        }
        else
        {
            possiblePeakAssignments = new List<PtmSet>();
        }
    }
    else
    {
        // Notch mode: group every relation whose candidate PTM-set mass matches the base relation's to 5 decimals,
        // and take the peak's delta mass from the first grouped PTM set.
        grouped_relations = relations_to_group
            .Where(r => Math.Round(r.candidate_ptmset.mass, 5) == Math.Round(base_relation.candidate_ptmset.mass, 5))
            .ToList();
        possiblePeakAssignments = grouped_relations.Select(r => r.candidate_ptmset).ToList();
        DeltaMass = possiblePeakAssignments.First().mass;
    }

    // Register each grouped relation on both proteoforms it connects.
    foreach (ProteoformRelation mass_difference in grouped_relations)
    {
        foreach (Proteoform p in mass_difference.connected_proteoforms)
        {
            lock (p)
            {
                p.relationships.Add(mass_difference);
            }
        }
    }

    // One "[a;b]" group per distinct PTM set, ordered by rank sum.
    possiblePeakAssignments_string = "[" + string.Join("][", possiblePeakAssignments
        .OrderBy(p => p.ptm_rank_sum)
        .Select(ptmset => string.Join(";", ptmset.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))))
        .Distinct()) + "]";

    // The ET threshold applies only when the group is non-empty and its first relation is ET. Otherwise the
    // EE threshold applies, optionally requiring a candidate PTM set ranked below the first quartile.
    bool is_et_group = grouped_relations != null
        && grouped_relations.Count > 0
        && grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical;
    Accepted = is_et_group
        ? (peak_relation_group_count >= Sweet.lollipop.min_peak_count_et)
        : (peak_relation_group_count >= Sweet.lollipop.min_peak_count_ee
            && (!Sweet.lollipop.ee_accept_peaks_based_on_rank
                || possiblePeakAssignments.Count > 0 && possiblePeakAssignments.Any(p => p.ptm_rank_sum < Sweet.lollipop.mod_rank_first_quartile)));
}
/// <summary>
/// Tries to identify <paramref name="e"/> from this proteoform using <paramref name="set"/> and the range
/// <paramref name="begin"/>..<paramref name="end"/> on <paramref name="theoretical_base"/>. The first identification is
/// stored directly on <paramref name="e"/>; later, differing ones are recorded as ambiguous identifications or,
/// depending on the reduce-ambiguity settings, replace the existing one. When <paramref name="check_ambiguous_IDs"/>
/// is true, ambiguous identifications are pruned and those held by this proteoform are propagated to <paramref name="e"/>.
/// </summary>
/// <param name="e">Experimental proteoform being identified.</param>
/// <param name="set">PTM set proposed for <paramref name="e"/>; copied before "AminoAcid"/"Missing" entries are removed.</param>
/// <param name="begin">Proposed begin position, adjusted into a local copy.</param>
/// <param name="end">Proposed end position, adjusted into a local copy.</param>
/// <param name="r">Relation through which the identification is made.</param>
/// <param name="theoretical_base">Theoretical proteoform the identification is based on.</param>
/// <param name="linked_proteoform_references">Reference chain to copy onto <paramref name="e"/>; may be null.</param>
/// <param name="check_ambiguous_IDs">Whether to run the pruning and propagation stage.</param>
/// <returns>True when a primary or ambiguous identification was assigned during this call or a nested call.</returns>
private bool assign_pf_identity(ExperimentalProteoform e, PtmSet set, int begin, int end, ProteoformRelation r, TheoreticalProteoform theoretical_base, List <Proteoform> linked_proteoform_references, bool check_ambiguous_IDs)
{
    bool identification_assigned = false;

    // Proceed only if ppm filtering is off or the mass error (in ppm of e.modified_mass) is under the tolerance.
    if (!Sweet.lollipop.id_use_ppm_tolerance || Math.Abs(e.calculate_mass_error(theoretical_base, set, begin, end) * 1e6 / e.modified_mass) < Sweet.lollipop.id_ppm_tolerance)
    {
        int new_begin = begin;
        int new_end = end;
        PtmSet new_set = new PtmSet(new List <Ptm>(set.ptm_combination));
        List <Ptm> remove = new List <Ptm>();

        //do retention of M first
        // Each "AminoAcid" modification moves the begin one position earlier and is dropped from the set.
        foreach (var mod in new_set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
        {
            new_begin--;
            remove.Add(mod);
        }

        // Each "Missing" modification is converted into a begin/end shift when the residue at that terminus matches its target.
        foreach (var mod in new_set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
        {
            if (!new_set.ptm_combination.Any(m => m.modification.ModificationType == "AminoAcid") && begin >= theoretical_base.begin)
            {
                if (theoretical_base.sequence[begin - theoretical_base.begin].ToString() == mod.modification.Target.ToString())
                {
                    new_begin++;
                    remove.Add(mod); //dont have in ptmset --> change the begin & end
                }
            }
            // NOTE(review): end - theoretical_base.begin is not range-checked here — presumably always inside the sequence; confirm.
            if (!remove.Contains(mod) && theoretical_base.sequence[end - theoretical_base.begin].ToString() == mod.modification.Target.ToString())
            {
                new_end--;
                remove.Add(mod);
            }
        }
        foreach (var ptm in remove)
        {
            new_set.ptm_combination.Remove(ptm);
        }
        new_set = new PtmSet(new_set.ptm_combination);

        if (e.linked_proteoform_references == null)
        {
            // First identification for e: store it directly.
            identification_assigned = true;
            if (linked_proteoform_references != null)
            {
                e.linked_proteoform_references = new List <Proteoform>(linked_proteoform_references);
                e.linked_proteoform_references.Add(this);
            }
            else
            {
                e.linked_proteoform_references = new List <Proteoform>() { theoretical_base };
            }
            e.relation_to_id = r;
            e.ptm_set = new_set;
            e.begin = new_begin;
            e.end = new_end;
            if (e.gene_name == null)
            {
                e.gene_name = theoretical_base.gene_name;
            }
            else
            {
                // NOTE(review): the result of Concat is discarded; if this is LINQ's Enumerable.Concat,
                // the statement has no effect on e.gene_name.gene_names — confirm whether a merge was intended.
                e.gene_name.gene_names.Concat(this.gene_name.gene_names);
            }
        }
        else
        {
            // e already has an identification; skip reference chains that already contain e.
            if (linked_proteoform_references != null && !linked_proteoform_references.Contains(e))
            {
                // The new ID differs if the preferred gene name, the sequence over the range, or the PTM set differs.
                bool different_id = e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) != theoretical_base.gene_name.get_prefered_name(Lollipop.preferred_gene_label) || ExperimentalProteoform.get_sequence(e.linked_proteoform_references.First() as TheoreticalProteoform, e.begin, e.end) != ExperimentalProteoform.get_sequence(theoretical_base, new_begin, new_end) || !e.ptm_set.same_ptmset(new_set, true);

                // Non-zero-mass modifications listed on the new and on the previously assigned theoretical base.
                List <Modification> this_known_mods = theoretical_base.ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(v => v.MonoisotopicMass != 0).ToList();
                List <Modification> previous_id_known_mods = (e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(v => v.MonoisotopicMass != 0).ToList();

                // Replacement path: entered when the new ID is preferred under every enabled reduce-ambiguity rule
                // (new base is top-down while the old one is not; new mods all known/adducts while the old ones are not).
                if (!Sweet.lollipop.topdown_theoretical_reduce_ambiguity || (theoretical_base.topdown_theoretical && !(e.linked_proteoform_references.First() as TheoreticalProteoform).topdown_theoretical))
                {
                    if (!Sweet.lollipop.annotated_PTMs_reduce_ambiguity || (new_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || this_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))) && !e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification)))))
                    {
                        if (Sweet.lollipop.topdown_theoretical_reduce_ambiguity || Sweet.lollipop.annotated_PTMs_reduce_ambiguity)
                        {
                            if (Sweet.lollipop.remove_bad_connections && different_id) //&& e.relation_to_id != r)
                            {
                                e.relation_to_id.Identification = false;
                                e.relation_to_id.represented_ptmset = null;
                            }

                            // Wipe the existing identification so the recursive call below takes the first-identification branch.
                            e.linked_proteoform_references = null;
                            e.ptm_set = new PtmSet(new List <Ptm>());
                            e.begin = 0;
                            e.end = 0;
                            e.gene_name = null;
                            e.ambiguous_identifications.Clear();
                            ProteoformRelation relation = null;
                            e.relation_to_id = relation;

                            //reassign the topdown - based ID
                            return(this.assign_pf_identity(e, set, begin, end, r, theoretical_base, linked_proteoform_references, true));
                        }
                    }
                }

                if (Sweet.lollipop.topdown_theoretical_reduce_ambiguity && (e.linked_proteoform_references.First() as TheoreticalProteoform).topdown_theoretical && !theoretical_base.topdown_theoretical)
                {
                    // Existing ID is top-down based and the new one is not: nothing is recorded.
                }
                else if (Sweet.lollipop.annotated_PTMs_reduce_ambiguity && !new_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || this_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))) && e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
                {
                    // Existing ID uses only known mods/adducts and the new one does not: nothing is recorded.
                }
                else
                {
                    if (different_id)
                    {
                        var new_linked_proteoform_references = new List <Proteoform>(linked_proteoform_references);
                        new_linked_proteoform_references.Add(this);
                        AmbiguousIdentification new_id = new AmbiguousIdentification(new_begin, new_end, new_set, r, theoretical_base, new_linked_proteoform_references);
                        lock (e.ambiguous_identifications)
                        {
                            // Add only if no stored ambiguous ID has the same primary gene name, sequence and PTM set.
                            if (!e.ambiguous_identifications.Any(p => p.theoretical_base.gene_name.primary == new_id.theoretical_base.gene_name.primary && ExperimentalProteoform.get_sequence(p.theoretical_base, p.begin, p.end) == ExperimentalProteoform.get_sequence(new_id.theoretical_base, new_id.begin, new_id.end) && p.ptm_set.same_ptmset(new_id.ptm_set, true)))
                            {
                                e.ambiguous_identifications.Add(new_id);
                                identification_assigned = true;
                            }
                        }
                    }
                }
            }
        }
    }

    if (check_ambiguous_IDs)
    {
        //remove bad relations if using td to reduce ambiguity
        if (identification_assigned)
        {
            List <AmbiguousIdentification> to_remove = new List <AmbiguousIdentification>();
            List <Modification> previous_id_known_mods = (e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(m => m.MonoisotopicMass != 0).ToList();

            // A top-down based primary ID evicts ambiguous IDs that are not top-down based.
            if (theoretical_base.topdown_theoretical && Sweet.lollipop.topdown_theoretical_reduce_ambiguity)
            {
                to_remove.AddRange(e.ambiguous_identifications.Where(id => !id.theoretical_base.topdown_theoretical));
            }

            // A primary ID made only of known mods/adducts evicts ambiguous IDs that need an unknown, non-adduct mod.
            if (Sweet.lollipop.annotated_PTMs_reduce_ambiguity && e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
            {
                foreach (var ambiguous_id in e.ambiguous_identifications)
                {
                    List <Modification> ambiguous_id_known_mods = ambiguous_id.theoretical_base.ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(m => m.MonoisotopicMass != 0).ToList();
                    if (ambiguous_id.ptm_set.ptm_combination.Any(mod1 => !modification_is_adduct(mod1.modification) && !ambiguous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
                    {
                        to_remove.Add(ambiguous_id);
                    }
                }
            }

            foreach (var x in to_remove)
            {
                if (e.ambiguous_identifications.Contains(x))
                {
                    e.ambiguous_identifications.Remove(x);
                    if (Sweet.lollipop.remove_bad_connections)
                    {
                        // Do not un-identify the relation that carries the primary ID.
                        if (e.relation_to_id != x.relation)
                        {
                            x.relation.Identification = false;
                            x.relation.represented_ptmset = null;
                        }
                    }
                }
            }

            // Relations of the surviving ambiguous IDs are (re)marked as identifications.
            foreach (var x in e.ambiguous_identifications)
            {
                x.relation.Identification = true;
            }
        }

        // If this proteoform is itself experimental with ambiguous IDs, try to carry each one over to e.
        if (this as ExperimentalProteoform != null && (this as ExperimentalProteoform).ambiguous_identifications.Count > 0)
        {
            lock ((this as ExperimentalProteoform).ambiguous_identifications)
            {
                int count = (this as ExperimentalProteoform).ambiguous_identifications.Count;
                PtmSet[] new_ptm_set = new PtmSet[count];

                // Compute the modified PTM set for every ambiguous ID in parallel; each iteration writes its own slot.
                Parallel.For(0, count, i =>
                {
                    var id = (this as ExperimentalProteoform).ambiguous_identifications[i];
                    new_ptm_set[i] = determine_mod_change(e, this, id.theoretical_base, r, id.ptm_set, id.begin, id.end);
                });

                // Assign sequentially; check_ambiguous_IDs is false here so the nested calls do not prune or propagate again.
                for (int i = 0; i < count; i++)
                {
                    if (new_ptm_set[i] != null)
                    {
                        var id = (this as ExperimentalProteoform).ambiguous_identifications[i];
                        if (assign_pf_identity(e, new_ptm_set[i], id.begin, id.end, r, id.theoretical_base, id.linked_proteoform_references, false))
                        {
                            identification_assigned = true;
                        }
                    }
                }
            }
        }
    }
    return(identification_assigned);
}
/// <summary>
/// Picks, from the possible assignments of the relation's peak, the PTM-set addition or loss that best fits
/// the signed delta mass between <paramref name="e"/> and <paramref name="p"/>, and returns
/// <paramref name="this_ptmset"/> with that change applied. A loss takes precedence over an addition.
/// Also stores the chosen change on <paramref name="r"/> when it has no represented PTM set yet.
/// </summary>
/// <param name="e">Experimental proteoform being identified.</param>
/// <param name="p">Proteoform the identification is propagated from; its mass scales the loss tolerance.</param>
/// <param name="theoretical_base">Forwarded to generate_possible_added_ptmsets.</param>
/// <param name="r">Relation whose peak supplies the delta mass and candidate PTM sets.</param>
/// <param name="this_ptmset">PTM set to add to or remove from.</param>
/// <param name="begin">Forwarded to generate_possible_added_ptmsets.</param>
/// <param name="end">Forwarded to generate_possible_added_ptmsets.</param>
/// <returns>The changed PtmSet, or null when neither an addition nor a loss is available.</returns>
private static PtmSet determine_mod_change(ExperimentalProteoform e, Proteoform p, TheoreticalProteoform theoretical_base, ProteoformRelation r, PtmSet this_ptmset, int begin, int end)
{
    double mass_tolerance = p.modified_mass / 1000000 * Sweet.lollipop.mass_tolerance;
    int sign = Math.Sign(e.modified_mass - p.modified_mass);

    // give EE relations the correct sign, but don't switch negative ET relation deltaM's
    double deltaM;
    if (Math.Sign(r.peak.DeltaMass) < 0)
    {
        deltaM = r.peak.DeltaMass;
    }
    else
    {
        deltaM = sign * r.peak.DeltaMass;
    }

    // EE relations have PtmSets around both positive and negative deltaM, so keep only those near the deltaM of interest
    List<PtmSet> possible_additions = r.peak.possiblePeakAssignments
        .Where(peak => Math.Abs(peak.mass - deltaM) <= 1)
        .ToList();

    // major score: rank sum; tie breaker: distance from deltaM, which is always less than 1
    var ranked_additions = generate_possible_added_ptmsets(possible_additions, Sweet.lollipop.theoretical_database.all_mods_with_mass, theoretical_base, begin, end, this_ptmset, 1, true)
        .OrderBy(x => (double)x.ptm_rank_sum + Math.Abs(x.mass - deltaM) * 10E-6);
    PtmSet best_addition = ranked_additions.FirstOrDefault();

    // Collect every peak assignment that could be a loss: its negated mass is within tolerance of deltaM
    // and the current set contains at least as many of each modification as the loss requires.
    List<PtmSet> removable_losses = new List<PtmSet>();
    foreach (PtmSet candidate_loss in r.peak.possiblePeakAssignments)
    {
        bool within_loss_tolerance = deltaM >= -candidate_loss.mass - mass_tolerance
            && deltaM <= -candidate_loss.mass + mass_tolerance;
        if (!within_loss_tolerance)
        {
            continue;
        }

        List<Modification> current_mods = this_ptmset.ptm_combination.Select(ptm => ptm.modification).ToList();
        List<Modification> lost_mods = candidate_loss.ptm_combination.Select(ptm => ptm.modification).ToList();

        // all must be in the current set to remove them
        bool can_be_removed = lost_mods.All(needed =>
            current_mods.Count(have => UnlocalizedModification.LookUpId(have) == UnlocalizedModification.LookUpId(needed))
            >= lost_mods.Count(other => UnlocalizedModification.LookUpId(other) == UnlocalizedModification.LookUpId(needed)));
        if (can_be_removed)
        {
            removable_losses.Add(candidate_loss);
        }
    }

    PtmSet best_loss = removable_losses
        .OrderBy(s => Math.Abs(deltaM - (-s.mass)))
        .FirstOrDefault();

    if (best_addition == null && best_loss == null)
    {
        return null;
    }

    // Make the new ptmset with ptms removed or added
    PtmSet with_mod_change;
    if (best_loss == null)
    {
        // Addition: append the added PTMs and drop zero-mass entries.
        var combined = this_ptmset.ptm_combination
            .Concat(best_addition.ptm_combination)
            .Where(ptm => ptm.modification.MonoisotopicMass != 0);
        with_mod_change = new PtmSet(new List<Ptm>(combined));
    }
    else
    {
        // Loss: for every lost PTM remove one entry with the same unlocalized id.
        List<Ptm> remaining = new List<Ptm>(this_ptmset.ptm_combination);
        foreach (Ptm lost in best_loss.ptm_combination)
        {
            Ptm match = remaining.FirstOrDefault(held => UnlocalizedModification.LookUpId(held.modification) == UnlocalizedModification.LookUpId(lost.modification));
            remaining.Remove(match);
        }
        with_mod_change = new PtmSet(remaining);
    }

    if (r.represented_ptmset == null)
    {
        if (best_loss == null)
        {
            r.represented_ptmset = best_addition;
        }
        else
        {
            r.represented_ptmset = best_loss;
        }
    }

    return with_mod_change;
}
/// <summary>
/// Identifies <paramref name="e"/> from this proteoform. The first identification is stored directly on
/// <paramref name="e"/>; a later, differing one is recorded as an ambiguous identification tuple
/// (reference proteoform, begin, end, PTM set). Ambiguous identifications held by this proteoform are
/// propagated to <paramref name="e"/>, and <c>e.uniprot_mods</c> / <c>e.novel_mods</c> are then rebuilt.
/// </summary>
/// <param name="e">Experimental proteoform being identified.</param>
/// <param name="set">PTM set proposed for <paramref name="e"/>.</param>
/// <param name="r">Relation passed to determine_mod_change when propagating ambiguous identifications.</param>
/// <param name="theoretical_base">Theoretical proteoform whose sequence and known modifications are consulted.</param>
private void assign_pf_identity(ExperimentalProteoform e, PtmSet set, ProteoformRelation r, TheoreticalProteoform theoretical_base)
{
    if (e.linked_proteoform_references == null)
    {
        // First identification for e: copy this proteoform's reference chain and append this proteoform.
        e.linked_proteoform_references = new List <Proteoform>(this.linked_proteoform_references);
        e.linked_proteoform_references.Add(this);
        e.ptm_set = set;
        e.begin = this.begin;
        e.end = this.end;
        List <Ptm> remove = new List <Ptm>();

        //do retention of M first
        foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
        {
            e.begin--;
            remove.Add(mod);
        }

        // "Missing" modifications become a begin/end shift when the residue at that position matches their target.
        foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
        {
            if (theoretical_base.sequence[this.begin - theoretical_base.begin].ToString() == mod.modification.Target.ToString())
            {
                e.begin++;
                remove.Add(mod); //dont have in ptmset --> change the begin & end
            }
            // NOTE(review): the end residue is indexed with this.end - this.begin, whereas the begin residue
            // uses an offset from theoretical_base.begin — confirm this index is the intended one.
            else if (theoretical_base.sequence[this.end - this.begin].ToString() == mod.modification.Target.ToString())
            {
                e.end--;
                remove.Add(mod);
            }
        }

        // NOTE(review): e.ptm_set was assigned `set` above, so (assuming ptm_set is a plain reference) these
        // removals also change the caller's set.ptm_combination — confirm that is intended.
        foreach (var ptm in remove)
        {
            e.ptm_set.ptm_combination.Remove(ptm);
        }
        e.ptm_set = new PtmSet(e.ptm_set.ptm_combination);

        if (e.gene_name == null)
        {
            e.gene_name = this.gene_name;
        }
        else if (!e.topdown_id)
        {
            // NOTE(review): the result of Concat is discarded; if this is LINQ's Enumerable.Concat,
            // the statement has no effect on e.gene_name.gene_names — confirm whether a merge was intended.
            e.gene_name.gene_names.Concat(this.gene_name.gene_names);
        }
    }
    else
    {
        //check if assign
        // e is already identified: compute the would-be begin/end/PTM set and compare with what e holds.
        int begin = this.begin;
        int end = this.end;
        PtmSet ptm_set = set;
        List <Ptm> remove = new List <Ptm>();

        //do retention of M first
        foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
        {
            begin--;
            remove.Add(mod);
        }
        foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
        {
            if (theoretical_base.sequence[this.begin - theoretical_base.begin].ToString() == mod.modification.Target.ToString())
            {
                begin++;
                remove.Add(mod); //dont have in ptmset --> change the begin & end
            }
            else if (theoretical_base.sequence[this.end - this.begin].ToString() == mod.modification.Target.ToString())
            {
                end--;
                remove.Add(mod);
            }
        }

        // ptm_set refers to the same object as `set` at this point, so the removals act on set.ptm_combination.
        foreach
        (var ptm in remove)
        {
            ptm_set.ptm_combination.Remove(ptm);
        }
        ptm_set = new PtmSet(ptm_set.ptm_combination);

        // Differs in preferred gene name, begin, end or PTM set: record as an ambiguous identification.
        if (e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) != this.gene_name.get_prefered_name(Lollipop.preferred_gene_label) || e.begin != begin || e.end != end || !e.ptm_set.same_ptmset(ptm_set, true))
        {
            e.ambiguous = true;
            // Use the first linked reference of this proteoform, or this proteoform itself when it has none.
            Proteoform linked_proteoform_reference = this.linked_proteoform_references == null || this.linked_proteoform_references.Count == 0 ? this : this.linked_proteoform_references.First();
            Tuple <Proteoform, int, int, PtmSet> new_id = new Tuple <Proteoform, int, int, PtmSet>(linked_proteoform_reference, begin, end, ptm_set);
            lock (e.ambiguous_identifications)
            {
                // Skip if an equivalent tuple (gene name, begin, end, PTM set) is already stored.
                if (!e.ambiguous_identifications.Any(p => p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) == new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) && p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 && p.Item4.same_ptmset(new_id.Item4, true)))
                {
                    e.ambiguous_identifications.Add(new_id);
                }
            }
        }
    }

    // If this proteoform is an ambiguous experimental proteoform, carry each of its ambiguous IDs over to e.
    if (this as ExperimentalProteoform != null && (this as ExperimentalProteoform).ambiguous)
    {
        foreach (var id in this.ambiguous_identifications)
        {
            TheoreticalProteoform id_theoretical_base = id.Item1 as TheoreticalProteoform;
            int begin = id.Item2;
            int end = id.Item3;
            var remove = new List <Ptm>();
            var ptm_set = determine_mod_change(e, this, id_theoretical_base, r, id.Item4);
            if (ptm_set == null)
            {
                continue;
            }

            //do retention of M first
            foreach (var mod in ptm_set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
            {
                begin--;
                remove.Add(mod);
            }
            foreach (var mod in ptm_set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
            {
                if (id_theoretical_base.sequence[id.Item2 - id.Item1.begin].ToString() == mod.modification.Target.ToString())
                {
                    begin++;
                    remove.Add(mod); //dont have in ptmset --> change the begin & end
                }
                // NOTE(review): indexed with id.Item3 - id.Item2 (end minus begin of the ambiguous ID) — confirm intended.
                else if (id_theoretical_base.sequence[id.Item3 - id.Item2].ToString() ==
                    mod.modification.Target.ToString())
                {
                    end--;
                    remove.Add(mod);
                }
            }
            foreach (var ptm in remove)
            {
                ptm_set.ptm_combination.Remove(ptm);
            }
            ptm_set = new PtmSet(ptm_set.ptm_combination);

            lock (e.ambiguous_identifications)
            {
                var new_id = new Tuple <Proteoform, int, int, PtmSet>(id.Item1, begin, end, ptm_set);
                // Add only when it differs from e's primary ID and no equivalent ambiguous ID is stored yet.
                if ((e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) != new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) || e.begin != new_id.Item2 || e.end != new_id.Item3 || !e.ptm_set.same_ptmset(new_id.Item4, true)) && !e.ambiguous_identifications.Any(p => p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) == new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) && p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 && p.Item4.same_ptmset(new_id.Item4, true)))
                {
                    e.ambiguous_identifications.Add(new_id);
                    e.ambiguous = true;
                }
            }
        }
    }

    // Rebuild the summary of which modification ids are annotated on theoretical_base within e's range.
    e.uniprot_mods = "";
    foreach (string mod in e.ptm_set.ptm_combination.Concat(e.ambiguous_identifications.SelectMany(i => i.Item4.ptm_combination)).Where(ptm => ptm.modification.ModificationType != "Deconvolution Error").Select(ptm => UnlocalizedModification.LookUpId(ptm.modification)).ToList().Distinct().OrderBy(m => m))
    {
        // positions with mod
        List <int> theo_ptms = theoretical_base.ExpandedProteinList.First()
            .OneBasedPossibleLocalizedModifications
            .Where(p => p.Key >= e.begin && p.Key <= e.end && p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
            .Select(m => m.Key).ToList();
        if (theo_ptms.Count > 0)
        {
            e.uniprot_mods += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
        }

        // More occurrences of this mod in the primary or any ambiguous PTM set than annotated positions: flag as novel.
        if (e.ptm_set.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
            .Count(m => m == mod) > theo_ptms.Count || e.ambiguous_identifications.Any(i => i.Item4.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
            .Count(m => m == mod) > theo_ptms.Count))
        {
            e.novel_mods = true;
        }
    }
    // Disabled check retained from the original source:
    //else if (!e.topdown_id && e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) != this.gene_name.get_prefered_name(Lollipop.preferred_gene_label)
    //    && e.linked_proteoform_references.Count == this.linked_proteoform_references.Count + 1)
    //{
    //    e.ambiguous = true;
    //}
}