/// <summary>
/// Creates a peak seeded by <paramref name="base_relation"/>: registers this peak on the relation,
/// takes the next instance id, groups nearby relations, decides acceptance against the minimum
/// peak counts, and collects the PTM sets whose mass is within 0.05 of the peak's delta mass.
/// </summary>
/// <param name="base_relation">Relation supplying the peak's RelationType and DeltaMass; its peak member is set to this instance.</param>
/// <param name="relations_to_group">Relations handed to find_nearby_relations to form grouped_relations.</param>
public DeltaMassPeak(ProteoformRelation base_relation, List <ProteoformRelation> relations_to_group)
        {
            lock (base_relation)
            {
                base_relation.peak = this;
            }

            // Increment AND read the counter while holding the lock. Reading it after the lock is
            // released lets another constructor bump the counter in between, so two peaks could
            // end up with the same InstanceId.
            lock (SaveState.lollipop)
            {
                instance_counter += 1;
                InstanceId        = instance_counter;
            }

            RelationType = base_relation.RelationType;
            DeltaMass    = base_relation.DeltaMass;

            grouped_relations = find_nearby_relations(relations_to_group);

            // The whole && chain is the ternary condition: a non-empty group whose first relation is
            // ExperimentalTheoretical uses the ET threshold, everything else uses the EE threshold.
            Accepted          = grouped_relations != null && grouped_relations.Count > 0 && grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical ?
                                peak_relation_group_count >= SaveState.lollipop.min_peak_count_et :
                                peak_relation_group_count >= SaveState.lollipop.min_peak_count_ee;

            // Candidate PTM sets are looked up by the delta mass rounded to one decimal place, then
            // narrowed to those within 0.05 of the delta mass. ET/ED relations compare signed
            // masses; other relation types compare absolute values.
            possiblePeakAssignments = new List <PtmSet>();
            if (SaveState.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(Math.Round(DeltaMass, 1), out List <PtmSet> candidates))
            {
                possiblePeakAssignments = candidates.Where(c => RelationType == ProteoformComparison.ExperimentalTheoretical || RelationType == ProteoformComparison.ExperimentalDecoy ?
                                                           Math.Abs(DeltaMass - c.mass) <= 0.05 :
                                                           Math.Abs(Math.Abs(DeltaMass) - Math.Abs(c.mass)) <= 0.05).ToList();
            }

            // Display form: "[id;id][id]" with one bracket group per distinct PTM set, using the
            // unlocalized modification id when the lookup has one and the modification's own id otherwise.
            possiblePeakAssignments_string = "[" + String.Join("][", possiblePeakAssignments.Select(ptmset =>
                                                                                                    String.Join(";", ptmset.ptm_combination.Select(ptm =>
                                                                                                                                                   SaveState.lollipop.theoretical_database.unlocalized_lookup.TryGetValue(ptm.modification, out UnlocalizedModification x) ? x.id : ptm.modification.id))).Distinct()) + "]";
        }
Example #2
0
        /// <summary>
        /// Two relations are equal when they share an InstanceId or connect the same pair of
        /// proteoforms, in either order. Anything that is not a ProteoformRelation is unequal.
        /// </summary>
        /// <param name="obj">Object to compare against.</param>
        /// <returns>True when <paramref name="obj"/> is an equivalent relation.</returns>
        // NOTE(review): a matching GetHashCode override is not visible in this chunk - confirm one exists.
        public override bool Equals(object obj)
        {
            ProteoformRelation other = obj as ProteoformRelation;

            if (other != null)
            {
                if (InstanceId == other.InstanceId)
                {
                    return true;
                }

                // Same pair with the two ends swapped.
                if (connected_proteoforms[0] == other.connected_proteoforms[1] && connected_proteoforms[1] == other.connected_proteoforms[0])
                {
                    return true;
                }

                // Same pair in the same order.
                return connected_proteoforms[0] == other.connected_proteoforms[0] && connected_proteoforms[1] == other.connected_proteoforms[1];
            }

            return false;
        }
 /// <summary>
 /// Stores each constructor argument in the member of the same name.
 /// </summary>
 /// <param name="begin">Value stored in this.begin.</param>
 /// <param name="end">Value stored in this.end.</param>
 /// <param name="ptm_set">Value stored in this.ptm_set.</param>
 /// <param name="relation">Value stored in this.relation.</param>
 /// <param name="theoretical_base">Value stored in this.theoretical_base.</param>
 /// <param name="linked_proteoform_references">Value stored in this.linked_proteoform_references (the list reference itself, not a copy).</param>
 public AmbiguousIdentification(int begin, int end, PtmSet ptm_set, ProteoformRelation relation, TheoreticalProteoform theoretical_base, List <Proteoform> linked_proteoform_references)
 {
     // Bounds
     this.begin = begin;
     this.end = end;

     // What was identified and against which theoretical
     this.ptm_set = ptm_set;
     this.theoretical_base = theoretical_base;

     // How the identification was reached
     this.relation = relation;
     this.linked_proteoform_references = linked_proteoform_references;
 }
Example #4
0
        /// <summary>
        /// Builds the relations between each experimental proteoform in <paramref name="pfs1"/> and the
        /// proteoforms in <paramref name="pfs2"/> that allowed_relation permits, then passes them through
        /// count_nearby_relations.
        /// </summary>
        /// <param name="pfs1">Experimental proteoforms on the first side of each relation; filtered to accepted ones when <paramref name="accepted_only"/> is true.</param>
        /// <param name="pfs2">Proteoforms on the second side; for EE/EF comparisons with <paramref name="accepted_only"/>, reduced to accepted ExperimentalProteoforms.</param>
        /// <param name="relation_type">Comparison type assigned to every relation created.</param>
        /// <param name="accepted_only">When true, only accepted proteoforms take part.</param>
        /// <param name="current_directory">Directory passed to every ProteoformRelation constructed here.</param>
        /// <param name="limit_et_relations">When true and the comparison is ET or ED, each pf1 keeps only its single best-scoring candidate.</param>
        /// <returns>The result of count_nearby_relations over the relations built.</returns>
        /// <remarks>
        /// Side effects on each pf1: candidate_relatives is overwritten, and for EE comparisons ptm_set,
        /// linked_proteoform_references and gene_name are reset to null.
        /// </remarks>
        public List <ProteoformRelation> relate(ExperimentalProteoform[] pfs1, Proteoform[] pfs2, ProteoformComparison relation_type, bool accepted_only, string current_directory, bool limit_et_relations)
        {
            if (accepted_only)
            {
                pfs1 = pfs1.Where(pf1 => pf1.accepted).ToArray();
            }

            if (accepted_only && (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse))
            {
                pfs2 = pfs2.OfType <ExperimentalProteoform>().Where(pf2 => pf2.accepted).ToArray();
            }

            Parallel.ForEach(pfs1, pf1 =>
            {
                lock (pf1)
                {
                    pf1.candidate_relatives = pfs2.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();

                    if (relation_type == ProteoformComparison.ExperimentalExperimental)
                    {
                        pf1.ptm_set = null;
                        pf1.linked_proteoform_references = null;
                        pf1.gene_name = null;
                    }

                    if (limit_et_relations && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy))
                    {
                        // Score = PTM rank sum plus a small mass-error term; lowest score wins.
                        // Note that the literal 10E-6 is 1e-5.
                        ProteoformRelation best_relation = pf1.candidate_relatives
                                                           .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory))
                                                           .Where(r => r.candidate_ptmset != null)                                                                                      // don't consider unassignable relations for ET
                                                           .OrderBy(r => r.candidate_ptmset.ptm_rank_sum + Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) * 10E-6) // get the best explanation for the experimental observation
                                                           .FirstOrDefault();

                        pf1.candidate_relatives = best_relation != null ?
                                                  new List <Proteoform> {
                            best_relation.connected_proteoforms[1]
                        } :
                        new List <Proteoform>();
                    }
                }
            });

            // Use the caller-supplied directory here as well; the best-relation scan above already
            // does, and falling back to Environment.CurrentDirectory silently ignored the parameter.
            List <ProteoformRelation> relations =
                (from pf1 in pfs1
                 from pf2 in pf1.candidate_relatives
                 select new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory)).ToList();

            return(count_nearby_relations(relations));  //putative counts include no-mans land
        }
 /// <summary>
 /// Empties the families collection and resets the family-derived state on every proteoform:
 /// experimental proteoforms lose their family, identification and gene name and get an empty
 /// PtmSet; theoretical proteoforms only lose their family.
 /// </summary>
 public void clear_families()
 {
     families.Clear();

     foreach (ExperimentalProteoform experimental in experimental_proteoforms)
     {
         experimental.family = null;
         experimental.ptm_set = new PtmSet(new List <Ptm>());
         experimental.linked_proteoform_references = null;

         // Assign a null that is statically typed as ProteoformRelation.
         experimental.relation_to_id = (ProteoformRelation)null;

         experimental.ambiguous_identifications.Clear();
         experimental.gene_name = null;
         experimental.novel_mods = false;
         experimental.uniprot_mods = "";
     }

     foreach (Proteoform theoretical in theoretical_proteoforms)
     {
         theoretical.family = null;
     }
 }
Example #6
0
        /// <summary>
        /// Forms delta-mass peaks from the relations that lie outside no-man's-land, stamps every
        /// grouped relation with its peak and that peak's acceptance, and stores the peaks on
        /// SaveState.lollipop.
        /// </summary>
        /// <param name="relations">Relations to form peaks from; only those with outside_no_mans_land set are considered.</param>
        /// <param name="decoy_relations">Decoy relations; used only at the end, where their nearby_relations are cleared together with those of <paramref name="relations"/>.</param>
        /// <returns>The peaks formed by this call (also appended to et_peaks or ee_peaks).</returns>
        /// <exception cref="ArgumentException">A root relation is neither ExperimentalExperimental nor ExperimentalTheoretical.</exception>
        public List <DeltaMassPeak> accept_deltaMass_peaks(List <ProteoformRelation> relations, Dictionary <string, List <ProteoformRelation> > decoy_relations)
        {
            //order by descending nearby_relations_count, then by descending E aggregated intensity, before forming peaks, and analyze only relations outside of no-man's-land
            remaining_relations_outside_no_mans = relations.Where(r => r.outside_no_mans_land).OrderByDescending(r => r.nearby_relations_count).ThenByDescending(r => ((ExperimentalProteoform)r.connected_proteoforms[0]).agg_intensity).ToList(); // Group count is the primary sort
            List <DeltaMassPeak> peaks = new List <DeltaMassPeak>();

            // root: next relation to seed a peak; running: roots whose peaks are being generated in
            // this round; active: the threads generating them.
            ProteoformRelation        root    = remaining_relations_outside_no_mans.FirstOrDefault();
            List <ProteoformRelation> running = new List <ProteoformRelation>();
            List <Thread>             active  = new List <Thread>();

            while (remaining_relations_outside_no_mans.FirstOrDefault() != null || active.Count > 0)
            {
                // Start up to one peak-generation thread per round.
                while (root != null && active.Count < 1) // Use Environment.ProcessorCount instead of 1 to enable parallization
                {
                    if (root.RelationType != ProteoformComparison.ExperimentalExperimental && root.RelationType != ProteoformComparison.ExperimentalTheoretical)
                    {
                        throw new ArgumentException("Only EE and ET peaks can be accepted");
                    }

                    // presumably generate_peak populates root.peak, which is read after the join below - TODO confirm
                    Thread t = new Thread(new ThreadStart(root.generate_peak));
                    t.Start();
                    running.Add(root);
                    active.Add(t);
                    root = find_next_root(remaining_relations_outside_no_mans, running);
                }

                // Wait for every thread started this round before reading the peaks.
                foreach (Thread t in active)
                {
                    t.Join();
                }

                // Collect each finished peak and propagate it and its acceptance to its grouped relations.
                foreach (DeltaMassPeak peak in running.Select(r => r.peak))
                {
                    peaks.Add(peak);
                    Parallel.ForEach(peak.grouped_relations, relation =>
                    {
                        lock (relation)
                        {
                            relation.peak     = peak;
                            relation.Accepted = peak.Accepted;
                        }
                    });
                }

                // Relations absorbed into this round's peaks are removed from the remaining pool.
                List <ProteoformRelation> mass_differences_in_peaks = running.SelectMany(r => r.peak.grouped_relations).ToList();
                remaining_relations_outside_no_mans = remaining_relations_outside_no_mans.Except(mass_differences_in_peaks).ToList();

                running.Clear();
                active.Clear();
                root = find_next_root(remaining_relations_outside_no_mans, running);
            }
            // The first peak's relation type decides the destination list; with no peaks the else
            // branch runs and appends an empty list to ee_peaks.
            if (peaks.Count > 0 && peaks.First().RelationType == ProteoformComparison.ExperimentalTheoretical)
            {
                SaveState.lollipop.et_peaks.AddRange(peaks);
            }
            else
            {
                SaveState.lollipop.ee_peaks.AddRange(peaks);
            }

            //Nearby relations are no longer needed after counting them
            Parallel.ForEach(decoy_relations.SelectMany(kv => kv.Value).Concat(relations).ToList(), r =>
            {
                lock (r) r.nearby_relations.Clear();
            });

            return(peaks);
        }
        /// <summary>
        /// Builds the relations between each experimental proteoform in <paramref name="pfs1"/> and the
        /// candidates from <paramref name="pfs2"/> that allowed_relation permits, optionally narrowing
        /// ET/ED candidates to the best-scoring ones, and returns them through count_nearby_relations.
        /// </summary>
        /// <param name="pfs1">Experimental proteoforms on the first side of each relation.</param>
        /// <param name="pfs2">Proteoforms on the second side; reduced to ExperimentalProteoforms for EE/EF comparisons.</param>
        /// <param name="relation_type">Comparison type assigned to every relation created.</param>
        /// <param name="current_directory">Directory passed to every ProteoformRelation constructed here.</param>
        /// <param name="limit_et_relations">For ET/ED: true keeps the single best candidate per pf1; false keeps the best candidate per gene name.</param>
        /// <returns>The result of count_nearby_relations over the relations built.</returns>
        /// <remarks>
        /// Side effects on each pf1: candidate_relatives is overwritten (see the review note in the
        /// neucode branch chain for the one case where it is not), and for EE comparisons ptm_set,
        /// linked_proteoform_references and gene_name are set to null and ambiguous_identifications is cleared.
        /// </remarks>
        public List <ProteoformRelation> relate(ExperimentalProteoform[] pfs1, Proteoform[] pfs2, ProteoformComparison relation_type, string current_directory, bool limit_et_relations)
        {
            if (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse)
            {
                pfs2 = pfs2.OfType <ExperimentalProteoform>().ToArray();
            }

            // Index pfs2 by lysine_count; only filled (and only consulted) when neucode labeling is on.
            Dictionary <int, List <Proteoform> > pfs2_lysine_lookup = new Dictionary <int, List <Proteoform> >();

            if (Sweet.lollipop.neucode_labeled)
            {
                foreach (Proteoform pf2 in pfs2)
                {
                    if (!pfs2_lysine_lookup.TryGetValue(pf2.lysine_count, out List <Proteoform> same_lysine_ct))
                    {
                        pfs2_lysine_lookup.Add(pf2.lysine_count, new List <Proteoform> {
                            pf2
                        });
                    }
                    else
                    {
                        same_lysine_ct.Add(pf2);
                    }
                }
            }

            Parallel.ForEach(pfs1, pf1 =>
            {
                lock (pf1)
                {
                    // Neucode ET/ED/EE: candidates must have exactly the same lysine count as pf1.
                    if (Sweet.lollipop.neucode_labeled &&
                        (relation_type == ProteoformComparison.ExperimentalTheoretical ||
                         relation_type == ProteoformComparison.ExperimentalDecoy ||
                         relation_type == ProteoformComparison.ExperimentalExperimental))
                    {
                        pfs2_lysine_lookup.TryGetValue(pf1.lysine_count, out List <Proteoform> pfs2_same_lysine_count);
                        pf1.candidate_relatives = pfs2_same_lysine_count != null
                            ? pfs2_same_lysine_count.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList()
                            : new List <Proteoform>();
                    }
                    // Neucode EF: candidates must differ in lysine count by more than maximum_missed_lysines.
                    else if (Sweet.lollipop.neucode_labeled && relation_type == ProteoformComparison.ExperimentalFalse)
                    {
                        List <Proteoform> pfs2_lysines_outside_tolerance = pfs2_lysine_lookup
                                                                           .Where(kv => Math.Abs(pf1.lysine_count - kv.Key) > Sweet.lollipop.maximum_missed_lysines)
                                                                           .SelectMany(kv => kv.Value).ToList();
                        pf1.candidate_relatives = pfs2_lysines_outside_tolerance
                                                  .Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();
                    }
                    // Unlabeled: every pfs2 entry is a potential candidate.
                    // NOTE(review): when neucode_labeled is true and relation_type is none of ET/ED/EE/EF,
                    // no branch runs and candidate_relatives keeps whatever value it had - confirm intended.
                    else if (!Sweet.lollipop.neucode_labeled)
                    {
                        pf1.candidate_relatives = pfs2.Where(pf2 => allowed_relation(pf1, pf2, relation_type)).ToList();
                    }

                    if (relation_type == ProteoformComparison.ExperimentalExperimental)
                    {
                        pf1.ptm_set = null;
                        pf1.linked_proteoform_references = null;
                        pf1.ambiguous_identifications.Clear();
                        pf1.gene_name = null;
                        //    pf1.relation_to_id = null;
                    }

                    if (relation_type == ProteoformComparison.ExperimentalTheoretical ||
                        relation_type == ProteoformComparison.ExperimentalDecoy)
                    {
                        if (limit_et_relations)
                        {
                            // Keep only the lowest-scoring candidate. Score = PTM rank sum plus a small
                            // mass-error term (the literal 10E-6 is 1e-5).
                            ProteoformRelation best_relation = pf1.candidate_relatives
                                                               .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type,
                                                                                                     pf1.modified_mass - pf2.modified_mass, current_directory))
                                                               .Where(r => r.candidate_ptmset != null && topdown_bottomup_comparison(pf1, r.connected_proteoforms[1] as TheoreticalProteoform)
                                                                      ) // don't consider unassignable relations for ET
                                                               .OrderBy(r =>
                                                                        r.candidate_ptmset.ptm_rank_sum +
                                                                        Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) *
                                                                        10E-6) // get the best explanation for the experimental observation
                                                               .FirstOrDefault();

                            pf1.candidate_relatives = best_relation != null
                                ? new List <Proteoform> {
                                best_relation.connected_proteoforms[1]
                            }
                                : new List <Proteoform>();
                        }
                        else //candidate relatives will be best T from each gene (won't get -42, etc)
                        {
                            List <ProteoformRelation> best_relatives_for_each_gene_name = new List <ProteoformRelation>();
                            // NOTE(review): the 'as TheoreticalProteoform' casts below are dereferenced without
                            // a null check, so a non-theoretical candidate would throw here.
                            var gene_names = pf1.candidate_relatives.Select(r =>
                                                                            (r as TheoreticalProteoform).gene_name.get_prefered_name(Lollipop
                                                                                                                                     .preferred_gene_label))
                                             .Distinct();
                            // One entry per gene name: the lowest-scoring assignable relation, or null
                            // when that gene has none (FirstOrDefault on an empty sequence).
                            foreach (var gene_name in gene_names)
                            {
                                best_relatives_for_each_gene_name.Add(pf1.candidate_relatives
                                                                      .Where(p =>
                                                                             (p as TheoreticalProteoform).gene_name.get_prefered_name(
                                                                                 Lollipop.preferred_gene_label) == gene_name)
                                                                      .Select(pf2 => new ProteoformRelation(pf1, pf2, relation_type,
                                                                                                            pf1.modified_mass - pf2.modified_mass, current_directory))
                                                                      .Where(r => r.candidate_ptmset !=
                                                                             null) // don't consider unassignable relations for ET
                                                                      .OrderBy(r =>
                                                                               r.candidate_ptmset.ptm_rank_sum +
                                                                               Math.Abs(Math.Abs(r.candidate_ptmset.mass) - Math.Abs(r.DeltaMass)) *
                                                                               10E-6) // get the best explanation for the experimental observation
                                                                      .FirstOrDefault());
                            }

                            // NOTE(review): the list was created with 'new' above, so this null check is
                            // always true and the empty-list branch is never taken.
                            // The null entries added above are filtered out by 'r != null'.
                            pf1.candidate_relatives = best_relatives_for_each_gene_name != null ?
                                                      best_relatives_for_each_gene_name.Where(r => r != null && topdown_bottomup_comparison(pf1, r.connected_proteoforms[1] as TheoreticalProteoform)).Select(r => r.connected_proteoforms[1]).ToList() : new List <Proteoform>();
                        }
                    }
                }
            });

            // Materialize one relation per (pf1, surviving candidate) pair.
            List <ProteoformRelation> relations =
                (from pf1 in pfs1
                 from pf2 in pf1.candidate_relatives
                 select new ProteoformRelation(pf1, pf2, relation_type, pf1.modified_mass - pf2.modified_mass, current_directory)).ToList();

            // With ee_use_notch on, EE/EF relations without a candidate PTM set are dropped.
            if (relation_type == ProteoformComparison.ExperimentalExperimental ||
                relation_type == ProteoformComparison.ExperimentalFalse)
            {
                if (Sweet.lollipop.ee_use_notch)
                {
                    relations = relations.Where(r => r.candidate_ptmset != null).ToList();
                }
            }
            return(count_nearby_relations(relations));  //putative counts include no-mans land
        }
Example #8
0
        /// <summary>
        /// Records, on relation <paramref name="r"/> and experimental proteoform <paramref name="e"/>,
        /// the identification that follows from this proteoform. Each piece of state is only written
        /// when it has not been set already.
        /// </summary>
        /// <param name="e">Experimental proteoform receiving the identification.</param>
        /// <param name="theoretical_reference">Not read by this method.</param>
        /// <param name="set">PTM set stored on <paramref name="e"/> when it has no linked references yet.</param>
        /// <param name="r">Relation whose represented_ptmset is filled in when still null.</param>
        /// <param name="sign">Multiplier applied to the DeltaMass of an ExperimentalExperimental relation when its represented_ptmset is first set.</param>
        /// <param name="change">PTM set stored as the relation's represented_ptmset.</param>
        private void assign_pf_identity(ExperimentalProteoform e, Proteoform theoretical_reference, PtmSet set, ProteoformRelation r, int sign, PtmSet change)
        {
            bool relation_unassigned = r.represented_ptmset == null;
            if (relation_unassigned)
            {
                r.represented_ptmset = change;

                bool is_ee = r.RelationType == ProteoformComparison.ExperimentalExperimental;
                if (is_ee)
                {
                    r.DeltaMass = r.DeltaMass * sign;
                }
            }

            bool e_unidentified = e.linked_proteoform_references == null;
            if (e_unidentified)
            {
                // This proteoform's reference chain, followed by this proteoform itself.
                e.linked_proteoform_references = new List <Proteoform>(this.linked_proteoform_references)
                {
                    this
                };
                e.ptm_set = set;
            }

            if (e.gene_name != null)
            {
                // NOTE(review): if this is LINQ's Enumerable.Concat, it returns a new sequence and the
                // result is discarded, so no gene names are merged - confirm the intent before changing.
                e.gene_name.gene_names.Concat(this.gene_name.gene_names);
            }
            else
            {
                e.gene_name = this.gene_name;
            }
        }
Example #9
0
        /// <summary>
        /// Creates a peak seeded by <paramref name="base_relation"/>: registers this peak on the relation,
        /// takes the next instance id, groups relations (by mass proximity, or by identical candidate
        /// PTM-set mass when the ET notch option applies), links every grouped relation into its
        /// proteoforms' relationships, and decides acceptance.
        /// </summary>
        /// <param name="base_relation">Relation supplying the peak's RelationType and initial DeltaMass; its peak member is set to this instance.</param>
        /// <param name="relations_to_group">Relations from which grouped_relations is formed.</param>
        public DeltaMassPeak(ProteoformRelation base_relation, HashSet <ProteoformRelation> relations_to_group)
        {
            lock (base_relation)
            {
                base_relation.peak = this;
            }

            // Increment AND read the counter while holding the lock. Reading it after the lock is
            // released lets another constructor bump the counter in between, so two peaks could
            // end up with the same InstanceId.
            lock (Sweet.lollipop)
            {
                IncrementInstanceCounter();
                InstanceId = instance_counter;
            }

            RelationType = base_relation.RelationType;
            DeltaMass    = base_relation.DeltaMass;

            if (RelationType == ProteoformComparison.ExperimentalExperimental || !Sweet.lollipop.et_use_notch)
            {
                grouped_relations = find_nearby_relations(relations_to_group);

                // Candidate PTM sets are looked up by the delta mass rounded to one decimal place.
                // The negated mass is only considered for EE and EF relations.
                bool are_positive_candidates =
                    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(Math.Round(DeltaMass, 1),
                                                                                               out List <PtmSet> positive_candidates);
                bool are_negative_candidates =
                    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(
                        Math.Round(-DeltaMass, 1), out List <PtmSet> negative_candidates) &&
                    (RelationType == ProteoformComparison.ExperimentalExperimental ||
                     RelationType == ProteoformComparison.ExperimentalFalse);

                if (are_positive_candidates || are_negative_candidates)
                {
                    List <PtmSet> candidates = (are_positive_candidates ? positive_candidates : new List <PtmSet>())
                                               .Concat(are_negative_candidates ? negative_candidates : new List <PtmSet>()).ToList();

                    // ET/ED relations compare signed masses; other types compare absolute values.
                    // The tolerance is the ET or EE peak width, chosen by the first grouped relation.
                    // Both are computed per candidate, difference first, so nothing is evaluated
                    // when there are no candidates.
                    possiblePeakAssignments = candidates.Where(c =>
                    {
                        var difference = RelationType == ProteoformComparison.ExperimentalTheoretical ||
                                         RelationType == ProteoformComparison.ExperimentalDecoy
                            ? Math.Abs(DeltaMass - c.mass)
                            : Math.Abs(Math.Abs(DeltaMass) - Math.Abs(c.mass));
                        var tolerance = grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical
                            ? Sweet.lollipop.peak_width_base_et
                            : Sweet.lollipop.peak_width_base_ee;
                        return difference <= tolerance;
                    }).ToList();
                }
                else
                {
                    possiblePeakAssignments = new List <PtmSet>();
                }
            }
            else
            {
                // Notch mode: group relations whose candidate PTM-set mass equals the base relation's
                // (to 5 decimal places) and take the peak's delta mass from that PTM set.
                grouped_relations       = relations_to_group.Where(r => Math.Round(r.candidate_ptmset.mass, 5) == Math.Round(base_relation.candidate_ptmset.mass, 5)).ToList();
                possiblePeakAssignments = grouped_relations.Select(r => r.candidate_ptmset).ToList();
                DeltaMass = possiblePeakAssignments.First().mass;
            }

            // Register each grouped relation on both proteoforms it connects.
            foreach (ProteoformRelation mass_difference in grouped_relations)
            {
                foreach (Proteoform p in mass_difference.connected_proteoforms)
                {
                    lock (p) p.relationships.Add(mass_difference);
                }
            }


            // Display form: "[id;id][id]" with one bracket group per distinct PTM set, ordered by rank sum.
            possiblePeakAssignments_string = "[" + string.Join("][", possiblePeakAssignments.OrderBy(p => p.ptm_rank_sum).Select(ptmset =>
                                                                                                                                 string.Join(";", ptmset.ptm_combination.Select(ptm =>
                                                                                                                                                                                UnlocalizedModification.LookUpId(ptm.modification)))).Distinct()) + "]";

            // The whole && chain is the ternary condition. ET peaks only need the minimum count; all
            // others need the EE minimum count and, when rank-based acceptance is on, at least one
            // assignment ranked below the first quartile.
            Accepted = grouped_relations != null && grouped_relations.Count > 0 && grouped_relations.First().RelationType == ProteoformComparison.ExperimentalTheoretical ?
                       (peak_relation_group_count >= Sweet.lollipop.min_peak_count_et)
                :
                       (peak_relation_group_count >= Sweet.lollipop.min_peak_count_ee && (!Sweet.lollipop.ee_accept_peaks_based_on_rank || possiblePeakAssignments.Count > 0 && possiblePeakAssignments.Any(p => p.ptm_rank_sum < Sweet.lollipop.mod_rank_first_quartile)));
        }
Example #10
0
        private bool assign_pf_identity(ExperimentalProteoform e, PtmSet set, int begin, int end, ProteoformRelation r, TheoreticalProteoform theoretical_base, List <Proteoform> linked_proteoform_references, bool check_ambiguous_IDs)
        {
            bool identification_assigned = false;

            if (!Sweet.lollipop.id_use_ppm_tolerance || Math.Abs(e.calculate_mass_error(theoretical_base, set, begin, end) * 1e6 / e.modified_mass) < Sweet.lollipop.id_ppm_tolerance)
            {
                int new_begin = begin;
                int new_end   = end;

                PtmSet     new_set = new PtmSet(new List <Ptm>(set.ptm_combination));
                List <Ptm> remove  = new List <Ptm>();
                //do retention of M first
                foreach (var mod in new_set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
                {
                    new_begin--;
                    remove.Add(mod);
                }

                foreach (var mod in new_set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
                {
                    if (!new_set.ptm_combination.Any(m => m.modification.ModificationType == "AminoAcid") && begin >= theoretical_base.begin)
                    {
                        if (theoretical_base.sequence[begin - theoretical_base.begin].ToString() ==
                            mod.modification.Target.ToString())
                        {
                            new_begin++;
                            remove.Add(mod); //dont have in ptmset --> change the begin & end
                        }
                    }
                    if (!remove.Contains(mod) && theoretical_base.sequence[end - theoretical_base.begin].ToString() ==
                        mod.modification.Target.ToString())
                    {
                        new_end--;
                        remove.Add(mod);
                    }
                }

                foreach (var ptm in remove)
                {
                    new_set.ptm_combination.Remove(ptm);
                }

                new_set = new PtmSet(new_set.ptm_combination);

                if (e.linked_proteoform_references == null)
                {
                    identification_assigned = true;

                    if (linked_proteoform_references != null)
                    {
                        e.linked_proteoform_references = new List <Proteoform>(linked_proteoform_references);
                        e.linked_proteoform_references.Add(this);
                    }
                    else
                    {
                        e.linked_proteoform_references = new List <Proteoform>()
                        {
                            theoretical_base
                        };
                    }

                    e.relation_to_id = r;
                    e.ptm_set        = new_set;
                    e.begin          = new_begin;
                    e.end            = new_end;


                    if (e.gene_name == null)
                    {
                        e.gene_name = theoretical_base.gene_name;
                    }
                    else
                    {
                        e.gene_name.gene_names.Concat(this.gene_name.gene_names);
                    }
                }
                else
                {
                    if (linked_proteoform_references != null && !linked_proteoform_references.Contains(e))
                    {
                        bool different_id = e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) !=
                                            theoretical_base.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ||
                                            ExperimentalProteoform.get_sequence(e.linked_proteoform_references.First() as TheoreticalProteoform, e.begin, e.end)
                                            != ExperimentalProteoform.get_sequence(theoretical_base, new_begin, new_end) || !e.ptm_set.same_ptmset(new_set, true);


                        List <Modification> this_known_mods        = theoretical_base.ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(v => v.MonoisotopicMass != 0).ToList();
                        List <Modification> previous_id_known_mods = (e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(v => v.MonoisotopicMass != 0).ToList();
                        if (!Sweet.lollipop.topdown_theoretical_reduce_ambiguity || (theoretical_base.topdown_theoretical && !(e.linked_proteoform_references.First() as TheoreticalProteoform).topdown_theoretical))
                        {
                            if (!Sweet.lollipop.annotated_PTMs_reduce_ambiguity ||
                                (new_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || this_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))) &&
                                 !e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification)))))
                            {
                                if (Sweet.lollipop.topdown_theoretical_reduce_ambiguity || Sweet.lollipop.annotated_PTMs_reduce_ambiguity)
                                {
                                    if (Sweet.lollipop.remove_bad_connections && different_id) //&& e.relation_to_id != r)
                                    {
                                        e.relation_to_id.Identification     = false;
                                        e.relation_to_id.represented_ptmset = null;
                                    }
                                    e.linked_proteoform_references = null;
                                    e.ptm_set   = new PtmSet(new List <Ptm>());
                                    e.begin     = 0;
                                    e.end       = 0;
                                    e.gene_name = null;
                                    e.ambiguous_identifications.Clear();
                                    ProteoformRelation relation = null;
                                    e.relation_to_id = relation;

                                    //reassign the topdown - based ID
                                    return(this.assign_pf_identity(e, set, begin, end, r, theoretical_base, linked_proteoform_references, true));
                                }
                            }
                        }

                        if (Sweet.lollipop.topdown_theoretical_reduce_ambiguity && (e.linked_proteoform_references.First() as TheoreticalProteoform).topdown_theoretical && !theoretical_base.topdown_theoretical)
                        {
                        }
                        else if (Sweet.lollipop.annotated_PTMs_reduce_ambiguity &&
                                 !new_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || this_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))) &&
                                 e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
                        {
                        }
                        else
                        {
                            if (different_id)
                            {
                                var new_linked_proteoform_references = new List <Proteoform>(linked_proteoform_references);
                                new_linked_proteoform_references.Add(this);

                                AmbiguousIdentification new_id =
                                    new AmbiguousIdentification(new_begin, new_end, new_set, r, theoretical_base, new_linked_proteoform_references);
                                lock (e.ambiguous_identifications)
                                {
                                    if (!e.ambiguous_identifications.Any(p =>
                                                                         p.theoretical_base.gene_name.primary ==
                                                                         new_id.theoretical_base.gene_name.primary &&
                                                                         ExperimentalProteoform.get_sequence(p.theoretical_base, p.begin, p.end) == ExperimentalProteoform.get_sequence(new_id.theoretical_base, new_id.begin, new_id.end) &&
                                                                         p.ptm_set.same_ptmset(new_id.ptm_set, true)))
                                    {
                                        e.ambiguous_identifications.Add(new_id);
                                        identification_assigned = true;
                                    }
                                }
                            }
                        }
                    }
                }
            }


            if (check_ambiguous_IDs)
            {
                //remove bad relations if using td to reduce ambiguity
                if (identification_assigned)
                {
                    List <AmbiguousIdentification> to_remove = new List <AmbiguousIdentification>();
                    List <Modification>            previous_id_known_mods = (e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(m => m.MonoisotopicMass != 0).ToList();
                    if (theoretical_base.topdown_theoretical && Sweet.lollipop.topdown_theoretical_reduce_ambiguity)
                    {
                        to_remove.AddRange(e.ambiguous_identifications.Where(id => !id.theoretical_base.topdown_theoretical));
                    }
                    if (Sweet.lollipop.annotated_PTMs_reduce_ambiguity &&
                        e.ptm_set.ptm_combination.All(mod1 => modification_is_adduct(mod1.modification) || previous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
                    {
                        foreach (var ambiguous_id in e.ambiguous_identifications)
                        {
                            List <Modification> ambiguous_id_known_mods = ambiguous_id.theoretical_base.ExpandedProteinList.SelectMany(p => p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value).Where(m => m.MonoisotopicMass != 0).ToList();
                            if (ambiguous_id.ptm_set.ptm_combination.Any(mod1 => !modification_is_adduct(mod1.modification) && !ambiguous_id_known_mods.Select(mod2 => UnlocalizedModification.LookUpId(mod2)).Contains(UnlocalizedModification.LookUpId(mod1.modification))))
                            {
                                to_remove.Add(ambiguous_id);
                            }
                        }
                    }
                    foreach (var x in to_remove)
                    {
                        if (e.ambiguous_identifications.Contains(x))
                        {
                            e.ambiguous_identifications.Remove(x);
                            if (Sweet.lollipop.remove_bad_connections)
                            {
                                if (e.relation_to_id != x.relation)
                                {
                                    x.relation.Identification     = false;
                                    x.relation.represented_ptmset = null;
                                }
                            }
                        }
                    }
                    foreach (var x in e.ambiguous_identifications)
                    {
                        x.relation.Identification = true;
                    }
                }


                if (this as ExperimentalProteoform != null && (this as ExperimentalProteoform).ambiguous_identifications.Count > 0)
                {
                    lock ((this as ExperimentalProteoform).ambiguous_identifications)
                    {
                        int      count       = (this as ExperimentalProteoform).ambiguous_identifications.Count;
                        PtmSet[] new_ptm_set = new PtmSet[count];
                        Parallel.For(0, count, i =>
                        {
                            var id         = (this as ExperimentalProteoform).ambiguous_identifications[i];
                            new_ptm_set[i] = determine_mod_change(e, this, id.theoretical_base, r, id.ptm_set, id.begin, id.end);
                        });
                        for (int i = 0; i < count; i++)
                        {
                            if (new_ptm_set[i] != null)
                            {
                                var id = (this as ExperimentalProteoform).ambiguous_identifications[i];
                                if (assign_pf_identity(e, new_ptm_set[i], id.begin, id.end, r, id.theoretical_base, id.linked_proteoform_references, false))
                                {
                                    identification_assigned = true;
                                }
                            }
                        }
                    }
                }
            }
            return(identification_assigned);
        }
Example #11
0
        /// <summary>
        /// Works out the PTM set obtained by applying the delta mass of <paramref name="r"/>'s peak to
        /// <paramref name="this_ptmset"/>, either by adding a candidate PTM set or by removing one that
        /// is already contained in <paramref name="this_ptmset"/>. A matching loss takes precedence over
        /// an addition when both are available.
        /// </summary>
        /// <param name="e">Experimental proteoform on the far side of the relation; its modified mass is compared with <paramref name="p"/>'s to choose the sign of the delta mass.</param>
        /// <param name="p">Proteoform whose modified mass scales the loss tolerance (mass / 1e6 * configured mass tolerance).</param>
        /// <param name="theoretical_base">Passed through to <c>generate_possible_added_ptmsets</c>.</param>
        /// <param name="r">Relation whose peak supplies the delta mass and the candidate PTM sets. Its <c>represented_ptmset</c> is filled in if it is still null.</param>
        /// <param name="this_ptmset">PTM set to which the change is applied; it is not modified.</param>
        /// <param name="begin">Passed through to <c>generate_possible_added_ptmsets</c>.</param>
        /// <param name="end">Passed through to <c>generate_possible_added_ptmsets</c>.</param>
        /// <returns>A new PTM set with the change applied, or null when neither an addition nor a loss could be found.</returns>
        private static PtmSet determine_mod_change(ExperimentalProteoform e, Proteoform p,
                                                   TheoreticalProteoform theoretical_base, ProteoformRelation r, PtmSet this_ptmset, int begin, int end)
        {
            double mass_tolerance = p.modified_mass / 1000000 * Sweet.lollipop.mass_tolerance;
            int    sign           = Math.Sign(e.modified_mass - p.modified_mass);

            // give EE relations the correct sign, but don't switch negative ET relation deltaM's
            double deltaM = Math.Sign(r.peak.DeltaMass) < 0
                ? r.peak.DeltaMass
                : sign * r.peak.DeltaMass;

            // EE relations have PtmSets around both positive and negative deltaM,
            // so keep only the ones around the deltaM of interest
            List<PtmSet> possible_additions = r.peak.possiblePeakAssignments
                                              .Where(peak => Math.Abs(peak.mass - deltaM) <= 1)
                                              .ToList();

            // major score: rank sum; tie breaker: distance to deltaM, which is at most 1 here,
            // so the scaled term (10E-6 == 1e-5) stays far below one rank unit
            PtmSet best_addition = generate_possible_added_ptmsets(possible_additions,
                                                                   Sweet.lollipop.theoretical_database.all_mods_with_mass, theoretical_base, begin, end,
                                                                   this_ptmset, 1, true)
                                   .OrderBy(x => (double)x.ptm_rank_sum + Math.Abs(x.mass - deltaM) * 10E-6)
                                   .FirstOrDefault();

            // Collect candidate sets whose removal explains deltaM. The list is local and the loop is
            // sequential, so no locking is needed.
            List<PtmSet>       best_losses = new List<PtmSet>();
            List<Modification> these_mods  = null; // loop-invariant; built on first use only
            foreach (PtmSet set in r.peak.possiblePeakAssignments)
            {
                bool within_loss_tolerance = deltaM >= -set.mass - mass_tolerance && deltaM <= -set.mass + mass_tolerance;
                if (!within_loss_tolerance)
                {
                    continue;
                }

                if (these_mods == null)
                {
                    these_mods = this_ptmset.ptm_combination.Select(ptm => ptm.modification).ToList();
                }

                // every modification of the candidate must be present in the current set
                // at least as many times as the candidate contains it
                List<Modification> those_mods = set.ptm_combination.Select(ptm => ptm.modification).ToList();
                bool can_be_removed = those_mods.All(m1 => these_mods.Count(m2 =>
                                                                            UnlocalizedModification.LookUpId(m2) ==
                                                                            UnlocalizedModification.LookUpId(m1)) >=
                                                     those_mods.Count(m2 =>
                                                                      UnlocalizedModification.LookUpId(m2) ==
                                                                      UnlocalizedModification.LookUpId(m1)));
                if (can_be_removed)
                {
                    best_losses.Add(set);
                }
            }

            // closest loss mass wins; OrderBy is stable, so ties keep candidate order
            PtmSet best_loss = best_losses.OrderBy(s => Math.Abs(deltaM - (-s.mass))).FirstOrDefault();

            if (best_addition == null && best_loss == null)
            {
                return(null);
            }

            // Make the new ptmset with ptms removed or added
            PtmSet with_mod_change;
            if (best_loss == null)
            {
                // best_addition is non-null here because of the guard above
                with_mod_change = new PtmSet(this_ptmset.ptm_combination
                                             .Concat(best_addition.ptm_combination)
                                             .Where(ptm => ptm.modification.MonoisotopicMass != 0)
                                             .ToList());
            }
            else
            {
                // remove one matching entry (by unlocalized id) per lost ptm, working on a copy
                List<Ptm> new_combo = new List<Ptm>(this_ptmset.ptm_combination);
                foreach (Ptm ptm in best_loss.ptm_combination)
                {
                    new_combo.Remove(new_combo.FirstOrDefault(candidate => UnlocalizedModification.LookUpId(candidate.modification) == UnlocalizedModification.LookUpId(ptm.modification)));
                }
                with_mod_change = new PtmSet(new_combo);
            }

            // NOTE(review): this check-then-set on the shared relation is not synchronized; if callers
            // run this method concurrently for the same relation, which result sticks is timing dependent.
            if (r.represented_ptmset == null)
            {
                r.represented_ptmset = best_loss == null ? best_addition : best_loss;
            }

            return(with_mod_change);
        }
Example #12
0
        /// <summary>
        /// Carries this proteoform's identification over to <paramref name="e"/> across relation <paramref name="r"/>.
        /// <para>
        /// If <paramref name="e"/> has no linked proteoform references yet, it takes this proteoform's reference
        /// chain (plus this proteoform), the given PTM set and this proteoform's begin/end, adjusted for
        /// terminal-residue changes. Otherwise the candidate identification is compared with the one
        /// <paramref name="e"/> already holds and, if it differs and is not already listed, it is recorded in
        /// <c>e.ambiguous_identifications</c> and <c>e.ambiguous</c> is set.
        /// </para>
        /// <para>
        /// When this proteoform is itself an ambiguous experimental proteoform, each of its ambiguous
        /// identifications is carried across the relation in the same way. Finally <c>e.uniprot_mods</c> is
        /// rebuilt and <c>e.novel_mods</c> is set when a modification occurs more often than the theoretical
        /// positions found for it.
        /// </para>
        /// </summary>
        /// <param name="e">Experimental proteoform receiving the identification; mutated in place.</param>
        /// <param name="set">Candidate PTM set for <paramref name="e"/>. Terminal-residue entries are removed from its <c>ptm_combination</c> list in place.</param>
        /// <param name="r">Relation connecting this proteoform to <paramref name="e"/>; forwarded to <c>determine_mod_change</c>.</param>
        /// <param name="theoretical_base">Theoretical proteoform supplying the sequence and the possible localized modifications.</param>
        private void assign_pf_identity(ExperimentalProteoform e, PtmSet set, ProteoformRelation r, TheoreticalProteoform theoretical_base)
        {
            if (e.linked_proteoform_references == null)
            {
                // first identification for e: adopt this proteoform's chain and coordinates
                e.linked_proteoform_references = new List<Proteoform>(this.linked_proteoform_references);
                e.linked_proteoform_references.Add(this);
                e.ptm_set = set;
                e.begin   = this.begin;
                e.end     = this.end;

                int    adjusted_begin = this.begin;
                int    adjusted_end   = this.end;
                PtmSet trimmed_set    = strip_terminal_residue_ptms(set, theoretical_base, this.begin, this.end, ref adjusted_begin, ref adjusted_end);
                e.begin   = adjusted_begin;
                e.end     = adjusted_end;
                e.ptm_set = trimmed_set;

                if (e.gene_name == null)
                {
                    e.gene_name = this.gene_name;
                }
                else if (!e.topdown_id)
                {
                    // NOTE(review): Enumerable.Concat returns a new sequence and the result is discarded,
                    // so this statement does not change e.gene_name. If merging the gene names was
                    // intended, the result must be stored; left as-is pending confirmation of the intent.
                    e.gene_name.gene_names.Concat(this.gene_name.gene_names);
                }
            }
            else
            {
                // e already has an identification: check whether this candidate disagrees with it
                int    begin   = this.begin;
                int    end     = this.end;
                PtmSet ptm_set = strip_terminal_residue_ptms(set, theoretical_base, this.begin, this.end, ref begin, ref end);

                if (e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) !=
                    this.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ||
                    e.begin != begin || e.end != end || !e.ptm_set.same_ptmset(ptm_set, true))
                {
                    e.ambiguous = true;
                    Proteoform linked_proteoform_reference =
                        this.linked_proteoform_references == null || this.linked_proteoform_references.Count == 0
                            ? this
                            : this.linked_proteoform_references.First();
                    Tuple<Proteoform, int, int, PtmSet> new_id =
                        new Tuple<Proteoform, int, int, PtmSet>(linked_proteoform_reference, begin, end, ptm_set);
                    lock (e.ambiguous_identifications)
                    {
                        // only add identifications that are not already listed
                        if (!e.ambiguous_identifications.Any(p =>
                                                             p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ==
                                                             new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) &&
                                                             p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 &&
                                                             p.Item4.same_ptmset(new_id.Item4, true)))
                        {
                            e.ambiguous_identifications.Add(new_id);
                        }
                    }
                }
            }

            // carry each of this proteoform's own ambiguous identifications across the relation
            ExperimentalProteoform this_experimental = this as ExperimentalProteoform;
            if (this_experimental != null && this_experimental.ambiguous)
            {
                foreach (var id in this.ambiguous_identifications)
                {
                    TheoreticalProteoform id_theoretical_base = id.Item1 as TheoreticalProteoform;

                    PtmSet changed_set = determine_mod_change(e, this, id_theoretical_base, r, id.Item4);
                    if (changed_set == null)
                    {
                        continue;
                    }

                    int    begin   = id.Item2;
                    int    end     = id.Item3;
                    PtmSet ptm_set = strip_terminal_residue_ptms(changed_set, id_theoretical_base, id.Item2, id.Item3, ref begin, ref end);

                    lock (e.ambiguous_identifications)
                    {
                        var new_id = new Tuple<Proteoform, int, int, PtmSet>(id.Item1, begin, end, ptm_set);

                        // add only if it differs from e's main identification and is not already listed
                        if ((e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) !=
                             new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ||
                             e.begin != new_id.Item2 || e.end != new_id.Item3 || !e.ptm_set.same_ptmset(new_id.Item4, true)) &&
                            !e.ambiguous_identifications.Any(p =>
                                                             p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ==
                                                             new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) &&
                                                             p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 &&
                                                             p.Item4.same_ptmset(new_id.Item4, true)))
                        {
                            e.ambiguous_identifications.Add(new_id);
                            e.ambiguous = true;
                        }
                    }
                }
            }

            // rebuild the modification summary from the main and all ambiguous identifications
            e.uniprot_mods = "";
            var mod_ids = e.ptm_set.ptm_combination
                          .Concat(e.ambiguous_identifications.SelectMany(i => i.Item4.ptm_combination))
                          .Where(ptm => ptm.modification.ModificationType != "Deconvolution Error")
                          .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                          .ToList()
                          .Distinct()
                          .OrderBy(m => m);
            foreach (string mod in mod_ids)
            {
                // positions within e's begin..end where the theoretical lists this modification
                List<int> theo_ptms = theoretical_base.ExpandedProteinList.First()
                                      .OneBasedPossibleLocalizedModifications
                                      .Where(p => p.Key >= e.begin && p.Key <= e.end &&
                                             p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                      .Select(m => m.Key).ToList();
                if (theo_ptms.Count > 0)
                {
                    e.uniprot_mods += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                }

                // more occurrences than theoretical positions found => flagged as novel
                if (e.ptm_set.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                    .Count(m => m == mod) > theo_ptms.Count ||
                    e.ambiguous_identifications.Any(i => i.Item4.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                                    .Count(m => m == mod) > theo_ptms.Count))
                {
                    e.novel_mods = true;
                }
            }
        }

        /// <summary>
        /// Converts terminal-residue entries of <paramref name="ptm_set"/> into begin/end shifts.
        /// Every "AminoAcid" entry moves <paramref name="begin"/> back by one. Every "Missing" entry whose
        /// target matches the residue at the start index moves <paramref name="begin"/> forward by one,
        /// otherwise one whose target matches the residue at the end index moves <paramref name="end"/> back by one.
        /// All entries handled this way are removed from <paramref name="ptm_set"/>'s list in place.
        /// </summary>
        /// <param name="ptm_set">PTM set to scan; its <c>ptm_combination</c> list is mutated.</param>
        /// <param name="theoretical">Theoretical proteoform whose sequence is inspected (only when a "Missing" entry exists).</param>
        /// <param name="start">Unadjusted begin coordinate used to index the sequence.</param>
        /// <param name="stop">Unadjusted end coordinate used to index the sequence.</param>
        /// <param name="begin">Begin coordinate to adjust.</param>
        /// <param name="end">End coordinate to adjust.</param>
        /// <returns>A new <c>PtmSet</c> built from the remaining entries.</returns>
        private static PtmSet strip_terminal_residue_ptms(PtmSet ptm_set, TheoreticalProteoform theoretical, int start, int stop, ref int begin, ref int end)
        {
            List<Ptm> remove = new List<Ptm>();

            //do retention of M first
            foreach (Ptm mod in ptm_set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
            {
                begin--;
                remove.Add(mod);
            }

            foreach (Ptm mod in ptm_set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
            {
                if (theoretical.sequence[start - theoretical.begin].ToString() == mod.modification.Target.ToString())
                {
                    begin++;
                    remove.Add(mod); //dont have in ptmset --> change the begin & end
                }
                // NOTE(review): the end index is (stop - start), not (stop - theoretical.begin) as the
                // start index would suggest; the two agree only when start == theoretical.begin.
                // Kept as in the original - confirm which is intended.
                else if (theoretical.sequence[stop - start].ToString() == mod.modification.Target.ToString())
                {
                    end--;
                    remove.Add(mod);
                }
            }

            // removal happens after enumeration so the list is never modified while being iterated
            foreach (Ptm ptm in remove)
            {
                ptm_set.ptm_combination.Remove(ptm);
            }

            return(new PtmSet(ptm_set.ptm_combination));
        }