/// <summary>
/// Tests whether a pair of proteoforms qualifies for a relation of the requested type.
/// </summary>
/// <param name="pf1">First proteoform of the pair; cast to <c>ExperimentalProteoform</c> for the experimental-experimental and experimental-false comparisons.</param>
/// <param name="pf2_with_allowed_lysines">Second proteoform; cast to <c>TheoreticalProteoform</c> or <c>ExperimentalProteoform</c> depending on the relation type.</param>
/// <param name="relation_type">The comparison being evaluated.</param>
/// <returns>True when every criterion for <paramref name="relation_type"/> holds; false otherwise, including for relation types not handled here.</returns>
/// <exception cref="InvalidCastException">A proteoform is not of the type the relation requires (previously surfaced as a NullReferenceException from an unchecked <c>as</c> cast).</exception>
public bool allowed_relation(Proteoform pf1, Proteoform pf2_with_allowed_lysines, ProteoformComparison relation_type)
 {
     // Minimum retention-time gap used for unlabeled experimental-false pairs (the original comment calls this "10 minutes").
     const int min_false_rt_difference = 10;

     switch (relation_type)
     {
         case ProteoformComparison.ExperimentalTheoretical:
         case ProteoformComparison.ExperimentalDecoy:
         {
             var mass_difference = pf1.modified_mass - pf2_with_allowed_lysines.modified_mass;
             return
                 mass_difference >= Sweet.lollipop.et_low_mass_difference &&
                 mass_difference <= Sweet.lollipop.et_high_mass_difference &&
                 // theoreticals flagged new_topdown_proteoform are only eligible when add_td_theoreticals is on
                 (Sweet.lollipop.add_td_theoreticals || !((TheoreticalProteoform)pf2_with_allowed_lysines).new_topdown_proteoform);
         }

         case ProteoformComparison.ExperimentalExperimental:
             return
                 pf1 != pf2_with_allowed_lysines &&
                 pf1.modified_mass >= pf2_with_allowed_lysines.modified_mass &&
                 pf1.modified_mass - pf2_with_allowed_lysines.modified_mass <= Sweet.lollipop.ee_max_mass_difference &&
                 Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2_with_allowed_lysines).agg_rt) <= Sweet.lollipop.ee_max_RetentionTime_difference;

         case ProteoformComparison.ExperimentalFalse:
         {
             bool mass_criteria_met =
                 pf1.modified_mass >= pf2_with_allowed_lysines.modified_mass &&
                 pf1 != pf2_with_allowed_lysines &&
                 pf1.modified_mass - pf2_with_allowed_lysines.modified_mass <= Sweet.lollipop.ee_max_mass_difference;
             if (!mass_criteria_met)
             {
                 return false;
             }

             var rt_difference = Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2_with_allowed_lysines).agg_rt);

             // NeuCode-labeled data must stay inside the EE retention-time window;
             // unlabeled data must instead be farther apart than the fixed minimum gap.
             return Sweet.lollipop.neucode_labeled
                 ? rt_difference < Sweet.lollipop.ee_max_RetentionTime_difference
                 : rt_difference > min_false_rt_difference;
         }

         default:
             return false;
     }
 }
Exemple #2
0
        /// <summary>
        /// Tests whether a pair of proteoforms qualifies for a relation of the requested type.
        /// </summary>
        /// <param name="pf1">First proteoform of the pair.</param>
        /// <param name="pf2">Second proteoform of the pair.</param>
        /// <param name="relation_type">The comparison being evaluated.</param>
        /// <returns>True when every criterion for <paramref name="relation_type"/> holds; false otherwise, including for relation types not handled here.</returns>
        public bool allowed_relation(Proteoform pf1, Proteoform pf2, ProteoformComparison relation_type)
        {
            if (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy)
            {
                // With NeuCode labeling the lysine counts have to agree.
                bool lysines_agree = !SaveState.lollipop.neucode_labeled || pf2.lysine_count == pf1.lysine_count;
                if (!lysines_agree)
                {
                    return false;
                }

                var mass_gap = pf1.modified_mass - pf2.modified_mass;
                bool inside_window = mass_gap >= SaveState.lollipop.et_low_mass_difference &&
                                     mass_gap <= SaveState.lollipop.et_high_mass_difference;
                if (!inside_window)
                {
                    return false;
                }

                // Sets of three or more PTMs are accepted only when every PTM has the same monoisotopic mass.
                var ptms = pf2.ptm_set.ptm_combination;
                if (ptms.Count < 3)
                {
                    return true;
                }
                var reference_mass = ptms.First().modification.monoisotopicMass;
                return ptms.All(ptm => ptm.modification.monoisotopicMass == reference_mass);
            }

            if (relation_type == ProteoformComparison.ExperimentalExperimental)
            {
                bool heavier_and_distinct = pf1.modified_mass >= pf2.modified_mass && pf1 != pf2;
                if (!heavier_and_distinct)
                {
                    return false;
                }

                bool lysines_agree = !SaveState.lollipop.neucode_labeled || pf1.lysine_count == pf2.lysine_count;
                if (!lysines_agree)
                {
                    return false;
                }

                bool within_mass_limit = pf1.modified_mass - pf2.modified_mass <= SaveState.lollipop.ee_max_mass_difference;
                if (!within_mass_limit)
                {
                    return false;
                }

                var rt_gap = Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2).agg_rt);
                return rt_gap <= SaveState.lollipop.ee_max_RetentionTime_difference;
            }

            if (relation_type == ProteoformComparison.ExperimentalFalse)
            {
                bool heavier_and_distinct = pf1.modified_mass >= pf2.modified_mass && pf1 != pf2;
                if (!heavier_and_distinct)
                {
                    return false;
                }

                bool within_mass_limit = pf1.modified_mass - pf2.modified_mass <= SaveState.lollipop.ee_max_mass_difference;
                if (!within_mass_limit)
                {
                    return false;
                }

                if (SaveState.lollipop.neucode_labeled)
                {
                    // Labeled data: lysine counts must differ by more than missed_lysines,
                    // and the pair must sit inside the EE retention-time window.
                    bool lysines_far_apart = Math.Abs(pf1.lysine_count - pf2.lysine_count) > SaveState.lollipop.missed_lysines;
                    if (!lysines_far_apart)
                    {
                        return false;
                    }
                    var labeled_rt_gap = Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2).agg_rt);
                    return labeled_rt_gap < SaveState.lollipop.ee_max_RetentionTime_difference;
                }

                // Unlabeled data: the pair must be more than twice the EE retention-time limit apart.
                var unlabeled_rt_gap = Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2).agg_rt);
                return unlabeled_rt_gap > SaveState.lollipop.ee_max_RetentionTime_difference * 2;
            }

            return false;
        }
        /// <summary>
        /// Records on experimental proteoform <paramref name="e"/> the identity information it gains through relation
        /// <paramref name="r"/> with this proteoform: the relation's represented PTM set, the chain of linked
        /// proteoform references, the PTM set, and the gene name.
        /// </summary>
        /// <param name="e">Experimental proteoform being annotated.</param>
        /// <param name="theoretical_reference">Not used by this method.</param>
        /// <param name="set">PTM set assigned to <paramref name="e"/> when it has no linked references yet.</param>
        /// <param name="r">Relation connecting this proteoform to <paramref name="e"/>.</param>
        /// <param name="sign">Multiplier applied to the delta mass of experimental-experimental relations.</param>
        /// <param name="change">PTM set stored on the relation when it has none yet.</param>
        private void assign_pf_identity(ExperimentalProteoform e, Proteoform theoretical_reference, PtmSet set, ProteoformRelation r, int sign, PtmSet change)
        {
            if (r.represented_ptmset == null)
            {
                r.represented_ptmset = change;
                if (r.RelationType == ProteoformComparison.ExperimentalExperimental)
                {
                    r.DeltaMass *= sign;
                }
            }

            if (e.linked_proteoform_references == null)
            {
                // The chain for e is this proteoform's chain followed by this proteoform itself.
                e.linked_proteoform_references = new List <Proteoform>(this.linked_proteoform_references);
                e.linked_proteoform_references.Add(this);
                e.ptm_set = set;
            }

            if (e.gene_name == null)
            {
                e.gene_name = this.gene_name;
            }
            else if (!ReferenceEquals(e.gene_name, this.gene_name))
            {
                // Enumerable.Concat returns a new sequence and leaves its inputs untouched, so the
                // previous bare call discarded the merge. Store the combined names back.
                // NOTE(review): e.gene_name can be the same object as another proteoform's gene_name
                // (it is assigned by reference above), so this merge is visible through that reference
                // as well - confirm that sharing is intended. Assumes gene_names is a settable list/sequence.
                e.gene_name.gene_names = e.gene_name.gene_names.Concat(this.gene_name.gene_names).ToList();
            }
        }
Exemple #4
0
 /// <summary>
 /// Tests whether a pair of proteoforms qualifies for a relation of the requested type.
 /// </summary>
 /// <param name="pf1">First proteoform of the pair; cast to <c>ExperimentalProteoform</c> for the experimental-experimental and experimental-false comparisons.</param>
 /// <param name="pf2_with_allowed_lysines">Second proteoform; cast to <c>ExperimentalProteoform</c> for those same comparisons.</param>
 /// <param name="relation_type">The comparison being evaluated.</param>
 /// <returns>True when every criterion for <paramref name="relation_type"/> holds; false otherwise, including for relation types not handled here.</returns>
 /// <exception cref="InvalidCastException">A proteoform is not experimental where the relation requires it (previously surfaced as a NullReferenceException from an unchecked <c>as</c> cast).</exception>
 public bool allowed_relation(Proteoform pf1, Proteoform pf2_with_allowed_lysines, ProteoformComparison relation_type)
 {
     switch (relation_type)
     {
         case ProteoformComparison.ExperimentalTheoretical:
         case ProteoformComparison.ExperimentalDecoy:
         {
             var mass_difference = pf1.modified_mass - pf2_with_allowed_lysines.modified_mass;
             return
                 mass_difference >= Sweet.lollipop.et_low_mass_difference &&
                 mass_difference <= Sweet.lollipop.et_high_mass_difference;
         }

         case ProteoformComparison.ExperimentalExperimental:
             return
                 pf1 != pf2_with_allowed_lysines &&
                 pf1.modified_mass >= pf2_with_allowed_lysines.modified_mass &&
                 pf1.modified_mass - pf2_with_allowed_lysines.modified_mass <= Sweet.lollipop.ee_max_mass_difference &&
                 Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2_with_allowed_lysines).agg_rt) <= Sweet.lollipop.ee_max_RetentionTime_difference;

         case ProteoformComparison.ExperimentalFalse:
         {
             bool mass_criteria_met =
                 pf1.modified_mass >= pf2_with_allowed_lysines.modified_mass &&
                 pf1 != pf2_with_allowed_lysines &&
                 pf1.modified_mass - pf2_with_allowed_lysines.modified_mass <= Sweet.lollipop.ee_max_mass_difference;
             if (!mass_criteria_met)
             {
                 return false;
             }

             var rt_difference = Math.Abs(((ExperimentalProteoform)pf1).agg_rt - ((ExperimentalProteoform)pf2_with_allowed_lysines).agg_rt);

             // NeuCode-labeled data must stay inside the EE retention-time window;
             // unlabeled data must instead be more than twice that window apart.
             return Sweet.lollipop.neucode_labeled
                 ? rt_difference < Sweet.lollipop.ee_max_RetentionTime_difference
                 : rt_difference > Sweet.lollipop.ee_max_RetentionTime_difference * 2;
         }

         default:
             return false;
     }
 }
Exemple #5
0
        /// <summary>
        /// Builds the display name for a proteoform node.
        /// </summary>
        /// <param name="p">Proteoform to name.</param>
        /// <param name="node_label">Selected label style; when it equals <c>Lollipop.node_labels[1]</c>, identified experimental proteoforms also get their reference accession and PTM description.</param>
        /// <param name="double_rounding">Number of decimals kept when rounding the aggregated mass.</param>
        /// <returns>
        /// For experimental proteoforms, "&lt;rounded mass&gt;_Da_&lt;accession&gt;", optionally followed by the first linked
        /// reference's accession and the PTM ids (or "Unmodified"); for theoretical proteoforms, accession plus PTM
        /// description; otherwise just the accession.
        /// </returns>
        public static string get_proteoform_shared_name(Proteoform p, string node_label, int double_rounding)
        {
            if (p is ExperimentalProteoform experimental)
            {
                string label = Math.Round(experimental.agg_mass, double_rounding) + "_Da_" + experimental.accession;

                bool has_identity = node_label == Lollipop.node_labels[1]
                                    && experimental.linked_proteoform_references != null
                                    && experimental.linked_proteoform_references.Count > 0;
                if (!has_identity)
                {
                    return label;
                }

                var reference_accession = (experimental.linked_proteoform_references.First() as TheoreticalProteoform).accession;

                string modifications;
                if (experimental.ptm_set.ptm_combination.Count == 0)
                {
                    modifications = "Unmodified";
                }
                else
                {
                    modifications = String.Join("; ", experimental.ptm_set.ptm_combination.Select(ptm => SaveState.lollipop.theoretical_database.unlocalized_lookup[ptm.modification].id));
                }

                return label + " " + reference_accession + " " + modifications;
            }

            if (p is TheoreticalProteoform)
            {
                return p.accession + " " + p.ptm_description;
            }

            return p.accession;
        }
        /// <summary>
        /// Works out which PTM set is gained or lost going from proteoform <paramref name="p"/> to experimental
        /// proteoform <paramref name="e"/> across relation <paramref name="r"/>, and returns the PTM set that
        /// results from applying that change to <paramref name="this_ptmset"/>.
        /// </summary>
        /// <param name="e">Experimental proteoform being identified.</param>
        /// <param name="p">Proteoform at the other end of the relation; supplies the mass tolerance basis, begin/end and current PTM set.</param>
        /// <param name="theoretical_base">Theoretical proteoform passed through to <c>generate_possible_added_ptmsets</c>.</param>
        /// <param name="r">Relation whose peak supplies the delta mass and the candidate peak assignments.</param>
        /// <param name="this_ptmset">PTM set the change is applied to.</param>
        /// <returns>The modified PTM set, or null when neither an addition nor a loss explains the delta mass.</returns>
        /// <remarks>Side effect: when <c>r.represented_ptmset</c> is null it is set to the chosen change, and the delta mass of experimental-experimental relations is multiplied by the sign of the mass difference.</remarks>
        private static PtmSet determine_mod_change(ExperimentalProteoform e, Proteoform p, TheoreticalProteoform theoretical_base, ProteoformRelation r, PtmSet this_ptmset)
        {
            double mass_tolerance = p.modified_mass / 1000000 * Sweet.lollipop.mass_tolerance;
            int    sign           = Math.Sign(e.modified_mass - p.modified_mass);
            double deltaM         = Math.Sign(r.peak.DeltaMass) < 0 ? r.peak.DeltaMass : sign * r.peak.DeltaMass; // give EE relations the correct sign, but don't switch negative ET relation deltaM's

            // EE relations have PtmSets around both positive and negative deltaM, so keep only the ones near the deltaM of interest
            List <PtmSet> possible_additions = r.peak.possiblePeakAssignments.Where(peak => Math.Abs(peak.mass - deltaM) <= 1).ToList();
            PtmSet        best_addition      = generate_possible_added_ptmsets(possible_additions, Sweet.lollipop.theoretical_database.all_mods_with_mass, theoretical_base, p.begin, p.end, p.ptm_set, 1, true)
                                               .OrderBy(x => (double)x.ptm_rank_sum + Math.Abs(x.mass - deltaM) * 10E-6) // major score: delta rank; tie breaker: deltaM, where it's always less than 1
                                               .FirstOrDefault();

            PtmSet              best_loss  = null;
            List <Modification> these_mods = null; // modifications of the current set; built once, on first need

            foreach (PtmSet set in Sweet.lollipop.theoretical_database.all_possible_ptmsets)
            {
                // Cheap numeric filters first; the removability check below is quadratic in the set sizes.
                bool within_loss_tolerance = deltaM >= -set.mass - mass_tolerance && deltaM <= -set.mass + mass_tolerance;
                if (!within_loss_tolerance)
                {
                    continue;
                }

                bool better_than_current_best_loss = best_loss == null || Math.Abs(deltaM - (-set.mass)) < Math.Abs(deltaM - (-best_loss.mass));
                if (!better_than_current_best_loss)
                {
                    continue;
                }

                if (these_mods == null)
                {
                    these_mods = this_ptmset.ptm_combination.Select(ptm => ptm.modification).ToList();
                }

                // All mods in the candidate must be present in the current set, at least as many times, to remove them.
                List <Modification> those_mods = set.ptm_combination.Select(ptm => ptm.modification).ToList();
                bool can_be_removed = those_mods.All(m1 => these_mods.Count(m2 => m2.OriginalId == m1.OriginalId) >= those_mods.Count(m2 => m2.OriginalId == m1.OriginalId));
                if (can_be_removed)
                {
                    best_loss = set;
                }
            }

            if (best_addition == null && best_loss == null)
            {
                return(null);
            }

            // Make the new ptmset with ptms removed or added; a loss takes precedence over an addition.
            PtmSet with_mod_change;

            if (best_loss == null)
            {
                // Zero-mass modifications are dropped from the combined set.
                with_mod_change = new PtmSet(this_ptmset.ptm_combination.Concat(best_addition.ptm_combination).Where(ptm => ptm.modification.MonoisotopicMass != 0).ToList());
            }
            else
            {
                List <Ptm> new_combo = new List <Ptm>(this_ptmset.ptm_combination);
                foreach (Ptm ptm in best_loss.ptm_combination)
                {
                    // NOTE(review): removability above is matched on OriginalId, while removal here uses
                    // modification.Equals - if the two notions differ, nothing is removed for that ptm. Confirm.
                    new_combo.Remove(new_combo.FirstOrDefault(existing => existing.modification.Equals(ptm.modification)));
                }
                with_mod_change = new PtmSet(new_combo);
            }

            if (r.represented_ptmset == null)
            {
                r.represented_ptmset = best_loss == null ? best_addition : best_loss;
                if (r.RelationType == ProteoformComparison.ExperimentalExperimental)
                {
                    r.DeltaMass *= sign;
                }
            }

            return(with_mod_change);
        }
        /// <summary>
        /// Builds proteoform families from this community's experimental proteoforms. Families are seeded in
        /// batches of at most <c>Environment.ProcessorCount</c>, each constructed on its own thread, then
        /// de-duplicated and appended to <c>families</c>.
        /// </summary>
        /// <returns>The <c>families</c> list after construction, optional gene-centric combination, and identification of experimentals.</returns>
        public List <ProteoformFamily> construct_families()
        {
            ProteoformFamily.reset_family_counter();
            Stack <Proteoform> remaining = new Stack <Proteoform>(this.experimental_proteoforms.ToArray());

            while (remaining.Count > 0)
            {
                // Seed one family per processor and construct them concurrently.
                List <ProteoformFamily> batch   = new List <ProteoformFamily>();
                List <Thread>           workers = new List <Thread>();
                while (remaining.Count > 0 && workers.Count < Environment.ProcessorCount)
                {
                    ProteoformFamily fam = new ProteoformFamily(remaining.Pop());
                    Thread           t   = new Thread(new ThreadStart(fam.construct_family));
                    t.Start();
                    batch.Add(fam);
                    workers.Add(t);
                }

                foreach (Thread t in workers)
                {
                    t.Join();
                }

                // A hash set keeps the duplicate test O(1); Except below already compares by hash.
                HashSet <Proteoform> claimed = new HashSet <Proteoform>();
                foreach (ProteoformFamily family in batch)
                {
                    if (claimed.Contains(family.proteoforms.First()))
                    {
                        continue; // duplicate family due to arbitrary seed selection
                    }

                    claimed.UnionWith(family.proteoforms);
                    Parallel.ForEach(family.proteoforms, p => { lock (p) p.family = family; });
                    this.families.Add(family);
                }

                // Proteoforms absorbed into a family no longer need to act as seeds.
                remaining = new Stack <Proteoform>(remaining.Except(claimed));
            }

            if (Sweet.lollipop.gene_centric_families)
            {
                families = combine_gene_families(families).ToList();
            }
            Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
            Parallel.ForEach(families, f => f.identify_experimentals());
            return(families);
        }
Exemple #8
0
        /// <summary>
        /// Builds proteoform families from this community's accepted experimental proteoforms. Families are
        /// seeded in batches of at most <c>Environment.ProcessorCount</c>, each constructed on its own thread,
        /// then de-duplicated and appended to <c>families</c>.
        /// </summary>
        /// <returns>The <c>families</c> list after construction, optional gene-centric combination, and identification of experimentals.</returns>
        public List <ProteoformFamily> construct_families()
        {
            Stack <Proteoform> remaining = new Stack <Proteoform>(this.experimental_proteoforms.Where(e => e.accepted).ToArray());

            while (remaining.Count > 0)
            {
                // Seed one family per processor and construct them concurrently.
                List <ProteoformFamily> batch   = new List <ProteoformFamily>();
                List <Thread>           workers = new List <Thread>();
                while (remaining.Count > 0 && workers.Count < Environment.ProcessorCount)
                {
                    ProteoformFamily fam = new ProteoformFamily(remaining.Pop());
                    Thread           t   = new Thread(new ThreadStart(fam.construct_family));
                    t.Start();
                    batch.Add(fam);
                    workers.Add(t);
                }

                foreach (Thread t in workers)
                {
                    t.Join();
                }

                // A hash set keeps the duplicate test O(1); Except below already compares by hash.
                HashSet <Proteoform> claimed = new HashSet <Proteoform>();
                foreach (ProteoformFamily family in batch)
                {
                    if (claimed.Contains(family.proteoforms.First()))
                    {
                        continue; // duplicate family due to arbitrary seed selection
                    }

                    claimed.UnionWith(family.proteoforms);
                    Parallel.ForEach(family.proteoforms, p => { lock (p) p.family = family; });
                    this.families.Add(family);
                }

                // Proteoforms absorbed into a family no longer need to act as seeds.
                remaining = new Stack <Proteoform>(remaining.Except(claimed));
            }

            if (gene_centric_families)
            {
                families = combine_gene_families(families).ToList();
            }
            Parallel.ForEach(families, f => f.identify_experimentals());
            return(families);
        }
        /// <summary>
        /// Applies a mass shift to the experimental proteoforms found at index 0 of each grouped relation.
        /// Only experimental-theoretical relation types are handled.
        /// </summary>
        /// <param name="shift">Shift forwarded to <c>shift_masses</c>.</param>
        /// <param name="neucode_labeled">Labeling flag forwarded to <c>shift_masses</c>.</param>
        /// <returns>False when this relation type is not experimental-theoretical; true once all grouped relations have been visited.</returns>
        public bool shift_experimental_masses(int shift, bool neucode_labeled)
        {
            bool supported = RelationType == ProteoformComparison.ExperimentalTheoretical;
            if (!supported)
            {
                return false; // not currently intended for EE relations
            }

            foreach (ProteoformRelation relation in this.grouped_relations)
            {
                Proteoform candidate = relation.connected_proteoforms[0];
                if (!(candidate is ExperimentalProteoform experimental))
                {
                    continue;
                }

                // Shift each proteoform at most once, and only if it belongs to the target community.
                bool needs_shift = experimental.mass_shifted == false
                                   && SaveState.lollipop.target_proteoform_community.experimental_proteoforms.Contains(candidate);
                if (needs_shift)
                {
                    experimental.shift_masses(shift, neucode_labeled);
                }
            }

            return true;
        }
Exemple #10
0
        /// <summary>
        /// Creates a relation between two proteoforms, assigns it an instance id, looks up a candidate PTM set
        /// for experimental-theoretical/decoy relations, and evaluates whether the delta mass's decimal part
        /// lies outside the "no man's land" between the CH2 and HPO3 mass-defect bounds.
        /// </summary>
        /// <param name="pf1">First connected proteoform; supplies the lysine count when NeuCode labeling is on.</param>
        /// <param name="pf2">Second connected proteoform; a candidate PTM set is only sought when it is a <c>TheoreticalProteoform</c>.</param>
        /// <param name="relation_type">Type of comparison this relation represents.</param>
        /// <param name="delta_mass">Mass difference between the two proteoforms.</param>
        /// <param name="current_directory">Directory containing "elements.dat", loaded the first time the formulas are needed.</param>
        public ProteoformRelation(Proteoform pf1, Proteoform pf2, ProteoformComparison relation_type, double delta_mass, string current_directory)
        {
            connected_proteoforms[0] = pf1;
            connected_proteoforms[1] = pf2;
            RelationType             = relation_type;
            DeltaMass                = delta_mass;

            // Read and increment under the same lock; reading the counter outside the lock let two
            // concurrently constructed relations receive the same id.
            lock (SaveState.lollipop)
            {
                InstanceId       = instanceCounter;
                instanceCounter += 1;
            }

            // NOTE(review): this lazy initialization is not synchronized; confirm whether relations are
            // constructed concurrently before the formulas are first set.
            if (CH2 == null || HPO3 == null)
            {
                Loaders.LoadElements(Path.Combine(current_directory, "elements.dat"));
                CH2  = ChemicalFormula.ParseFormula("C1 H2");
                HPO3 = ChemicalFormula.ParseFormula("H1 O3 P1");
            }

            if (SaveState.lollipop.neucode_labeled)
            {
                lysine_count = pf1.lysine_count;
            }

            if ((relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy) &&
                SaveState.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(Math.Round(delta_mass, 1), out List <PtmSet> candidate_sets) &&
                pf2 is TheoreticalProteoform t)
            {
                double        mass_tolerance = t.modified_mass / 1000000 * (double)SaveState.lollipop.mass_tolerance;
                List <PtmSet> narrower_range_of_candidates = candidate_sets.Where(s => Math.Abs(s.mass - delta_mass) < 0.05).ToList();
                candidate_ptmset = t.generate_possible_added_ptmsets(narrower_range_of_candidates, delta_mass, mass_tolerance, SaveState.lollipop.theoretical_database.all_mods_with_mass, t, t.sequence, SaveState.lollipop.mod_rank_first_quartile)
                                   .OrderBy(x => x.ptm_rank_sum + Math.Abs(Math.Abs(x.mass) - Math.Abs(delta_mass)) * 10E-6) // major score: delta rank; tie breaker: deltaM, where it's always less than 1
                                   .FirstOrDefault();
            }

            // Start the model (0 Da) at the mass defect of CH2 or HPO3 itself, allowing the peak width tolerance on either side
            double half_peak_width = RelationType == ProteoformComparison.ExperimentalTheoretical || RelationType == ProteoformComparison.ExperimentalDecoy ?
                                     SaveState.lollipop.peak_width_base_et / 2 :
                                     SaveState.lollipop.peak_width_base_ee / 2;
            double low_decimal_bound  = half_peak_width + ((CH2.MonoisotopicMass - Math.Truncate(CH2.MonoisotopicMass)) / CH2.MonoisotopicMass) * (Math.Abs(delta_mass) <= CH2.MonoisotopicMass ? CH2.MonoisotopicMass : Math.Abs(delta_mass));
            double high_decimal_bound = 1 - half_peak_width + ((HPO3.MonoisotopicMass - Math.Ceiling(HPO3.MonoisotopicMass)) / HPO3.MonoisotopicMass) * (Math.Abs(delta_mass) <= HPO3.MonoisotopicMass ? HPO3.MonoisotopicMass : Math.Abs(delta_mass));
            double delta_mass_decimal = Math.Abs(delta_mass - Math.Truncate(delta_mass));

            // Once the bounds meet or cross, every decimal value counts as outside no man's land.
            outside_no_mans_land = delta_mass_decimal <= low_decimal_bound || delta_mass_decimal >= high_decimal_bound ||
                                   high_decimal_bound <= low_decimal_bound;
        }
Exemple #11
0
 /// <summary>
 /// Creates a family around the given seed proteoform and gives it the next family id.
 /// </summary>
 /// <param name="seed">Proteoform the family is constructed from.</param>
 public ProteoformFamily(Proteoform seed)
 {
     this.seed = seed;

     // Advance the shared counter and use the new value as this family's id.
     this.family_id = ++family_counter;
 }
Exemple #12
0
        /// <summary>
        /// Rebuilds <c>topdown_uniprot_mods</c>: for each distinct non-adduct modification id in
        /// <c>topdown_ptm_set</c>, lists the positions between <c>topdown_begin</c> and <c>topdown_end</c> at which
        /// the matching theoretical's expanded proteins list that modification as possible. Sets
        /// <c>topdown_novel_mods</c> when this proteoform carries more copies of a modification than positions
        /// found. The same report is appended for every entry of <c>ambiguous_topdown_hits</c>, separated by " | ".
        /// </summary>
        /// <remarks>
        /// <c>topdown_novel_mods</c> is only ever set to true here, never reset. When the community or the
        /// accession has no theoretical entry, <c>topdown_uniprot_mods</c> is left as the empty string.
        /// </remarks>
        private void get_uniprot_mods()
        {
            // Distinct, sorted ids of the non-adduct modifications on this proteoform.
            var mods = topdown_ptm_set.ptm_combination.Where(p => !Proteoform.modification_is_adduct(p.modification))
                       .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification)).ToList().Distinct().OrderBy(m => m).ToList();

            topdown_uniprot_mods = "";
            string add = "";

            if (Sweet.lollipop.theoretical_database.theoreticals_by_accession.ContainsKey(Sweet.lollipop.target_proteoform_community.community_number))
            {
                // Lookup key is the accession with everything from the first '_' or '-' onward removed.
                Sweet.lollipop.theoretical_database.theoreticals_by_accession[Sweet.lollipop.target_proteoform_community.community_number].TryGetValue(accession.Split('_')[0].Split('-')[0], out var matching_theoretical);
                if (matching_theoretical != null)
                {
                    foreach (string mod in mods)
                    {
                        // Positions within [topdown_begin, topdown_end] where this modification id is listed as possible.
                        List <int> theo_ptms = matching_theoretical.First().ExpandedProteinList.SelectMany(p => p
                                                                                                           .OneBasedPossibleLocalizedModifications)
                                               .Where(p => p.Key >= topdown_begin && p.Key <= topdown_end &&
                                                      p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                               .Select(m => m.Key).ToList();
                        if (theo_ptms.Count > 0)
                        {
                            add += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                        }
                        // More copies of this modification on the proteoform than positions found => flagged as novel.
                        if (topdown_ptm_set.ptm_combination.Where(ptm => !Proteoform.modification_is_adduct(ptm.modification)).Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                            .Count(m => m == mod) > theo_ptms.Count)
                        {
                            topdown_novel_mods = true;
                        }
                    }
                    topdown_uniprot_mods += add;
                    if (add.Length == 0)
                    {
                        topdown_uniprot_mods += "N/A";
                    }

                    foreach (var ambig_id in ambiguous_topdown_hits)
                    {
                        // NOTE(review): this lookup uses this object's own accession (the same key as above), not
                        // anything taken from ambig_id - confirm whether the ambiguous hit's accession was intended.
                        Sweet.lollipop.theoretical_database.theoreticals_by_accession[Sweet.lollipop.target_proteoform_community.community_number].TryGetValue(accession.Split('_')[0].Split('-')[0], out var matching_ambig_theoretical);
                        if (matching_ambig_theoretical != null)
                        {
                            // Distinct, sorted ids of the non-adduct modifications on the ambiguous hit.
                            var ambig_mods = ambig_id.ptm_list.Where(p => !Proteoform.modification_is_adduct(p.modification))
                                             .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification)).ToList().Distinct().OrderBy(m => m).ToList();

                            topdown_uniprot_mods += " | ";
                            add = "";
                            foreach (var mod in ambig_mods)
                            {
                                // Positions within [ambig_id.begin, ambig_id.end] where this modification id is listed as possible.
                                List <int> theo_ptms = matching_ambig_theoretical.First().ExpandedProteinList.SelectMany(p => p
                                                                                                                         .OneBasedPossibleLocalizedModifications)
                                                       .Where(p => p.Key >= ambig_id.begin && p.Key <= ambig_id.end &&
                                                              p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                                       .Select(m => m.Key).ToList();
                                if (theo_ptms.Count > 0)
                                {
                                    add += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                                }
                                if (ambig_id.ptm_list.Where(ptm => !Proteoform.modification_is_adduct(ptm.modification)).Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                    .Count(m => m == mod) > theo_ptms.Count)
                                {
                                    topdown_novel_mods = true;
                                }
                            }
                        }
                        // NOTE(review): the append below sits outside the null check while the separator and the
                        // reset of `add` sit inside it, so a failed lookup would re-append the previous text
                        // without a " | " separator - confirm whether that path can occur.
                        topdown_uniprot_mods += add;
                        if (add.Length == 0)
                        {
                            topdown_uniprot_mods += "N/A";
                        }
                    }
                }
            }
        }
Exemple #13
0
        /// <summary>
        /// Builds proteoform families from this community's accepted experimental proteoforms. Families are
        /// seeded in batches of at most <c>Environment.ProcessorCount</c>, each constructed on its own thread,
        /// then de-duplicated and appended to <c>families</c>.
        /// </summary>
        /// <returns>The <c>families</c> list after construction, optional gene-centric combination, and identification of experimentals.</returns>
        public List <ProteoformFamily> construct_families()
        {
            ProteoformFamily.reset_family_counter();
            Stack <Proteoform> remaining = new Stack <Proteoform>(this.experimental_proteoforms.Where(e => e.accepted).ToArray());

            while (remaining.Count > 0)
            {
                // Seed one family per processor and construct them concurrently.
                List <ProteoformFamily> batch   = new List <ProteoformFamily>();
                List <Thread>           workers = new List <Thread>();
                while (remaining.Count > 0 && workers.Count < Environment.ProcessorCount)
                {
                    ProteoformFamily fam = new ProteoformFamily(remaining.Pop());
                    Thread           t   = new Thread(new ThreadStart(fam.construct_family));
                    t.Start();
                    batch.Add(fam);
                    workers.Add(t);
                }

                foreach (Thread t in workers)
                {
                    t.Join();
                }

                // A hash set keeps the duplicate test O(1); Except below already compares by hash.
                HashSet <Proteoform> claimed = new HashSet <Proteoform>();
                foreach (ProteoformFamily family in batch)
                {
                    if (claimed.Contains(family.proteoforms.First()))
                    {
                        continue; // duplicate family due to arbitrary seed selection
                    }

                    claimed.UnionWith(family.proteoforms);
                    Parallel.ForEach(family.proteoforms, p => { lock (p) p.family = family; });
                    this.families.Add(family);
                }

                // Proteoforms absorbed into a family no longer need to act as seeds.
                remaining = new Stack <Proteoform>(remaining.Except(claimed));
            }

            if (Lollipop.gene_centric_families)
            {
                families = combine_gene_families(families).ToList();
            }
            Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
            Parallel.ForEach(families, f => f.identify_experimentals());
            //read in BU results if available, map to proteoforms.
            //Sweet.lollipop.BottomUpPSMList.Clear();
            //BottomUpReader.bottom_up_PTMs_not_in_dictionary.Clear();
            //foreach (InputFile file in Sweet.lollipop.input_files.Where(f => f.purpose == Purpose.BottomUp))
            //{
            //    Sweet.lollipop.BottomUpPSMList.AddRange(BottomUpReader.ReadBUFile(file.complete_path, theoreticals_by_accession.Values.ToList()));
            //}
            return(families);
        }
        public ProteoformRelation(Proteoform pf1, Proteoform pf2, ProteoformComparison relation_type, double delta_mass, string current_directory)
        {
            connected_proteoforms[0] = pf1;
            connected_proteoforms[1] = pf2;
            RelationType             = relation_type;
            DeltaMass  = delta_mass;
            InstanceId = instanceCounter;
            lock (Sweet.lollipop) instanceCounter += 1; //Not thread safe

            if (CH2 == null || HPO3 == null)
            {
                CH2  = ChemicalFormula.ParseFormula("C1 H2");
                HPO3 = ChemicalFormula.ParseFormula("H1 O3 P1");
            }

            if (Sweet.lollipop.neucode_labeled)
            {
                lysine_count = pf1.lysine_count;
            }


            List <PtmSet> candidate_sets = new List <PtmSet>();

            if (Sweet.lollipop.et_use_notch && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy))
            {
                if (Sweet.lollipop.et_use_notch && !Sweet.lollipop.et_notch_ppm)
                {
                    double mass = delta_mass - Sweet.lollipop.notch_tolerance_et;
                    while (mass <= delta_mass + Sweet.lollipop.notch_tolerance_et)
                    {
                        Sweet.lollipop.theoretical_database.possible_ptmset_dictionary_notches.TryGetValue(
                            Math.Round(mass, 1), out List <PtmSet> candidates);
                        if (candidates != null)
                        {
                            candidate_sets.AddRange(candidates);
                        }

                        mass += 0.1;
                    }

                    candidate_sets = candidate_sets.Distinct().ToList();
                }
                else
                {
                    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary_notches.TryGetValue(Math.Round(delta_mass, 1), out candidate_sets);
                }

                if (candidate_sets != null)
                {
                    candidate_sets = candidate_sets.Where(s => Sweet.lollipop.et_notch_ppm
                        ? Math.Abs(s.mass - delta_mass) * 1e6 / pf1.modified_mass <
                                                          Sweet.lollipop.notch_tolerance_et
                        : Math.Abs(s.mass - delta_mass) < Sweet.lollipop.notch_tolerance_et).ToList();
                    candidate_ptmset = candidate_sets.OrderBy(s => s.ptm_rank_sum).FirstOrDefault();
                }
            }

            else if (Sweet.lollipop.ee_use_notch &&
                     (relation_type == ProteoformComparison.ExperimentalExperimental ||
                      relation_type == ProteoformComparison.ExperimentalFalse))
            {
                if (Sweet.lollipop.ee_use_notch && !Sweet.lollipop.ee_notch_ppm)
                {
                    double mass = delta_mass - Sweet.lollipop.notch_tolerance_ee;
                    while (mass <= delta_mass + Sweet.lollipop.notch_tolerance_ee)
                    {
                        Sweet.lollipop.theoretical_database.possible_ptmset_dictionary_notches.TryGetValue(
                            Math.Round(mass, 1), out List <PtmSet> candidates);
                        if (candidates != null)
                        {
                            candidate_sets.AddRange(candidates);
                        }

                        mass += 0.1;
                    }

                    candidate_sets = candidate_sets.Distinct().ToList();
                }
                else
                {
                    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary_notches.TryGetValue(Math.Round(delta_mass, 1), out candidate_sets);
                }

                if (candidate_sets != null)
                {
                    candidate_sets = candidate_sets.Where(s => Sweet.lollipop.ee_notch_ppm
                        ? Math.Abs(s.mass - delta_mass) * 1e6 / pf1.modified_mass <
                                                          Sweet.lollipop.notch_tolerance_ee
                        : Math.Abs(s.mass - delta_mass) < Sweet.lollipop.notch_tolerance_ee).ToList();
                    candidate_ptmset = candidate_sets.OrderBy(s => s.ptm_rank_sum).FirstOrDefault();
                }
            }
            else if
            (relation_type == ProteoformComparison.ExperimentalTheoretical ||
             relation_type == ProteoformComparison.ExperimentalDecoy)
            {
                if (Sweet.lollipop.peak_width_base_et > 0.09)
                {
                    double mass = delta_mass - Sweet.lollipop.peak_width_base_et;
                    while (mass <= delta_mass + Sweet.lollipop.peak_width_base_et)
                    {
                        Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(
                            Math.Round(mass, 1), out List <PtmSet> candidates);
                        if (candidates != null)
                        {
                            candidate_sets.AddRange(candidates);
                        }

                        mass += 0.1;
                    }

                    candidate_sets = candidate_sets.Distinct().ToList();
                }
                else
                {
                    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.TryGetValue(
                        Math.Round(delta_mass, 1), out candidate_sets);
                }

                if (pf2 as TheoreticalProteoform != null && candidate_sets != null && candidate_sets.Count > 0)
                {
                    List <PtmSet> narrower_range_of_candidates = new List <PtmSet>();
                    if (Sweet.lollipop.et_use_notch)
                    {
                        narrower_range_of_candidates = candidate_sets;
                    }
                    else
                    {
                        narrower_range_of_candidates = candidate_sets
                                                       .Where(s => Math.Abs(s.mass - delta_mass) < Sweet.lollipop.peak_width_base_et).ToList();
                    }

                    TheoreticalProteoform t = pf2 as TheoreticalProteoform;
                    candidate_ptmset = Proteoform.generate_possible_added_ptmsets(narrower_range_of_candidates,
                                                                                  Sweet.lollipop.theoretical_database.all_mods_with_mass, t, pf2.begin, pf2.end,
                                                                                  pf2.ptm_set,
                                                                                  Sweet.lollipop.mod_rank_first_quartile, false).OrderBy(x =>
                                                                                                                                         x.ptm_rank_sum +
                                                                                                                                         Math.Abs(Math.Abs(x.mass) - Math.Abs(delta_mass)) *
                                                                                                                                         10E-6) // major score: delta rank; tie breaker: deltaM, where it's always less than 1
                                       .FirstOrDefault();
                }
            }

            // Start the model (0 Da) at the mass defect of CH2 or HPO3 itself, allowing the peak width tolerance on either side
            double half_peak_width = RelationType == ProteoformComparison.ExperimentalTheoretical || RelationType == ProteoformComparison.ExperimentalDecoy ?
                                     Sweet.lollipop.peak_width_base_et / 2 :
                                     Sweet.lollipop.peak_width_base_ee / 2;
            double low_decimal_bound  = half_peak_width + ((CH2.MonoisotopicMass - Math.Truncate(CH2.MonoisotopicMass)) / CH2.MonoisotopicMass) * (Math.Abs(delta_mass) <= CH2.MonoisotopicMass ? CH2.MonoisotopicMass : Math.Abs(delta_mass));
            double high_decimal_bound = 1 - half_peak_width + ((HPO3.MonoisotopicMass - Math.Ceiling(HPO3.MonoisotopicMass)) / HPO3.MonoisotopicMass) * (Math.Abs(delta_mass) <= HPO3.MonoisotopicMass ? HPO3.MonoisotopicMass : Math.Abs(delta_mass));
            double delta_mass_decimal = Math.Abs(delta_mass - Math.Truncate(delta_mass));

            outside_no_mans_land = delta_mass_decimal <= low_decimal_bound || delta_mass_decimal >= high_decimal_bound ||
                                   high_decimal_bound <= low_decimal_bound;
            if (Sweet.lollipop.et_use_notch && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy))
            {
                outside_no_mans_land = true;
            }
            if (Sweet.lollipop.ee_use_notch && (relation_type == ProteoformComparison.ExperimentalExperimental || relation_type == ProteoformComparison.ExperimentalFalse))
            {
                outside_no_mans_land = true;
            }
        }
        /// <summary>
        /// Propagates the identification carried by this proteoform onto the experimental proteoform
        /// <paramref name="e"/>. If <paramref name="e"/> has no linked proteoform references yet, it receives
        /// this proteoform's reference chain, begin/end and the (terminus-stripped) PTM set. Otherwise the
        /// proposed identification is compared with the existing one and, when it differs, recorded in
        /// <c>e.ambiguous_identifications</c>. Finally <c>e.uniprot_mods</c> is rebuilt and
        /// <c>e.novel_mods</c> is raised when a modification occurs more often than the theoretical base
        /// has annotated positions for it.
        /// </summary>
        /// <param name="e">Experimental proteoform that receives the identification.</param>
        /// <param name="set">
        /// PTM set proposed for <paramref name="e"/>. Its <c>ptm_combination</c> list is modified in place:
        /// entries consumed as begin/end shifts are removed from it.
        /// </param>
        /// <param name="r">Relation handed through to <c>determine_mod_change</c> for ambiguous identifications.</param>
        /// <param name="theoretical_base">Theoretical proteoform whose sequence and annotated modifications are consulted.</param>
        private void assign_pf_identity(ExperimentalProteoform e, PtmSet set, ProteoformRelation r, TheoreticalProteoform theoretical_base)
        {
            if (e.linked_proteoform_references == null)
            {
                // First identification of e: extend this proteoform's reference chain by this proteoform itself.
                e.linked_proteoform_references = new List<Proteoform>(this.linked_proteoform_references);
                e.linked_proteoform_references.Add(this);

                int begin = this.begin;
                int end   = this.end;
                e.ptm_set = strip_terminal_ptms(set, theoretical_base, ref begin, ref end);
                e.begin   = begin;
                e.end     = end;

                if (e.gene_name == null)
                {
                    e.gene_name = this.gene_name;
                }
                else if (!e.topdown_id)
                {
                    // Enumerable.Concat returns a new sequence and leaves its source untouched, so the merged
                    // names have to be stored back. Distinct keeps the list from growing when both
                    // proteoforms already carry the same names.
                    e.gene_name.gene_names = e.gene_name.gene_names
                                             .Concat(this.gene_name.gene_names)
                                             .Distinct()
                                             .ToList();
                }
            }
            else
            {
                // e is already identified: work out what this identification would be and flag e as
                // ambiguous if it disagrees with the one e already has.
                int    begin   = this.begin;
                int    end     = this.end;
                PtmSet ptm_set = strip_terminal_ptms(set, theoretical_base, ref begin, ref end);

                if (e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) !=
                    this.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ||
                    e.begin != begin || e.end != end || !e.ptm_set.same_ptmset(ptm_set, true))
                {
                    e.ambiguous = true;
                    Proteoform linked_proteoform_reference =
                        this.linked_proteoform_references == null || this.linked_proteoform_references.Count == 0
                            ? this
                            : this.linked_proteoform_references.First();
                    Tuple<Proteoform, int, int, PtmSet> new_id =
                        new Tuple<Proteoform, int, int, PtmSet>(linked_proteoform_reference, begin, end, ptm_set);
                    lock (e.ambiguous_identifications)
                    {
                        // Only record identifications that are not already listed.
                        if (!e.ambiguous_identifications.Any(p =>
                                                             p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ==
                                                             new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) &&
                                                             p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 &&
                                                             p.Item4.same_ptmset(new_id.Item4, true)))
                        {
                            e.ambiguous_identifications.Add(new_id);
                        }
                    }
                }
            }

            // If this proteoform is itself an ambiguous experimental, carry each of its alternative
            // identifications over to e as well.
            ExperimentalProteoform this_experimental = this as ExperimentalProteoform;
            if (this_experimental != null && this_experimental.ambiguous)
            {
                foreach (var id in this.ambiguous_identifications)
                {
                    TheoreticalProteoform id_theoretical_base = id.Item1 as TheoreticalProteoform;

                    var changed_set = determine_mod_change(e, this, id_theoretical_base, r, id.Item4);
                    if (changed_set == null)
                    {
                        continue;
                    }

                    int    begin   = id.Item2;
                    int    end     = id.Item3;
                    PtmSet ptm_set = strip_terminal_ptms(changed_set, id_theoretical_base, ref begin, ref end);

                    lock (e.ambiguous_identifications)
                    {
                        var new_id = new Tuple<Proteoform, int, int, PtmSet>(id.Item1, begin, end, ptm_set);

                        // Skip identifications equal to e's primary one or to one that is already listed.
                        bool differs_from_primary =
                            e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) !=
                            new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ||
                            e.begin != new_id.Item2 || e.end != new_id.Item3 || !e.ptm_set.same_ptmset(new_id.Item4, true);

                        if (differs_from_primary &&
                            !e.ambiguous_identifications.Any(p =>
                                                             p.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) ==
                                                             new_id.Item1.gene_name.get_prefered_name(Lollipop.preferred_gene_label) &&
                                                             p.Item2 == new_id.Item2 && p.Item3 == new_id.Item3 &&
                                                             p.Item4.same_ptmset(new_id.Item4, true)))
                        {
                            e.ambiguous_identifications.Add(new_id);
                            e.ambiguous = true;
                        }
                    }
                }
            }

            // Rebuild the modification summary from the primary and all ambiguous PTM sets,
            // ignoring "Deconvolution Error" entries.
            e.uniprot_mods = "";
            var mod_ids = e.ptm_set.ptm_combination
                          .Concat(e.ambiguous_identifications.SelectMany(i => i.Item4.ptm_combination))
                          .Where(ptm => ptm.modification.ModificationType != "Deconvolution Error")
                          .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                          .ToList().Distinct().OrderBy(m => m);
            foreach (string mod in mod_ids)
            {
                // positions within [e.begin, e.end] where the theoretical base annotates this modification
                List<int> theo_ptms = theoretical_base.ExpandedProteinList.First()
                                      .OneBasedPossibleLocalizedModifications
                                      .Where(p => p.Key >= e.begin && p.Key <= e.end &&
                                             p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                      .Select(m => m.Key).ToList();
                if (theo_ptms.Count > 0)
                {
                    e.uniprot_mods += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                }

                // More occurrences of the modification than annotated positions => novel modification.
                if (e.ptm_set.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                    .Count(m => m == mod) > theo_ptms.Count ||
                    e.ambiguous_identifications.Any(i => i.Item4.ptm_combination.Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                                    .Count(m => m == mod) > theo_ptms.Count))
                {
                    e.novel_mods = true;
                }
            }
        }

        /// <summary>
        /// Converts the terminal entries of <paramref name="set"/> into begin/end shifts and returns a new
        /// <c>PtmSet</c> built from the remaining modifications.
        /// Every "AminoAcid" entry moves <paramref name="begin"/> back by one (the original author's note
        /// calls this retention of M). A "Missing" entry moves <paramref name="begin"/> forward, or
        /// <paramref name="end"/> back, when its target matches the residue looked up at that terminus.
        /// The consumed entries are removed from <c>set.ptm_combination</c> in place.
        /// </summary>
        /// <param name="set">PTM set to process; its <c>ptm_combination</c> list is modified.</param>
        /// <param name="theoretical_base">Provides the sequence and the begin offset used for residue lookups.</param>
        /// <param name="begin">In: begin of the identifying proteoform. Out: adjusted begin.</param>
        /// <param name="end">In: end of the identifying proteoform. Out: adjusted end.</param>
        /// <returns>A new <c>PtmSet</c> constructed from the remaining <c>ptm_combination</c>.</returns>
        private static PtmSet strip_terminal_ptms(PtmSet set, TheoreticalProteoform theoretical_base, ref int begin, ref int end)
        {
            // Residue lookups always use the unadjusted positions, even after begin/end were shifted.
            int reference_begin = begin;
            int reference_end   = end;
            var remove = new List<Ptm>();

            //do retention of M first
            foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "AminoAcid"))
            {
                begin--;
                remove.Add(mod);
            }

            foreach (var mod in set.ptm_combination.Where(m => m.modification.ModificationType == "Missing"))
            {
                if (theoretical_base.sequence[reference_begin - theoretical_base.begin].ToString() == mod.modification.Target.ToString())
                {
                    begin++;
                    remove.Add(mod); //dont have in ptmset --> change the begin & end
                }
                // NOTE(review): the C-terminal lookup uses (end - begin) while the N-terminal lookup is
                // offset by theoretical_base.begin. The two agree only when begin equals
                // theoretical_base.begin; confirm whether (end - theoretical_base.begin) was intended.
                else if (theoretical_base.sequence[reference_end - reference_begin].ToString() == mod.modification.Target.ToString())
                {
                    end--;
                    remove.Add(mod);
                }
            }

            foreach (var ptm in remove)
            {
                set.ptm_combination.Remove(ptm);
            }

            return new PtmSet(set.ptm_combination);
        }
Exemple #16
0
        /// <summary>
        /// Identifies experimental proteoforms and annotates each of them.
        /// Identification is seeded from top-down proteoforms (when <c>identify_from_td_nodes</c> is set)
        /// and from the theoretical proteoforms, then repeated from every newly identified experimental
        /// until no further experimentals are returned. Afterwards, for each experimental proteoform,
        /// the adduct flag, <c>uniprot_mods</c>, <c>novel_mods</c>, the proteoform level with its
        /// description, and <c>new_intact_mass_id</c> are computed in parallel. When
        /// <c>remove_bad_connections</c> is set, each relation's <c>Accepted</c> is finally set to its
        /// <c>Identification</c>.
        /// </summary>
        public void identify_experimentals()
        {
            HashSet<ExperimentalProteoform> identified_experimentals = new HashSet<ExperimentalProteoform>();

            if (Sweet.lollipop.identify_from_td_nodes)
            {
                foreach (TopDownProteoform topdown in experimental_proteoforms.Where(e => e.topdown_id))
                {
                    // Look up theoreticals by the accession with its '_' and '-' suffixes cut off.
                    Sweet.lollipop.theoretical_database
                    .theoreticals_by_accession[Sweet.lollipop.target_proteoform_community.community_number]
                    .TryGetValue(topdown.accession.Split('_')[0].Split('-')[0], out var t);
                    if (t != null && t.Count > 0)
                    {
                        // Build a theoretical stand-in for the top-down proteoform to identify from.
                        TheoreticalProteoform theoretical =
                            new TheoreticalProteoform(topdown.accession, topdown.name, topdown.sequence,
                                                      t.First().ExpandedProteinList, topdown.modified_mass, topdown.lysine_count,
                                                      topdown.topdown_ptm_set, true, false, null);
                        theoretical.topdown_theoretical    = true;
                        theoretical.new_topdown_proteoform = true;
                        theoretical.begin = topdown.topdown_begin;
                        theoretical.end   = topdown.topdown_end;
                        foreach (ExperimentalProteoform e in topdown.identify_connected_experimentals(theoretical, topdown.topdown_begin, topdown.topdown_end,
                                                                                                      new PtmSet(topdown.topdown_ptm_set.ptm_combination), null))
                        {
                            identified_experimentals.Add(e);
                        }
                    }
                }
            }

            // Theoreticals with topdown_theoretical == false are processed first (false sorts before true).
            foreach (TheoreticalProteoform t in theoretical_proteoforms.OrderBy(t => t.topdown_theoretical))
            {
                foreach (ExperimentalProteoform e in t.identify_connected_experimentals(t, t.begin, t.end, t.ptm_set, t.linked_proteoform_references))
                {
                    identified_experimentals.Add(e);
                }
            }

            //Continue looking for new experimental identifications until no more remain to be identified
            // Start with the experimentals whose best relation deviates least from its candidate PTM set
            // mass (1e6 when no relation has a candidate), then by modified mass.
            List<ExperimentalProteoform> newly_identified_experimentals = new List<ExperimentalProteoform>(identified_experimentals)
                .OrderBy(p => p.relationships.Any(r => r.candidate_ptmset != null)
                             ? p.relationships.Where(r => r.candidate_ptmset != null).Min(r => Math.Abs(r.DeltaMass - r.candidate_ptmset.mass))
                             : 1e6)
                .ThenBy(p => p.modified_mass)
                .ToList();

            while (newly_identified_experimentals.Count > 0)
            {
                HashSet<ExperimentalProteoform> tmp_new_experimentals = new HashSet<ExperimentalProteoform>();
                foreach (ExperimentalProteoform id_experimental in newly_identified_experimentals)
                {
                    foreach (ExperimentalProteoform new_e in id_experimental.identify_connected_experimentals(
                                 id_experimental.linked_proteoform_references.First() as TheoreticalProteoform,
                                 id_experimental.begin, id_experimental.end, id_experimental.ptm_set,
                                 id_experimental.linked_proteoform_references))
                    {
                        identified_experimentals.Add(new_e);
                        tmp_new_experimentals.Add(new_e);
                    }
                }
                newly_identified_experimentals = new List<ExperimentalProteoform>(tmp_new_experimentals);
            }

            // Top-down ids as "accession_sequence_sorted mod ids", split on '_' once up front so the
            // parallel loop below does not re-split them for every comparison.
            List<string[]> topdown_id_parts = Sweet.lollipop.topdown_proteoforms
                .Select(p => (p.accession.Split('_')[0].Split('-')[0] + "_" + p.sequence + "_" +
                              string.Join(", ", p.topdown_ptm_set.ptm_combination
                                                .Where(m => m.modification.ModificationType != "Deconvolution Error")
                                                .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                                .OrderBy(m => m))).Split('_'))
                .ToList();

            Parallel.ForEach(experimental_proteoforms, e =>
            {
                //determine identified experimentals that are adducts
                //checks if any experimentals have same mods as e's ptmset, except e has additional adduct only mods.
                e.adduct =
                    e.linked_proteoform_references != null &&
                    e.ptm_set.ptm_combination.Any(m => Proteoform.modification_is_adduct(m.modification)) &&
                    experimental_proteoforms.Any(l =>
                                                 l.linked_proteoform_references != null &&
                                                 l.gene_name.get_prefered_name(Lollipop.preferred_gene_label) == e.gene_name.get_prefered_name(Lollipop.preferred_gene_label) &&
                                                 l.ptm_set.ptm_combination.Count < e.ptm_set.ptm_combination.Count &&
                                                 e.ptm_set.ptm_combination.Where(m => l.ptm_set.ptm_combination.Count(p => UnlocalizedModification.LookUpId(p.modification) == UnlocalizedModification.LookUpId(m.modification)) != e.ptm_set.ptm_combination.Count(p => UnlocalizedModification.LookUpId(p.modification) == UnlocalizedModification.LookUpId(m.modification)))
                                                 .Count(p => !Proteoform.modification_is_adduct(p.modification))
                                                 == 0
                                                 );

                (e as TopDownProteoform)?.set_correct_id();

                if (e.linked_proteoform_references != null)
                {
                    // Summary for the primary identification (adduct modifications excluded).
                    var mods = e.ptm_set.ptm_combination.Where(p => !Proteoform.modification_is_adduct(p.modification))
                               .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification)).ToList().Distinct().OrderBy(m => m).ToList();
                    e.uniprot_mods = "";
                    string add     = "";
                    foreach (string mod in mods)
                    {
                        // positions with mod
                        List<int> theo_ptms = (e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList
                                              .SelectMany(p => p.OneBasedPossibleLocalizedModifications)
                                              .Where(p => p.Key >= e.begin && p.Key <= e.end &&
                                                     p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                              .Select(m => m.Key).ToList();
                        if (theo_ptms.Count > 0)
                        {
                            add += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                        }

                        // More occurrences than annotated positions => novel modification.
                        if (e.ptm_set.ptm_combination.Where(ptm => ptm.modification.ModificationType != "Deconvolution Error" && !Proteoform.modification_is_adduct(ptm.modification)).Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                            .Count(m => m == mod) > theo_ptms.Count)
                        {
                            e.novel_mods = true;
                        }
                    }
                    e.uniprot_mods += add;
                    if (add.Length == 0)
                    {
                        e.uniprot_mods += "N/A";
                    }

                    // One " | "-separated summary per ambiguous identification.
                    foreach (var ambig_id in e.ambiguous_identifications)
                    {
                        var ambig_mods = ambig_id.ptm_set.ptm_combination.Where(p => !Proteoform.modification_is_adduct(p.modification))
                                         .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification)).ToList().Distinct().OrderBy(m => m).ToList();

                        e.uniprot_mods += " | ";
                        add             = "";
                        foreach (var mod in ambig_mods)
                        {
                            // positions with mod
                            List<int> theo_ptms = ambig_id.theoretical_base.ExpandedProteinList
                                                  .SelectMany(p => p.OneBasedPossibleLocalizedModifications)
                                                  .Where(p => p.Key >= ambig_id.begin && p.Key <= ambig_id.end &&
                                                         p.Value.Select(m => UnlocalizedModification.LookUpId(m)).Contains(mod))
                                                  .Select(m => m.Key).ToList();
                            if (theo_ptms.Count > 0)
                            {
                                add += mod + " @ " + string.Join(", ", theo_ptms) + "; ";
                            }
                            if (ambig_id.ptm_set.ptm_combination.Where(ptm => ptm.modification.ModificationType != "Deconvolution Error" && !Proteoform.modification_is_adduct(ptm.modification)).Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                .Count(m => m == mod) > theo_ptms.Count)
                            {
                                e.novel_mods = true;
                            }
                        }
                        e.uniprot_mods += add;
                        if (add.Length == 0)
                        {
                            e.uniprot_mods += "N/A";
                        }
                    }
                }

                //determine level #
                e.proteoform_level_description = "";
                if (e.linked_proteoform_references == null)
                {
                    e.proteoform_level             = 5;
                    e.proteoform_level_description = "Unidentified";
                }
                else if (e.ambiguous_identifications.Count == 0)
                {
                    if (e.ptm_set.ptm_combination.Count == 0)
                    {
                        e.proteoform_level = 1;
                    }
                    else
                    {
                        e.proteoform_level              = 2;
                        e.proteoform_level_description += "PTM localization ambiguity; ";
                    }

                    //check if accessions had been grouped in constructing the theoretical database
                    if ((e.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(p => p.AccessionList).Select(a => a.Split('_')[0]).Distinct().Count() > 1)
                    {
                        e.proteoform_level             += 1;
                        e.proteoform_level_description += "Gene ambiguity; ";
                    }
                }
                else
                {
                    TheoreticalProteoform linked_theoretical = e.linked_proteoform_references.First() as TheoreticalProteoform;

                    var unique_accessions = new List<string>()
                    {
                        e.linked_proteoform_references.First().accession.Split('_')[0].Split('-')[0]
                    }.Concat(e.ambiguous_identifications.Select(a => a.theoretical_base.accession.Split('_')[0].Split('-')[0])).Distinct();

                    var unique_sequences = new List<string>()
                    {
                        ExperimentalProteoform.get_sequence(linked_theoretical, e.begin, e.end)
                    }.Concat(e.ambiguous_identifications.Select(a => ExperimentalProteoform.get_sequence(a.theoretical_base, a.begin, a.end))).Distinct();

                    // Modification ids are sorted on BOTH sides before joining, so that the same PTM set
                    // listed in a different order yields the same string and is not counted as distinct.
                    var unique_PTMs = new List<string>()
                    {
                        string.Join(", ", e.ptm_set.ptm_combination
                                          .Where(m => m.modification.ModificationType != "Deconvolution Error")
                                          .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                          .OrderBy(m => m))
                    }.Concat(e.ambiguous_identifications.Select(a => string.Join(", ", a.ptm_set.ptm_combination
                                                                                      .Where(m => m.modification.ModificationType != "Deconvolution Error")
                                                                                      .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                                                                      .OrderBy(m => m)))).Distinct();

                    int gene_ambiguity = unique_accessions.Count() > 1 ? 1 : 0;

                    //check if accessions had been grouped in constructing the theoretical database
                    if (linked_theoretical.ExpandedProteinList.SelectMany(p => p.AccessionList).Select(a => a.Split('_')[0]).Distinct().Count() > 1)
                    {
                        gene_ambiguity = 1;
                    }

                    int sequence_ambiguity = unique_sequences.Count() > 1 ? 1 : 0;
                    int PTM_ambiguity      = unique_PTMs.Count() > 1 ? 1 : 0;
                    int PTM_location       = e.ptm_set.ptm_combination.Any(m => m.modification.ModificationType != "Deconvolution Error") ||
                                             e.ambiguous_identifications.Any(a => a.ptm_set.ptm_combination.Any(m => m.modification.ModificationType != "Deconvolution Error"))
                                             ? 1 : 0;

                    // Level = 1 plus one for every kind of ambiguity present.
                    e.proteoform_level = 1 + gene_ambiguity + sequence_ambiguity + PTM_ambiguity + PTM_location;
                    if (gene_ambiguity > 0)
                    {
                        e.proteoform_level_description += "Gene ambiguity; ";
                    }
                    if (sequence_ambiguity > 0)
                    {
                        e.proteoform_level_description += "Sequence ambiguity; ";
                    }
                    if (PTM_ambiguity > 0)
                    {
                        e.proteoform_level_description += "PTM identity ambiguity; ";
                    }
                    if (PTM_location > 0)
                    {
                        e.proteoform_level_description += "PTM localization ambiguity; ";
                    }
                }
                if (e.proteoform_level == 1)
                {
                    e.proteoform_level_description = "Unambiguous";
                }

                //determine if new intact-mass ID
                // An unambiguous, non-top-down identification is "new" when no top-down id matches it on
                // accession, sequence and modification string.
                e.new_intact_mass_id = false;
                if (!e.topdown_id && e.linked_proteoform_references != null && e.ambiguous_identifications.Count == 0)
                {
                    TheoreticalProteoform linked_theoretical = e.linked_proteoform_references.First() as TheoreticalProteoform;
                    string this_id =
                        string.Join(",", linked_theoretical.ExpandedProteinList.SelectMany(p => p.AccessionList.Select(a => a.Split('_')[0])).Distinct()) +
                        "_" + ExperimentalProteoform.get_sequence(linked_theoretical, e.begin, e.end) +
                        "_" + string.Join(", ", e.ptm_set.ptm_combination
                                                .Where(m => m.modification.ModificationType != "Deconvolution Error")
                                                .Select(ptm => UnlocalizedModification.LookUpId(ptm.modification))
                                                .OrderBy(m => m));
                    string[] this_id_parts   = this_id.Split('_');
                    string[] this_accessions = this_id_parts[0].Split(',');
                    if (!topdown_id_parts.Any(t => this_accessions.Contains(t[0]) &&
                                              this_id_parts[1] == t[1] && this_id_parts[2] == t[2]))
                    {
                        e.new_intact_mass_id = true;
                    }
                }
            });

            if (Sweet.lollipop.remove_bad_connections &&
                (theoretical_proteoforms.Count > 0 ||
                 (Sweet.lollipop.identify_from_td_nodes && experimental_proteoforms.Any(e => e.topdown_id))))
            {
                Parallel.ForEach(relations, r =>
                {
                    r.Accepted = r.Identification;
                });
            }
        }