Ejemplo n.º 1
0
        /// <summary>
        /// Builds the parameter set for this activity: a multi-choice of modifications and a
        /// single-choice of the string column that holds the Uniprot identifiers.
        /// </summary>
        /// <param name="mdata">Matrix whose string column names are offered as column choices.</param>
        /// <param name="errorString">Not written to by this method.</param>
        /// <returns>
        /// Parameters "Modifications" (choices from <c>PhosphoSitePlusParser.GetAllMods()</c>, initial
        /// value <c>ArrayUtils.ConsecutiveInts(choice.Length)</c>) and "Uniprot column" (defaulting to the
        /// first column named "uniprot" in any casing, or to index 0 when there is none).
        /// </returns>
        public Parameters GetParameters(IMatrixData mdata, ref string errorString)
        {
            List<string> colChoice = mdata.StringColumnNames;

            // Ordinal, case-insensitive comparison: the culture-sensitive ToUpper() used previously
            // fails to match "uniprot" under cultures such as tr-TR and throws on a null name.
            int colInd = colChoice.FindIndex(
                name => string.Equals(name, "UNIPROT", System.StringComparison.OrdinalIgnoreCase));
            if (colInd < 0)
            {
                // No matching column: fall back to the first one.
                colInd = 0;
            }

            string[] choice = PhosphoSitePlusParser.GetAllMods();
            MultiChoiceParam modifications = new MultiChoiceParam("Modifications")
            {
                Value = ArrayUtils.ConsecutiveInts(choice.Length),
                Values = choice
            };
            SingleChoiceParam uniprotColumn = new SingleChoiceParam("Uniprot column")
            {
                Value = colInd,
                Help = "Specify here the column that contains Uniprot identifiers.",
                Values = colChoice
            };
            return new Parameters(modifications, uniprotColumn);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the parameter set for this activity: the modification to look up, the string column
        /// holding the Uniprot identifiers and the string column holding the sequence windows.
        /// </summary>
        /// <param name="mdata">Matrix whose string column names are offered as column choices.</param>
        /// <param name="errorString">Not written to by this method.</param>
        /// <returns>
        /// Parameters "Modification" (choices from <c>PhosphoSitePlusParser.GetAllMods()</c>, defaulting
        /// to the index of "Phosphorylation"), "Uniprot column" (defaulting to the first column named
        /// "uniprot" in any casing) and "Sequence window" (defaulting to the first column named
        /// "sequence window" in any casing). Both column defaults fall back to index 0.
        /// </returns>
        public Parameters GetParameters(IMatrixData mdata, ref string errorString)
        {
            List<string> colChoice = mdata.StringColumnNames;

            // Ordinal, case-insensitive comparison: the culture-sensitive ToUpper() used previously
            // fails to match e.g. "uniprot" under cultures such as tr-TR and throws on a null name.
            int colInd = colChoice.FindIndex(
                name => string.Equals(name, "UNIPROT", System.StringComparison.OrdinalIgnoreCase));
            if (colInd < 0)
            {
                colInd = 0;
            }
            int colSeqInd = colChoice.FindIndex(
                name => string.Equals(name, "SEQUENCE WINDOW", System.StringComparison.OrdinalIgnoreCase));
            if (colSeqInd < 0)
            {
                colSeqInd = 0;
            }

            string[] choice = PhosphoSitePlusParser.GetAllMods();
            // NOTE(review): presumably ArrayUtils.IndexOf yields a negative index when
            // "Phosphorylation" is not among the choices; that value is passed through unchanged.
            int ind = ArrayUtils.IndexOf(choice, "Phosphorylation");

            SingleChoiceParam modification = new SingleChoiceParam("Modification")
            {
                Value = ind,
                Values = choice,
                Help = "Select here the kind of modification for which information should be added."
            };
            SingleChoiceParam uniprotColumn = new SingleChoiceParam("Uniprot column")
            {
                Value = colInd,
                Help = "Specify here the column that contains Uniprot identifiers.",
                Values = colChoice
            };
            SingleChoiceParam sequenceWindow = new SingleChoiceParam("Sequence window")
            {
                Value = colSeqInd,
                Help = "Specify here the column that contains the sequence windows around the site.",
                Values = colChoice
            };
            return new Parameters(modification, uniprotColumn, sequenceWindow);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fills the given protein annotations from the "maxquantAnnot.txt" mapping file (or its
        /// ".gz" variant) in the configuration folder, together with the PhosphoSitePlus data found
        /// in the "conf/PSP" folder below the executable path.
        /// </summary>
        /// <param name="annots">Annotations to fill; their accessions restrict what is read from the mapping file.</param>
        /// <remarks>Does nothing when neither the plain nor the gzipped mapping file exists.</remarks>
        public static void FillInAnnotation(IDictionary<string, ProteinAnnotation> annots)
        {
            string plainFile = Path.Combine(FileUtils.GetConfigPath(), "maxquantAnnot.txt");
            string gzFile = plainFile + ".gz";

            // The uncompressed file takes precedence over the gzipped one.
            string file;
            if (File.Exists(plainFile))
            {
                file = plainFile;
            }
            else if (File.Exists(gzFile))
            {
                file = gzFile;
            }
            else
            {
                return;
            }

            HashSet<string> acc = GetAccessions(annots);
            Dictionary<string, MiniProteinAnnotation> map = MiniProteinAnnotation.ReadMapping(file, acc);

            // Path.Combine instead of a hard-coded "\\conf\\PSP" so the folder also resolves on
            // platforms whose directory separator is not a backslash.
            string phosphoSiteFolder = Path.Combine(Path.Combine(FileUtils.executablePath, "conf"), "PSP");
            Dictionary<string, string> psMap = PhosphoSitePlusParser.ParsePhosphoSite(phosphoSiteFolder);
            FillInAnnotation(annots, map, psMap);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Annotates every row with the known PhosphoSitePlus sites of the selected modification
        /// whose sequence window matches one of the row's sequence windows.
        /// </summary>
        /// <param name="mdata">
        /// Matrix to annotate. Receives the string column "PhosphoSitePlus window" and the category
        /// columns "Known site" ("+" on a match) and "Origin" (sorted subset of "LTP", "HTP", "CST").
        /// </param>
        /// <param name="param">Supplies "Modification", "Uniprot column" and "Sequence window".</param>
        /// <param name="supplTables">Not used.</param>
        /// <param name="documents">Not used.</param>
        /// <param name="processInfo">Receives "File does not exist." when the parser yields no data.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string mod = param.GetParam<int>("Modification").StringValue;

            PhosphoSitePlusParser.ParseKnownMod(mod, out string[] seqWins, out string[] accs, out string[] pubmedLtp,
                                                out string[] pubmedMs2, out string[] cstMs2, out string[] species);
            if (seqWins == null)
            {
                processInfo.ErrString = "File does not exist.";
                return;
            }

            // Each row may list several ';'-separated Uniprot accessions.
            string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
            string[][] uprot = new string[up.Length][];
            for (int i = 0; i < up.Length; i++)
            {
                uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
            }
            string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];

            // Accession -> indices of the parsed entries that belong to it.
            Dictionary<string, List<int>> map = new Dictionary<string, List<int>>();
            for (int i = 0; i < seqWins.Length; i++)
            {
                if (!map.TryGetValue(accs[i], out List<int> entries))
                {
                    entries = new List<int>();
                    map.Add(accs[i], entries);
                }
                entries.Add(i);
            }

            string[] newCol = new string[uprot.Length];
            string[][] newCatCol = new string[uprot.Length][];
            string[][] originCol = new string[uprot.Length][];
            for (int i = 0; i < newCol.Length; i++)
            {
                string[] win1 = TransformIl(win[i]).Split(';');
                HashSet<string> wins = new HashSet<string>();
                HashSet<string> origins = new HashSet<string>();
                foreach (string ux in uprot[i])
                {
                    if (!map.TryGetValue(ux, out List<int> entries))
                    {
                        continue;
                    }
                    foreach (int ind in entries)
                    {
                        string s = seqWins[ind];
                        // The first and last character are stripped before comparing, so a window
                        // shorter than two characters cannot match; skip it instead of letting
                        // Substring throw and abort the whole run.
                        if (s == null || s.Length < 2)
                        {
                            continue;
                        }
                        // Invariant upper-casing keeps the comparison independent of the current culture.
                        string core = TransformIl(s.ToUpperInvariant().Substring(1, s.Length - 2));
                        if (!Contains(win1, core))
                        {
                            continue;
                        }
                        wins.Add(s);
                        if (pubmedLtp[ind].Length > 0)
                        {
                            origins.Add("LTP");
                        }
                        if (pubmedMs2[ind].Length > 0)
                        {
                            origins.Add("HTP");
                        }
                        if (cstMs2[ind].Length > 0)
                        {
                            origins.Add("CST");
                        }
                    }
                }
                if (wins.Count > 0)
                {
                    newCol[i] = StringUtils.Concat(";", ArrayUtils.ToArray(wins));
                    newCatCol[i] = new[] { "+" };
                    string[] sortedOrigins = ArrayUtils.ToArray(origins);
                    Array.Sort(sortedOrigins);
                    originCol[i] = sortedOrigins;
                }
                else
                {
                    newCol[i] = "";
                    newCatCol[i] = new string[0];
                    originCol[i] = new string[0];
                }
            }
            mdata.AddStringColumn("PhosphoSitePlus window", "", newCol);
            mdata.AddCategoryColumn("Known site", "", newCatCol);
            mdata.AddCategoryColumn("Origin", "", originCol);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// For every selected modification, counts per row the known sites of the row's Uniprot
 /// accessions (via CountSites, defined elsewhere) and records which modifications are known.
 /// </summary>
 /// <param name="mdata">
 /// Matrix to annotate. Receives the category column "Known modifications" (sorted names of the
 /// modifications with a count above zero) and one numeric "&lt;modification&gt; count" column each.
 /// </param>
 /// <param name="param">Supplies "Modifications" (';'-separated in its string value) and "Uniprot column".</param>
 /// <param name="supplTables">Not used.</param>
 /// <param name="documents">Not used.</param>
 /// <param name="processInfo">
 /// Receives "File does not exist." when no file name is known for a modification; in that case
 /// no column is added.
 /// </param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     string[] mods = param.GetParam<int[]>("Modifications").StringValue.Split(new[] { ';' },
                                                                              StringSplitOptions.RemoveEmptyEntries);

     // Each row may list several ';'-separated Uniprot accessions.
     string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
     string[][] uprot = new string[up.Length][];
     for (int i = 0; i < up.Length; i++)
     {
         uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
     }

     // c[modification][row] = number of known sites.
     double[][] c = new double[mods.Length][];
     for (int index = 0; index < mods.Length; index++)
     {
         string filename = PhosphoSitePlusParser.GetFilenameForMod(mods[index]);
         if (filename == null)
         {
             processInfo.ErrString = "File does not exist.";
             return;
         }
         PhosphoSitePlusParser.ParseKnownMods(filename, out string[] seqWins, out string[] accs,
                                              out string[] pubmedLtp, out string[] pubmedMs2,
                                              out string[] cstMs2, out string[] species);

         // Normalise casing so windows differing only in case collapse into one site.
         // Invariant upper-casing keeps the result independent of the current culture.
         for (int i = 0; i < seqWins.Length; i++)
         {
             seqWins[i] = seqWins[i].ToUpperInvariant();
         }

         // Accession -> distinct sequence windows known for it.
         Dictionary<string, HashSet<string>> counts = new Dictionary<string, HashSet<string>>();
         for (int i = 0; i < accs.Length; i++)
         {
             if (!counts.TryGetValue(accs[i], out HashSet<string> sites))
             {
                 sites = new HashSet<string>();
                 counts.Add(accs[i], sites);
             }
             sites.Add(seqWins[i]);
         }

         double[] perRow = new double[up.Length];
         for (int i = 0; i < up.Length; i++)
         {
             perRow[i] = CountSites(uprot[i], counts);
         }
         c[index] = perRow;
     }

     string[][] catCol = new string[up.Length][];
     for (int i = 0; i < catCol.Length; i++)
     {
         List<string> known = new List<string>();
         for (int j = 0; j < mods.Length; j++)
         {
             if (c[j][i] > 0)
             {
                 known.Add(mods[j]);
             }
         }
         known.Sort();
         catCol[i] = known.ToArray();
     }
     mdata.AddCategoryColumn("Known modifications", "Known modifications", catCol);
     for (int i = 0; i < mods.Length; i++)
     {
         mdata.AddNumericColumn(mods[i] + " count", mods[i] + " count", c[i]);
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Annotates every row with the PhosphoSitePlus regulatory-site information of the entries
        /// whose sequence window matches one of the row's sequence windows.
        /// </summary>
        /// <param name="mdata">
        /// Matrix to annotate. Receives the category columns "Regulatory site" ("+" on a match),
        /// "Regulatory site function", "Regulatory site process", "Regulatory site protInteract",
        /// "Regulatory site otherInteract" and "Regulatory site notes".
        /// </param>
        /// <param name="param">Supplies "Uniprot column" and "Sequence window".</param>
        /// <param name="supplTables">Not used.</param>
        /// <param name="documents">Not used.</param>
        /// <param name="processInfo">Receives an error string when the parser yields no data.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string[] seqWins;
            string[] accs;
            string[] function;
            string[] process;
            string[] protInteract;
            string[] otherInteract;
            string[] notes;
            string[] species;
            PhosphoSitePlusParser.ParseRegulatorySites(out seqWins, out accs, out function, out process, out protInteract,
                                                       out otherInteract, out notes, out species);
            if (seqWins == null)
            {
                processInfo.ErrString = "File  does not exist.";
                return;
            }

            // Each row may list several ';'-separated Uniprot accessions.
            string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
            string[][] uprot = new string[up.Length][];
            for (int i = 0; i < up.Length; i++)
            {
                uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
            }
            string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];

            // Accession -> indices of the parsed entries that belong to it.
            Dictionary<string, List<int>> map = new Dictionary<string, List<int>>();
            for (int i = 0; i < seqWins.Length; i++)
            {
                List<int> entries;
                if (!map.TryGetValue(accs[i], out entries))
                {
                    entries = new List<int>();
                    map.Add(accs[i], entries);
                }
                entries.Add(i);
            }

            string[][] newCatCol = new string[uprot.Length][];
            string[][] function2 = new string[uprot.Length][];
            string[][] process2 = new string[uprot.Length][];
            string[][] protInteract2 = new string[uprot.Length][];
            string[][] otherInteract2 = new string[uprot.Length][];
            string[][] notes2 = new string[uprot.Length][];
            for (int i = 0; i < uprot.Length; i++)
            {
                string[] win1 = TransformIl(win[i]).Split(';');
                bool anyMatch = false;
                HashSet<string> function1 = new HashSet<string>();
                HashSet<string> process1 = new HashSet<string>();
                HashSet<string> protInteract1 = new HashSet<string>();
                HashSet<string> otherInteract1 = new HashSet<string>();
                HashSet<string> notes1 = new HashSet<string>();
                foreach (string ux in uprot[i])
                {
                    List<int> entries;
                    if (!map.TryGetValue(ux, out entries))
                    {
                        continue;
                    }
                    foreach (int ind in entries)
                    {
                        string s = seqWins[ind];
                        // The first and last character are stripped before comparing, so a window
                        // shorter than two characters cannot match; skip it instead of letting
                        // Substring throw and abort the whole run.
                        if (s == null || s.Length < 2)
                        {
                            continue;
                        }
                        // Invariant upper-casing keeps the comparison independent of the current culture.
                        string core = TransformIl(s.ToUpperInvariant().Substring(1, s.Length - 2));
                        if (!Contains(win1, core))
                        {
                            continue;
                        }
                        anyMatch = true;
                        AddIfNotEmpty(function1, function[ind]);
                        AddIfNotEmpty(process1, process[ind]);
                        AddIfNotEmpty(protInteract1, protInteract[ind]);
                        AddIfNotEmpty(otherInteract1, otherInteract[ind]);
                        AddIfNotEmpty(notes1, notes[ind]);
                    }
                }
                if (anyMatch)
                {
                    newCatCol[i] = new[] { "+" };
                    function2[i] = ArrayUtils.ToArray(function1);
                    process2[i] = ArrayUtils.ToArray(process1);
                    protInteract2[i] = ArrayUtils.ToArray(protInteract1);
                    otherInteract2[i] = ArrayUtils.ToArray(otherInteract1);
                    notes2[i] = ArrayUtils.ToArray(notes1);
                }
                else
                {
                    newCatCol[i] = new string[0];
                    function2[i] = new string[0];
                    process2[i] = new string[0];
                    protInteract2[i] = new string[0];
                    otherInteract2[i] = new string[0];
                    notes2[i] = new string[0];
                }
            }
            mdata.AddCategoryColumn("Regulatory site", "", newCatCol);
            mdata.AddCategoryColumn("Regulatory site function", "", function2);
            mdata.AddCategoryColumn("Regulatory site process", "", process2);
            mdata.AddCategoryColumn("Regulatory site protInteract", "", protInteract2);
            mdata.AddCategoryColumn("Regulatory site otherInteract", "", otherInteract2);
            mdata.AddCategoryColumn("Regulatory site notes", "", notes2);
        }

        /// <summary>Adds <paramref name="value"/> to <paramref name="target"/> unless it is the empty string.</summary>
        private static void AddIfNotEmpty(HashSet<string> target, string value)
        {
            if (value.Length > 0)
            {
                target.Add(value);
            }
        }
        /// <summary>
        /// Annotates every row with the kinases that PhosphoSitePlus lists for substrate entries
        /// whose sequence window matches one of the row's sequence windows.
        /// </summary>
        /// <param name="mdata">
        /// Matrix to annotate. Receives the string columns "PhosphoSitePlus kinase" and
        /// "PhosphoSitePlus kinase uniprot", each ';'-joined and empty when nothing matched.
        /// </param>
        /// <param name="param">Supplies "Uniprot column" and "Sequence window".</param>
        /// <param name="supplTables">Not used.</param>
        /// <param name="documents">Not used.</param>
        /// <param name="processInfo">Receives "File does not exist." when the parser yields no data.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string[] seqWins;
            string[] subAccs;
            string[] kinases;
            string[] kinAccs;
            string[] species;
            PhosphoSitePlusParser.ParseKinaseSubstrate(out seqWins, out subAccs, out kinases, out kinAccs, out species);
            if (seqWins == null)
            {
                processInfo.ErrString = "File does not exist.";
                return;
            }

            // Each row may list several ';'-separated Uniprot accessions.
            string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
            string[][] uprot = new string[up.Length][];
            for (int i = 0; i < up.Length; i++)
            {
                uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
            }
            string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];

            // Substrate accession -> (sequence window, kinase name, kinase accession) entries.
            Dictionary<string, List<Tuple<string, string, string>>> substrateProperties =
                new Dictionary<string, List<Tuple<string, string, string>>>();
            for (int i = 0; i < seqWins.Length; i++)
            {
                List<Tuple<string, string, string>> entries;
                if (!substrateProperties.TryGetValue(subAccs[i], out entries))
                {
                    entries = new List<Tuple<string, string, string>>();
                    substrateProperties.Add(subAccs[i], entries);
                }
                entries.Add(new Tuple<string, string, string>(seqWins[i], kinases[i], kinAccs[i]));
            }

            string[] kinaseNameColumn = new string[uprot.Length];
            string[] kinaseUniprotColumn = new string[uprot.Length];
            for (int i = 0; i < kinaseNameColumn.Length; i++)
            {
                string[] win1 = AddKnownSites.TransformIl(win[i]).Split(';');
                HashSet<string> kinaseNamesHits = new HashSet<string>();
                HashSet<string> kinaseUniprotHits = new HashSet<string>();
                foreach (string ux in uprot[i])
                {
                    List<Tuple<string, string, string>> properties;
                    if (!substrateProperties.TryGetValue(ux, out properties))
                    {
                        continue;
                    }
                    foreach (Tuple<string, string, string> property in properties)
                    {
                        string w = property.Item1;
                        // The first and last character are stripped before comparing, so a window
                        // shorter than two characters cannot match; skip it instead of letting
                        // Substring throw and abort the whole run.
                        if (w == null || w.Length < 2)
                        {
                            continue;
                        }
                        // Invariant upper-casing keeps the comparison independent of the current culture.
                        string core = AddKnownSites.TransformIl(w.ToUpperInvariant().Substring(1, w.Length - 2));
                        if (AddKnownSites.Contains(win1, core))
                        {
                            kinaseNamesHits.Add(property.Item2);
                            kinaseUniprotHits.Add(property.Item3);
                        }
                    }
                }
                kinaseNameColumn[i] = kinaseNamesHits.Count > 0
                    ? StringUtils.Concat(";", ArrayUtils.ToArray(kinaseNamesHits))
                    : "";
                kinaseUniprotColumn[i] = kinaseUniprotHits.Count > 0
                    ? StringUtils.Concat(";", ArrayUtils.ToArray(kinaseUniprotHits))
                    : "";
            }
            mdata.AddStringColumn("PhosphoSitePlus kinase", "", kinaseNameColumn);
            mdata.AddStringColumn("PhosphoSitePlus kinase uniprot", "", kinaseUniprotColumn);
        }