/// <summary>
/// Builds the parameters for this activity: a multi-choice "Modifications" list filled from
/// <c>PhosphoSitePlusParser.GetAllMods()</c> and a single-choice "Uniprot column" selector
/// filled with the string column names of <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Matrix whose string column names are offered as the Uniprot column choice.</param>
/// <param name="errorString">Neither read nor written by this method.</param>
/// <returns>The parameter set containing "Modifications" and "Uniprot column".</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    List<string> colChoice = mdata.StringColumnNames;
    // Pre-select the first column called "Uniprot" (any casing); stay on index 0 when none matches.
    int colInd = 0;
    for (int i = 0; i < colChoice.Count; i++) {
        // Ordinal, case-insensitive match: ToUpper() follows the current culture, so under
        // Turkish casing rules "Uniprot" upper-cases to "UNİPROT" and would never be found.
        if (string.Equals(colChoice[i], "UNIPROT", System.StringComparison.OrdinalIgnoreCase)) {
            colInd = i;
            break;
        }
    }
    string[] choice = PhosphoSitePlusParser.GetAllMods();
    return new Parameters(
        new MultiChoiceParam("Modifications") {
            // Presumably the indices 0..choice.Length-1, i.e. every modification pre-selected
            // (confirm against ArrayUtils.ConsecutiveInts).
            Value = ArrayUtils.ConsecutiveInts(choice.Length),
            Values = choice
        },
        new SingleChoiceParam("Uniprot column") {
            Value = colInd,
            Help = "Specify here the column that contains Uniprot identifiers.",
            Values = colChoice
        });
}
/// <summary>
/// Builds the parameters for this activity: a single-choice "Modification" selector filled from
/// <c>PhosphoSitePlusParser.GetAllMods()</c>, plus single-choice selectors for the
/// "Uniprot column" and the "Sequence window" column, both filled with the string column
/// names of <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Matrix whose string column names are offered for both column choices.</param>
/// <param name="errorString">Neither read nor written by this method.</param>
/// <returns>The parameter set containing "Modification", "Uniprot column" and "Sequence window".</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    List<string> colChoice = mdata.StringColumnNames;
    // Locate the first column named "Uniprot" and the first named "Sequence window" in one pass.
    // Names are compared ordinally and case-insensitively: ToUpper() follows the current culture,
    // so under Turkish casing rules "Uniprot"/"Sequence window" would never match.
    int colInd = -1;
    int colSeqInd = -1;
    for (int i = 0; i < colChoice.Count && (colInd < 0 || colSeqInd < 0); i++) {
        string name = colChoice[i];
        if (colInd < 0 && string.Equals(name, "UNIPROT", System.StringComparison.OrdinalIgnoreCase)) {
            colInd = i;
        } else if (colSeqInd < 0 &&
                   string.Equals(name, "SEQUENCE WINDOW", System.StringComparison.OrdinalIgnoreCase)) {
            colSeqInd = i;
        }
    }
    // Default to the first column when no matching name exists.
    if (colInd < 0) {
        colInd = 0;
    }
    if (colSeqInd < 0) {
        colSeqInd = 0;
    }
    string[] choice = PhosphoSitePlusParser.GetAllMods();
    // Pre-select "Phosphorylation"; the value is whatever ArrayUtils.IndexOf returns when absent.
    int ind = ArrayUtils.IndexOf(choice, "Phosphorylation");
    return new Parameters(
        new SingleChoiceParam("Modification") {
            Value = ind,
            Values = choice,
            Help = "Select here the kind of modification for which information should be added."
        },
        new SingleChoiceParam("Uniprot column") {
            Value = colInd,
            Help = "Specify here the column that contains Uniprot identifiers.",
            Values = colChoice
        },
        new SingleChoiceParam("Sequence window") {
            Value = colSeqInd,
            Help = "Specify here the column that contains the sequence windows around the site.",
            Values = colChoice
        });
}
/// <summary>
/// Fills <paramref name="annots"/> from the "maxquantAnnot.txt" mapping file in the config
/// folder (or its ".gz" variant) together with the PhosphoSitePlus data parsed from the
/// "conf/PSP" folder below the executable path. Does nothing when neither mapping file exists.
/// </summary>
/// <param name="annots">Protein annotations to be filled in; also the source of the accessions
/// passed to the mapping reader.</param>
public static void FillInAnnotation(IDictionary<string, ProteinAnnotation> annots) {
    string plainFile = Path.Combine(FileUtils.GetConfigPath(), "maxquantAnnot.txt");
    string gzFile = plainFile + ".gz";
    // The plain-text file takes precedence over the gzipped one.
    string file;
    if (File.Exists(plainFile)) {
        file = plainFile;
    } else if (File.Exists(gzFile)) {
        file = gzFile;
    } else {
        return;
    }
    HashSet<string> acc = GetAccessions(annots);
    Dictionary<string, MiniProteinAnnotation> map = MiniProteinAnnotation.ReadMapping(file, acc);
    // Path.Combine instead of hard-coded "\\" separators so the folder also resolves on
    // platforms whose directory separator is not a backslash.
    string phosphoSiteFolder = Path.Combine(FileUtils.executablePath, "conf", "PSP");
    Dictionary<string, string> psMap = PhosphoSitePlusParser.ParsePhosphoSite(phosphoSiteFolder);
    FillInAnnotation(annots, map, psMap);
}
/// <summary>
/// Annotates each row with the known sites of the selected modification. The reference data
/// comes from <c>PhosphoSitePlusParser.ParseKnownMod</c>; a reference entry is a hit for a row
/// when its accession is one of the row's ';'-separated Uniprot ids and its sequence window
/// (first and last character removed, upper-cased) is accepted by <c>Contains</c> against the
/// row's ';'-separated sequence windows. Adds the string column "PhosphoSitePlus window" and
/// the category columns "Known site" and "Origin".
/// </summary>
/// <param name="mdata">Matrix that is read and extended with the three new columns.</param>
/// <param name="param">Supplies "Modification", "Uniprot column" and "Sequence window".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Receives the error message when the parser returns no data.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string mod = param.GetParam<int>("Modification").StringValue;
    PhosphoSitePlusParser.ParseKnownMod(mod, out string[] seqWins, out string[] accs, out string[] pubmedLtp,
        out string[] pubmedMs2, out string[] cstMs2, out string[] _);
    if (seqWins == null) {
        processInfo.ErrString = "File does not exist.";
        return;
    }
    // Split every Uniprot cell into its individual accessions.
    string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] uprot = new string[up.Length][];
    for (int i = 0; i < up.Length; i++) {
        uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
    }
    string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];
    // Accession -> indices of all reference entries belonging to that accession.
    Dictionary<string, List<int>> map = new Dictionary<string, List<int>>();
    for (int i = 0; i < seqWins.Length; i++) {
        if (!map.TryGetValue(accs[i], out List<int> entries)) {
            entries = new List<int>();
            map.Add(accs[i], entries);
        }
        entries.Add(i);
    }
    string[] newCol = new string[uprot.Length];
    string[][] newCatCol = new string[uprot.Length][];
    string[][] originCol = new string[uprot.Length][];
    for (int i = 0; i < newCol.Length; i++) {
        string[] win1 = TransformIl(win[i]).Split(';');
        HashSet<string> wins = new HashSet<string>();
        HashSet<string> origins = new HashSet<string>();
        foreach (string ux in uprot[i]) {
            if (!map.TryGetValue(ux, out List<int> entries)) {
                continue;
            }
            foreach (int ind in entries) {
                string s = seqWins[ind];
                // A window shorter than two characters cannot be trimmed on both sides;
                // skip it instead of letting Substring throw and abort the whole run.
                if (s.Length < 2) {
                    continue;
                }
                // Invariant upper-casing keeps the match independent of the current culture.
                string core = TransformIl(s.ToUpperInvariant().Substring(1, s.Length - 2));
                if (!Contains(win1, core)) {
                    continue;
                }
                wins.Add(s);
                // Tag which evidence fields are non-empty for this reference entry.
                if (pubmedLtp[ind].Length > 0) {
                    origins.Add("LTP");
                }
                if (pubmedMs2[ind].Length > 0) {
                    origins.Add("HTP");
                }
                if (cstMs2[ind].Length > 0) {
                    origins.Add("CST");
                }
            }
        }
        if (wins.Count > 0) {
            newCol[i] = StringUtils.Concat(";", ArrayUtils.ToArray(wins));
            newCatCol[i] = new[] { "+" };
            string[] sortedOrigins = ArrayUtils.ToArray(origins);
            Array.Sort(sortedOrigins);
            originCol[i] = sortedOrigins;
        } else {
            newCol[i] = "";
            newCatCol[i] = new string[0];
            originCol[i] = new string[0];
        }
    }
    mdata.AddStringColumn("PhosphoSitePlus window", "", newCol);
    mdata.AddCategoryColumn("Known site", "", newCatCol);
    mdata.AddCategoryColumn("Origin", "", originCol);
}
/// <summary>
/// For every selected modification, loads the reference entries via
/// <c>PhosphoSitePlusParser.ParseKnownMods</c>, groups the distinct upper-cased sequence windows
/// by accession and stores the value of <c>CountSites</c> for each row's Uniprot ids.
/// Adds the category column "Known modifications" (sorted names of the modifications whose
/// value is greater than zero) and one numeric "&lt;modification&gt; count" column per modification.
/// </summary>
/// <param name="mdata">Matrix that is read and extended with the new columns.</param>
/// <param name="param">Supplies "Modifications" and "Uniprot column".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Receives the error message when no file name is known for a
/// modification; in that case no column is added.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[] mods = param.GetParam<int[]>("Modifications").StringValue
        .Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
    // Split every Uniprot cell into its individual accessions.
    string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] uprot = new string[up.Length][];
    for (int i = 0; i < up.Length; i++) {
        uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
    }
    // c[m][row] = CountSites result for modification m on that row.
    double[][] c = new double[mods.Length][];
    for (int index = 0; index < mods.Length; index++) {
        string filename = PhosphoSitePlusParser.GetFilenameForMod(mods[index]);
        if (filename == null) {
            processInfo.ErrString = "File does not exist.";
            return;
        }
        // Only the windows and accessions are needed here; the remaining outputs are discarded.
        PhosphoSitePlusParser.ParseKnownMods(filename, out string[] seqWins, out string[] accs,
            out string[] _, out string[] _, out string[] _, out string[] _);
        // Invariant upper-casing keeps the set of distinct windows independent of the current culture.
        for (int i = 0; i < seqWins.Length; i++) {
            seqWins[i] = seqWins[i].ToUpperInvariant();
        }
        // Accession -> distinct sequence windows recorded for it.
        Dictionary<string, HashSet<string>> counts = new Dictionary<string, HashSet<string>>();
        for (int i = 0; i < accs.Length; i++) {
            if (!counts.TryGetValue(accs[i], out HashSet<string> sites)) {
                sites = new HashSet<string>();
                counts.Add(accs[i], sites);
            }
            sites.Add(seqWins[i]);
        }
        double[] perRow = new double[up.Length];
        for (int i = 0; i < up.Length; i++) {
            perRow[i] = CountSites(uprot[i], counts);
        }
        c[index] = perRow;
    }
    string[][] catCol = new string[up.Length][];
    for (int i = 0; i < catCol.Length; i++) {
        List<string> present = new List<string>();
        for (int j = 0; j < mods.Length; j++) {
            if (c[j][i] > 0) {
                present.Add(mods[j]);
            }
        }
        present.Sort();
        catCol[i] = present.ToArray();
    }
    mdata.AddCategoryColumn("Known modifications", "Known modifications", catCol);
    for (int i = 0; i < mods.Length; i++) {
        string title = mods[i] + " count";
        mdata.AddNumericColumn(title, title, c[i]);
    }
}
/// <summary>
/// Annotates each row with regulatory-site information from
/// <c>PhosphoSitePlusParser.ParseRegulatorySites</c>. A reference entry is a hit for a row when
/// its accession is one of the row's ';'-separated Uniprot ids and its sequence window (first
/// and last character removed, upper-cased) is accepted by <c>Contains</c> against the row's
/// ';'-separated sequence windows. Adds the category columns "Regulatory site",
/// "Regulatory site function", "Regulatory site process", "Regulatory site protInteract",
/// "Regulatory site otherInteract" and "Regulatory site notes".
/// </summary>
/// <param name="mdata">Matrix that is read and extended with the six new columns.</param>
/// <param name="param">Supplies "Uniprot column" and "Sequence window".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Receives the error message when the parser returns no data.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    PhosphoSitePlusParser.ParseRegulatorySites(out string[] seqWins, out string[] accs, out string[] function,
        out string[] process, out string[] protInteract, out string[] otherInteract, out string[] notes,
        out string[] _);
    if (seqWins == null) {
        processInfo.ErrString = "File does not exist.";
        return;
    }
    // Split every Uniprot cell into its individual accessions.
    string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] uprot = new string[up.Length][];
    for (int i = 0; i < up.Length; i++) {
        uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
    }
    string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];
    // Accession -> indices of all reference entries belonging to that accession.
    Dictionary<string, List<int>> map = new Dictionary<string, List<int>>();
    for (int i = 0; i < seqWins.Length; i++) {
        if (!map.TryGetValue(accs[i], out List<int> entries)) {
            entries = new List<int>();
            map.Add(accs[i], entries);
        }
        entries.Add(i);
    }
    int rowCount = uprot.Length;
    string[][] newCatCol = new string[rowCount][];
    string[][] function2 = new string[rowCount][];
    string[][] process2 = new string[rowCount][];
    string[][] protInteract2 = new string[rowCount][];
    string[][] otherInteract2 = new string[rowCount][];
    string[][] notes2 = new string[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        string[] win1 = TransformIl(win[i]).Split(';');
        bool anyHit = false;
        HashSet<string> function1 = new HashSet<string>();
        HashSet<string> process1 = new HashSet<string>();
        HashSet<string> protInteract1 = new HashSet<string>();
        HashSet<string> otherInteract1 = new HashSet<string>();
        HashSet<string> notes1 = new HashSet<string>();
        foreach (string ux in uprot[i]) {
            if (!map.TryGetValue(ux, out List<int> entries)) {
                continue;
            }
            foreach (int ind in entries) {
                string s = seqWins[ind];
                // A window shorter than two characters cannot be trimmed on both sides;
                // skip it instead of letting Substring throw and abort the whole run.
                if (s.Length < 2) {
                    continue;
                }
                // Invariant upper-casing keeps the match independent of the current culture.
                string core = TransformIl(s.ToUpperInvariant().Substring(1, s.Length - 2));
                if (!Contains(win1, core)) {
                    continue;
                }
                anyHit = true;
                // Collect only the non-empty annotation fields of the matching entry.
                if (function[ind].Length > 0) {
                    function1.Add(function[ind]);
                }
                if (process[ind].Length > 0) {
                    process1.Add(process[ind]);
                }
                if (protInteract[ind].Length > 0) {
                    protInteract1.Add(protInteract[ind]);
                }
                if (otherInteract[ind].Length > 0) {
                    otherInteract1.Add(otherInteract[ind]);
                }
                if (notes[ind].Length > 0) {
                    notes1.Add(notes[ind]);
                }
            }
        }
        if (anyHit) {
            newCatCol[i] = new[] { "+" };
            function2[i] = ArrayUtils.ToArray(function1);
            process2[i] = ArrayUtils.ToArray(process1);
            protInteract2[i] = ArrayUtils.ToArray(protInteract1);
            otherInteract2[i] = ArrayUtils.ToArray(otherInteract1);
            notes2[i] = ArrayUtils.ToArray(notes1);
        } else {
            newCatCol[i] = new string[0];
            function2[i] = new string[0];
            process2[i] = new string[0];
            protInteract2[i] = new string[0];
            otherInteract2[i] = new string[0];
            notes2[i] = new string[0];
        }
    }
    mdata.AddCategoryColumn("Regulatory site", "", newCatCol);
    mdata.AddCategoryColumn("Regulatory site function", "", function2);
    mdata.AddCategoryColumn("Regulatory site process", "", process2);
    mdata.AddCategoryColumn("Regulatory site protInteract", "", protInteract2);
    mdata.AddCategoryColumn("Regulatory site otherInteract", "", otherInteract2);
    mdata.AddCategoryColumn("Regulatory site notes", "", notes2);
}
/// <summary>
/// Annotates each row with the kinases recorded for its sites by
/// <c>PhosphoSitePlusParser.ParseKinaseSubstrate</c>. A reference entry is a hit for a row when
/// its substrate accession is one of the row's ';'-separated Uniprot ids and its sequence window
/// (first and last character removed, upper-cased) is accepted by <c>AddKnownSites.Contains</c>
/// against the row's ';'-separated sequence windows. Adds the string columns
/// "PhosphoSitePlus kinase" and "PhosphoSitePlus kinase uniprot", each holding the distinct
/// hits joined by ';' (empty string when there is no hit).
/// </summary>
/// <param name="mdata">Matrix that is read and extended with the two new columns.</param>
/// <param name="param">Supplies "Uniprot column" and "Sequence window".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Receives the error message when the parser returns no data.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    PhosphoSitePlusParser.ParseKinaseSubstrate(out string[] seqWins, out string[] subAccs, out string[] kinases,
        out string[] kinAccs, out string[] _);
    if (seqWins == null) {
        processInfo.ErrString = "File does not exist.";
        return;
    }
    // Split every Uniprot cell into its individual accessions.
    string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
    string[][] uprot = new string[up.Length][];
    for (int i = 0; i < up.Length; i++) {
        uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
    }
    string[] win = mdata.StringColumns[param.GetParam<int>("Sequence window").Value];
    // Substrate accession -> (sequence window, kinase name, kinase accession) of each entry.
    Dictionary<string, List<Tuple<string, string, string>>> substrateProperties =
        new Dictionary<string, List<Tuple<string, string, string>>>();
    for (int i = 0; i < seqWins.Length; i++) {
        if (!substrateProperties.TryGetValue(subAccs[i], out List<Tuple<string, string, string>> entries)) {
            entries = new List<Tuple<string, string, string>>();
            substrateProperties.Add(subAccs[i], entries);
        }
        entries.Add(new Tuple<string, string, string>(seqWins[i], kinases[i], kinAccs[i]));
    }
    string[] kinaseNameColumn = new string[uprot.Length];
    string[] kinaseUniprotColumn = new string[uprot.Length];
    for (int i = 0; i < kinaseNameColumn.Length; i++) {
        string[] win1 = AddKnownSites.TransformIl(win[i]).Split(';');
        HashSet<string> kinaseNamesHits = new HashSet<string>();
        HashSet<string> kinaseUniprotHits = new HashSet<string>();
        foreach (string ux in uprot[i]) {
            if (!substrateProperties.TryGetValue(ux, out List<Tuple<string, string, string>> properties)) {
                continue;
            }
            foreach (Tuple<string, string, string> property in properties) {
                string w = property.Item1;
                // A window shorter than two characters cannot be trimmed on both sides;
                // skip it instead of letting Substring throw and abort the whole run.
                if (w.Length < 2) {
                    continue;
                }
                // Invariant upper-casing keeps the match independent of the current culture.
                string core = AddKnownSites.TransformIl(w.ToUpperInvariant().Substring(1, w.Length - 2));
                if (AddKnownSites.Contains(win1, core)) {
                    kinaseNamesHits.Add(property.Item2);
                    kinaseUniprotHits.Add(property.Item3);
                }
            }
        }
        kinaseNameColumn[i] = kinaseNamesHits.Count > 0
            ? StringUtils.Concat(";", ArrayUtils.ToArray(kinaseNamesHits))
            : "";
        kinaseUniprotColumn[i] = kinaseUniprotHits.Count > 0
            ? StringUtils.Concat(";", ArrayUtils.ToArray(kinaseUniprotHits))
            : "";
    }
    mdata.AddStringColumn("PhosphoSitePlus kinase", "", kinaseNameColumn);
    mdata.AddStringColumn("PhosphoSitePlus kinase uniprot", "", kinaseUniprotColumn);
}