/// <summary>
/// Fills in the given protein annotations from the "maxquantAnnot.txt" mapping file
/// (or its ".gz" variant) found in the configuration folder, combined with the data
/// parsed from the "conf/PSP" folder below the executable path.
/// </summary>
/// <remarks>
/// The plain-text mapping file takes precedence over the gzipped one. When neither
/// file exists the method returns without touching <paramref name="annots"/>.
/// </remarks>
/// <param name="annots">
/// Annotations to complete. The accessions to look up are obtained via
/// <c>GetAccessions(annots)</c>; presumably the dictionary is keyed by accession
/// (confirm against callers).
/// </param>
public static void FillInAnnotation(IDictionary<string, ProteinAnnotation> annots) {
    string plainFile = Path.Combine(FileUtils.GetConfigPath(), "maxquantAnnot.txt");
    string gzFile = plainFile + ".gz";
    string file;
    if (File.Exists(plainFile)) {
        file = plainFile;
    } else if (File.Exists(gzFile)) {
        file = gzFile;
    } else {
        // No mapping file available: nothing to fill in.
        return;
    }
    HashSet<string> acc = GetAccessions(annots);
    Dictionary<string, MiniProteinAnnotation> map = MiniProteinAnnotation.ReadMapping(file, acc);
    // Path.Combine instead of hard-coded "\\" so the folder also resolves on
    // platforms whose directory separator is not a backslash.
    string phosphoSiteFolder = Path.Combine(FileUtils.executablePath, "conf", "PSP");
    Dictionary<string, string> psMap = PhosphoSitePlusParser.ParsePhosphoSite(phosphoSiteFolder);
    FillInAnnotation(annots, map, psMap);
}
/// <summary>
/// Annotates each row of <paramref name="mdata"/> with the Pfam domains and UniProt
/// features that cover the row's sequence positions, and appends the results as
/// category columns.
/// </summary>
/// <remarks>
/// The protein-id and position cells are ';'-separated lists that are matched up by
/// index: the j-th position belongs to the j-th protein id. Protein ids without a
/// matching, parseable position are skipped instead of aborting the whole run.
/// Added columns: "Pfam domains", one column per feature type (named by its
/// <c>UniprotName</c>) and, when requested, a "&lt;UniprotName&gt; status" column each.
/// </remarks>
/// <param name="mdata">Matrix whose string columns are read and to which the category columns are added.</param>
/// <param name="param">
/// Supplies "Proteins" and "Positions within proteins" (indices into
/// <c>mdata.StringColumns</c>) and the "Add status column" flag.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    // Path.Combine instead of hard-coded "\\" so the file also resolves on
    // platforms whose directory separator is not a backslash.
    string file = Path.Combine(FileUtils.executablePath, "conf", "maxquantAnnot.txt.gz");
    int protInd = param.GetParam<int>("Proteins").Value;
    int posInd = param.GetParam<int>("Positions within proteins").Value;
    bool addStatus = param.GetParam<bool>("Add status column").Value;

    // An empty cell yields an empty list rather than a single empty entry.
    string[] SplitList(string cell) {
        return cell.Length > 0 ? cell.Split(';') : new string[0];
    }

    // Split the protein column once and collect every id so that only the
    // required entries are read from the annotation file.
    string[] protCol = mdata.StringColumns[protInd];
    int nrows = protCol.Length;
    HashSet<string> allProtIds = new HashSet<string>();
    string[][] protIds = new string[nrows][];
    for (int i = 0; i < nrows; i++) {
        protIds[i] = SplitList(protCol[i]);
        foreach (string id in protIds[i]) {
            allProtIds.Add(id);
        }
    }
    Dictionary<string, MiniProteinAnnotation> map = MiniProteinAnnotation.ReadMapping(file, allProtIds);

    string[] posCol = mdata.StringColumns[posInd];
    string[][] pfamCol = new string[nrows][];
    Dictionary<FeatureType, string[][]> cols = new Dictionary<FeatureType, string[][]>();
    Dictionary<FeatureType, string[][]> statusCols = new Dictionary<FeatureType, string[][]>();
    foreach (FeatureType t in FeatureType.allFeatureTypes) {
        cols.Add(t, new string[nrows][]);
        statusCols.Add(t, new string[nrows][]);
    }

    for (int i = 0; i < nrows; i++) {
        string[] positions = SplitList(posCol[i]);
        HashSet<string> pfams = new HashSet<string>();
        Dictionary<FeatureType, HashSet<string>> others = new Dictionary<FeatureType, HashSet<string>>();
        Dictionary<FeatureType, HashSet<string>> othersStatus = new Dictionary<FeatureType, HashSet<string>>();
        for (int j = 0; j < protIds[i].Length; j++) {
            // The position list may be shorter than the protein list (or contain a
            // non-numeric entry); such proteins cannot be placed, so skip them
            // instead of throwing.
            if (j >= positions.Length || !Parser.TryInt(positions[j], out int pos)) {
                continue;
            }
            if (!map.TryGetValue(protIds[i][j], out MiniProteinAnnotation mpa)) {
                continue;
            }
            for (int k = 0; k < mpa.PfamIds.Length; k++) {
                if (Fits(pos, mpa.PfamStart[k], mpa.PfamEnd[k])) {
                    pfams.Add(mpa.PfamNames[k]);
                }
            }
            foreach (FeatureType featureType in mpa.Features.Keys) {
                foreach (UniprotFeature uf in mpa.Features[featureType]) {
                    // Unparseable bounds are replaced by an inverted range
                    // (begin = MaxValue, end = MinValue), presumably so that Fits
                    // never matches them - confirm against Fits.
                    if (!Parser.TryInt(uf.FeatureBegin, out int begin)) {
                        begin = int.MaxValue;
                    }
                    if (!Parser.TryInt(uf.FeatureEnd, out int end)) {
                        end = int.MinValue;
                    }
                    if (!Fits(pos, begin, end)) {
                        continue;
                    }
                    // Both dictionaries always receive a feature type together.
                    if (!others.TryGetValue(featureType, out HashSet<string> descriptions)) {
                        descriptions = new HashSet<string>();
                        others.Add(featureType, descriptions);
                        othersStatus.Add(featureType, new HashSet<string>());
                    }
                    // Features without a description are marked with "+".
                    string description = uf.FeatureDescription;
                    if (string.IsNullOrEmpty(description)) {
                        description = "+";
                    }
                    descriptions.Add(description);
                    string status = uf.FeatureStatus;
                    if (!string.IsNullOrEmpty(status)) {
                        othersStatus[featureType].Add(status);
                    }
                }
            }
        }
        pfamCol[i] = ToArray(pfams);
        foreach (FeatureType t in FeatureType.allFeatureTypes) {
            if (others.TryGetValue(t, out HashSet<string> rowDescriptions)) {
                cols[t][i] = ToArray(rowDescriptions);
            } else {
                cols[t][i] = new string[0];
            }
            if (othersStatus.TryGetValue(t, out HashSet<string> rowStatuses)) {
                statusCols[t][i] = ToArray(rowStatuses);
            } else {
                statusCols[t][i] = new string[0];
            }
        }
    }

    mdata.AddCategoryColumn("Pfam domains", "", pfamCol);
    foreach (FeatureType t in FeatureType.allFeatureTypes) {
        mdata.AddCategoryColumn(t.UniprotName, "", cols[t]);
        if (addStatus) {
            mdata.AddCategoryColumn(t.UniprotName + " status", "", statusCols[t]);
        }
    }
}