Ejemplo n.º 1
0
 /// <summary>
 /// Copies annotation data from <paramref name="map"/> and PhosphoSitePlus modification data from
 /// <paramref name="pspMap"/> into every entry of <paramref name="annotations"/>.
 /// </summary>
 /// <param name="annotations">Annotations to fill in; each value is modified in place.</param>
 /// <param name="map">Source annotations keyed by accession.</param>
 /// <param name="pspMap">
 /// Modification strings keyed by accession. Each value is a ';'-separated list of "type,position" entries.
 /// </param>
 /// <remarks>
 /// An accession that is not in <paramref name="map"/> but contains '-' is retried with the text before
 /// the first '-'. In that fallback case the Pfam start/end positions, PDB ids and features are set to
 /// empty collections instead of being copied. Annotations with no match at all are left untouched.
 /// </remarks>
 public static void FillInAnnotation(IDictionary <string, ProteinAnnotation> annotations,
                                     IDictionary <string, MiniProteinAnnotation> map, IDictionary <string, string> pspMap)
 {
     foreach (ProteinAnnotation pa in annotations.Values)
     {
         MiniProteinAnnotation mpa;
         bool exactMatch = map.TryGetValue(pa.Accession, out mpa);
         bool found      = exactMatch;
         if (!exactMatch)
         {
             // Retry with the part of the accession preceding the first '-'.
             int dash = pa.Accession.IndexOf('-');
             found = dash >= 0 && map.TryGetValue(pa.Accession.Substring(0, dash), out mpa);
         }
         if (found)
         {
             pa.GeneName    = mpa.GeneName;
             pa.IsReviewed  = mpa.IsReviewed;
             pa.ProteinName = mpa.ProteinName;
             pa.PfamIds     = mpa.PfamIds;
             pa.PfamNames   = mpa.PfamNames;
             if (exactMatch)
             {
                 pa.PfamStart = mpa.PfamStart;
                 pa.PfamEnd   = mpa.PfamEnd;
                 pa.Pdbs      = mpa.Pdbs;
                 pa.Features  = mpa.Features;
             }
             else
             {
                 // Fallback match on the accession prefix: position-dependent data is not copied.
                 pa.PfamStart = new int[0];
                 pa.PfamEnd   = new int[0];
                 pa.Pdbs      = new string[0];
                 pa.Features  = new Dictionary <FeatureType, List <UniprotFeature> >();
             }
         }
         string pspEntry;
         if (pspMap.TryGetValue(pa.Accession, out pspEntry))
         {
             string[] modificationsPsp = pspEntry.Length == 0 ? new string[0] : pspEntry.Split(';');
             string[] modTypesPsp      = new string[modificationsPsp.Length];
             int[]    modPosPsp        = new int[modificationsPsp.Length];
             for (int i = 0; i < modificationsPsp.Length; i++)
             {
                 string[] w = modificationsPsp[i].Split(',');
                 modTypesPsp[i] = w[0];
                 // A missing or unparsable position is recorded as -1 rather than
                 // throwing on an entry that has no ',' separated position field.
                 if (w.Length < 2 || !Parser.TryInt(w[1], out modPosPsp[i]))
                 {
                     modPosPsp[i] = -1;
                 }
             }
             pa.ModTypesPsp = modTypesPsp;
             pa.ModPosPsp   = modPosPsp;
         }
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Fills in <paramref name="annots"/> from the "maxquantAnnot.txt" file in the configuration path,
        /// falling back to "maxquantAnnot.txt.gz" when the plain file does not exist. Does nothing when
        /// neither file exists.
        /// </summary>
        /// <param name="annots">Annotations to fill in; passed on to the three-argument overload.</param>
        public static void FillInAnnotation(IDictionary <string, ProteinAnnotation> annots)
        {
            string plainPath = Path.Combine(FileUtils.GetConfigPath(), "maxquantAnnot.txt");
            string gzPath    = plainPath + ".gz";

            // The plain file is checked first; the .gz variant is only probed when it is absent.
            string annotationPath = File.Exists(plainPath) ? plainPath : File.Exists(gzPath) ? gzPath : null;
            if (annotationPath == null)
            {
                return;
            }

            HashSet <string> accessions = GetAccessions(annots);
            Dictionary <string, MiniProteinAnnotation> miniMap =
                MiniProteinAnnotation.ReadMapping(annotationPath, accessions);
            Dictionary <string, string> phosphoMap =
                PhosphoSitePlusParser.ParsePhosphoSite(FileUtils.executablePath + "\\conf\\PSP");
            FillInAnnotation(annots, miniMap, phosphoMap);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Adds a "Pfam domains" category column and one category column per feature type to
        /// <paramref name="mdata"/>, based on the protein ids and positions found in two of its
        /// string columns.
        /// </summary>
        /// <param name="mdata">Matrix whose string columns are read and to which category columns are added.</param>
        /// <param name="param">
        /// Supplies the "Proteins" and "Positions within proteins" string-column indices and the
        /// "Add status column" flag.
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        /// <remarks>
        /// Both columns are split on ';'. The j-th position of a row is paired with the j-th protein id of
        /// that row, so the positions cell must have at least as many entries as the proteins cell.
        /// </remarks>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string annotationFile      = FileUtils.executablePath + "\\conf" + "\\maxquantAnnot.txt.gz";
            int    proteinColumnIndex  = param.GetParam <int>("Proteins").Value;
            int    positionColumnIndex = param.GetParam <int>("Positions within proteins").Value;
            bool   includeStatus       = param.GetParam <bool>("Add status column").Value;

            // Split every protein cell and collect the distinct ids needed from the annotation file.
            string[]         proteinColumn = mdata.StringColumns[proteinColumnIndex];
            int              rowCount      = proteinColumn.Length;
            HashSet <string> distinctIds   = new HashSet <string>();
            string[][]       idsPerRow     = new string[rowCount][];
            for (int row = 0; row < rowCount; row++)
            {
                string cell = proteinColumn[row];
                idsPerRow[row] = cell.Length > 0 ? cell.Split(';') : new string[0];
                distinctIds.UnionWith(idsPerRow[row]);
            }
            Dictionary <string, MiniProteinAnnotation> map = MiniProteinAnnotation.ReadMapping(annotationFile, distinctIds);

            string[]   positionColumn = mdata.StringColumns[positionColumnIndex];
            string[][] pfamColumn     = new string[rowCount][];
            Dictionary <FeatureType, string[][]> featureColumns = new Dictionary <FeatureType, string[][]>();
            Dictionary <FeatureType, string[][]> statusColumns  = new Dictionary <FeatureType, string[][]>();
            foreach (FeatureType type in FeatureType.allFeatureTypes)
            {
                featureColumns.Add(type, new string[rowCount][]);
                statusColumns.Add(type, new string[rowCount][]);
            }

            for (int row = 0; row < rowCount; row++)
            {
                string           positionCell = positionColumn[row];
                string[]         positions    = positionCell.Length > 0 ? positionCell.Split(';') : new string[0];
                string[]         rowIds       = idsPerRow[row];
                HashSet <string> pfamHits     = new HashSet <string>();
                Dictionary <FeatureType, HashSet <string> > descriptionHits = new Dictionary <FeatureType, HashSet <string> >();
                Dictionary <FeatureType, HashSet <string> > statusHits      = new Dictionary <FeatureType, HashSet <string> >();

                for (int j = 0; j < rowIds.Length; j++)
                {
                    string proteinId = rowIds[j];
                    // The position is parsed before the lookup, so it is evaluated for unmapped proteins too.
                    int position = Parser.Int(positions[j]);
                    MiniProteinAnnotation mpa;
                    if (!map.TryGetValue(proteinId, out mpa))
                    {
                        continue;
                    }

                    // Pfam domains for which Fits accepts the position.
                    for (int k = 0; k < mpa.PfamIds.Length; k++)
                    {
                        if (Fits(position, mpa.PfamStart[k], mpa.PfamEnd[k]))
                        {
                            pfamHits.Add(mpa.PfamNames[k]);
                        }
                    }

                    foreach (FeatureType featureType in mpa.Features.Keys)
                    {
                        foreach (UniprotFeature feature in mpa.Features[featureType])
                        {
                            // Unparsable bounds are replaced by MaxValue / MinValue before being handed to Fits.
                            int begin;
                            if (!Parser.TryInt(feature.FeatureBegin, out begin))
                            {
                                begin = int.MaxValue;
                            }
                            int end;
                            if (!Parser.TryInt(feature.FeatureEnd, out end))
                            {
                                end = int.MinValue;
                            }
                            if (!Fits(position, begin, end))
                            {
                                continue;
                            }

                            HashSet <string> descriptions;
                            if (!descriptionHits.TryGetValue(featureType, out descriptions))
                            {
                                descriptions = new HashSet <string>();
                                descriptionHits.Add(featureType, descriptions);
                                statusHits.Add(featureType, new HashSet <string>());
                            }
                            // A feature without description is recorded as "+".
                            string description = feature.FeatureDescription;
                            descriptions.Add(string.IsNullOrEmpty(description) ? "+" : description);
                            string status = feature.FeatureStatus;
                            if (!string.IsNullOrEmpty(status))
                            {
                                statusHits[featureType].Add(status);
                            }
                        }
                    }
                }

                pfamColumn[row] = ToArray(pfamHits);
                foreach (FeatureType type in FeatureType.allFeatureTypes)
                {
                    HashSet <string> foundDescriptions;
                    featureColumns[type][row] = descriptionHits.TryGetValue(type, out foundDescriptions)
                        ? ToArray(foundDescriptions)
                        : new string[0];
                    HashSet <string> foundStatuses;
                    statusColumns[type][row] = statusHits.TryGetValue(type, out foundStatuses)
                        ? ToArray(foundStatuses)
                        : new string[0];
                }
            }

            mdata.AddCategoryColumn("Pfam domains", "", pfamColumn);
            foreach (FeatureType type in FeatureType.allFeatureTypes)
            {
                mdata.AddCategoryColumn(type.UniprotName, "", featureColumns[type]);
                if (includeStatus)
                {
                    mdata.AddCategoryColumn(type.UniprotName + " status", "", statusColumns[type]);
                }
            }
        }