Пример #1
0
        /// <summary>
        /// Clusters the domain ligands of one Pfam family and stores the cluster
        /// assignments: one row per ligand (PfamID, ClusterID, PdbID, ChainDomainID,
        /// LigandChain) is added to <c>pfamLigandClusterTable</c>, the table is
        /// batch-inserted through the protcid connection and then cleared.
        /// </summary>
        /// <param name="pfamId">Pfam identifier handed to the lookup helpers and written to the PfamID column.</param>
        /// <param name="updateEntryList">Entries passed to <c>GetUpdateLigandIndexes</c> to select the ligand indexes to update.</param>
        private void UpdatePfamLigandsClustersHmm(string pfamId, List <string> updateEntryList)
        {
            DataTable     comHmmTable   = GetPfamComHmmTable(pfamId);
            List <string> domainLigands = GetPfamChainDomainLigandList(pfamId);

            double[,]          scoreMatrix      = SetConnectMatrix(domainLigands, comHmmTable);
            List <List <int> > existingClusters = GetExistingClusterList(pfamId, domainLigands);
            int[]              indexesToUpdate  = GetUpdateLigandIndexes(updateEntryList, domainLigands);
            List <int[]>       ligandClusters   = ClusterPfamLigandsIndexes(scoreMatrix, indexesToUpdate, existingClusters);

            for (int clusterNo = 0; clusterNo < ligandClusters.Count; clusterNo++)
            {
                int[] memberIndexes = ligandClusters[clusterNo];
                for (int m = 0; m < memberIndexes.Length; m++)
                {
                    // The first three '_'-separated parts of a list entry are stored as
                    // PdbID, ChainDomainID and LigandChain.
                    string[] parts = domainLigands[memberIndexes[m]].Split('_');

                    DataRow clusterRow = pfamLigandClusterTable.NewRow();
                    clusterRow["PfamID"]        = pfamId;
                    clusterRow["ClusterID"]     = clusterNo + 1; // cluster ids are 1-based, in list order
                    clusterRow["PdbID"]         = parts[0];
                    clusterRow["ChainDomainID"] = parts[1];
                    clusterRow["LigandChain"]   = parts[2];
                    pfamLigandClusterTable.Rows.Add(clusterRow);
                }
            }

            dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.protcidDbConnection, pfamLigandClusterTable);
            pfamLigandClusterTable.Clear();
        }
Пример #2
0
        /// <summary>
        /// Clusters the ligands listed in <paramref name="ligandComAtomTable"/> and
        /// stores the cluster assignments: one row per ligand (PfamID, ClusterID,
        /// PdbID, ChainDomainID, LigandChain, LigandFileChain) is added to
        /// <c>pfamLigandClusterTable</c>, the table is batch-inserted through the
        /// protcid connection and then cleared.
        /// </summary>
        /// <param name="ligandComAtomTable">
        /// Table used to build the ligand list and the connect matrix. The PfamID of
        /// its first row is written to every cluster row. Nothing is done when the
        /// table has no rows.
        /// </param>
        public void ClusterPfamLigands(DataTable ligandComAtomTable)
        {
            // The Pfam id is taken from Rows[0] below; an empty table has nothing to
            // cluster and would otherwise fail with an IndexOutOfRangeException.
            if (ligandComAtomTable.Rows.Count == 0)
            {
                return;
            }

            List <string> pdbLigandList = GetPfamLigandsInOrder(ligandComAtomTable);

            int[,] connectMatrix = SetConnectMatrix(pdbLigandList, ligandComAtomTable);
            List <List <int> > clusterList       = ClusterPfamLigands(connectMatrix);
            List <List <int> > sortedClusterList = SortClustersInLigandNumbers(clusterList);

            string pfamId    = ligandComAtomTable.Rows[0]["PfamID"].ToString();
            int    clusterId = 1; // cluster ids are 1-based, in sorted-list order

            foreach (List <int> cluster in sortedClusterList)
            {
                foreach (int ligandIndex in cluster)
                {
                    // The four '_'-separated parts of a list entry are stored as
                    // PdbID, ChainDomainID, LigandChain and LigandFileChain.
                    string[] ligandFields = pdbLigandList[ligandIndex].Split('_');

                    DataRow clusterRow = pfamLigandClusterTable.NewRow();
                    clusterRow["PfamID"]          = pfamId;
                    clusterRow["ClusterID"]       = clusterId;
                    clusterRow["PdbID"]           = ligandFields[0];
                    clusterRow["ChainDomainID"]   = ligandFields[1];
                    clusterRow["LigandChain"]     = ligandFields[2];
                    clusterRow["LigandFileChain"] = ligandFields[3];
                    pfamLigandClusterTable.Rows.Add(clusterRow);
                }
                clusterId++;
            }
            dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.protcidDbConnection, pfamLigandClusterTable);
            pfamLigandClusterTable.Clear();
        }
Пример #3
0
        /// <summary>
        /// Copies the PDBCRCMap rows of the given entries from the database behind
        /// <c>remotePiscesDbConnect</c> into the "pdbcrcmap" table behind
        /// <c>localMysqlDbConnect</c>, in batches, and collects the distinct CRC
        /// codes that <c>GetCrcCodesInTable</c> reports for the copied rows.
        /// </summary>
        /// <param name="updateEntries">Entries matched against the <c>pdb</c> column of PDBCRCMap.</param>
        /// <returns>The distinct CRC codes, in the order they were first encountered.</returns>
        private string[] ImportNewEntriesCrcCodes(string[] updateEntries)
        {
            // Upper bound on the number of entries placed in one "IN (...)" list.
            const int maxEntriesPerQuery = 30;
            ArrayList updateCrcList      = new ArrayList();

            for (int i = 0; i < updateEntries.Length; i += maxEntriesPerQuery)
            {
                // The last batch may be shorter than the others.
                int remaining = updateEntries.Length - i;
                int batchSize = remaining < maxEntriesPerQuery ? remaining : maxEntriesPerQuery;

                string[] queryEntries = new string[batchSize];
                Array.Copy(updateEntries, i, queryEntries, 0, batchSize);

                string queryString = string.Format("Select * From PDBCRCMap Where pdb IN {0};", ParseHelper.FormatSqlListString(queryEntries));
                DataTable pdbCrcTable = dbQuery.Query(remotePiscesDbConnect, queryString);
                // The table name selects the destination table of the batch insert.
                pdbCrcTable.TableName = "pdbcrcmap";
                dbInsert.BatchInsertDataIntoDBtables(localMysqlDbConnect, pdbCrcTable);

                string[] crcCodesInTable = GetCrcCodesInTable(pdbCrcTable);
                foreach (string crc in crcCodesInTable)
                {
                    if (!updateCrcList.Contains(crc))
                    {
                        updateCrcList.Add(crc);
                    }
                }
            }
            string[] updateCrcCodes = new string[updateCrcList.Count];
            updateCrcList.CopyTo(updateCrcCodes);
            return(updateCrcCodes);
        }
Пример #4
0
        /// <summary>
        /// Builds the hh alignment table from the hhr files named in a list file.
        /// Each line of the list file is a sequence code; the file
        /// "&lt;code&gt;.hhr" in <paramref name="hhrFileDir"/> is parsed, or, when it
        /// does not exist, "&lt;code&gt;.hhr.gz" is unzipped into the temp directory,
        /// parsed, and the unzipped copy deleted afterwards. Parsed rows are
        /// batch-inserted per file; parse/insert errors are logged and the loop
        /// continues with the next code.
        /// </summary>
        /// <param name="hhrListFile">
        /// the list of sequence codes, like uniprot code, pdb id, crc code;
        /// when empty, the single-argument overload is called with the directory instead
        /// </param>
        /// <param name="hhrFileDir">the directory where hhr files are</param>
        public void BuildHhAlignments(string hhrListFile, string hhrFileDir)
        {
            if (hhrListFile == "")
            {
                BuildHhAlignments(hhrFileDir);
                return;
            }
            bool isUpdate = false;

            Initialize(isUpdate);

            // "using" releases the list file handle even when an exception escapes
            // the loop (e.g. from unzipping or deleting a file).
            using (StreamReader lsReader = new StreamReader(hhrListFile))
            {
                string seqCode = "";
                while ((seqCode = lsReader.ReadLine()) != null)
                {
                    bool   deleteHhrFile = false;
                    string hhrFile       = Path.Combine(hhrFileDir, seqCode + ".hhr");
                    if (!File.Exists(hhrFile))
                    {
                        // fall back to the gzipped file; work on a temporary unzipped copy
                        hhrFile       = Path.Combine(hhrFileDir, seqCode + ".hhr.gz");
                        hhrFile       = ParseHelper.UnZipFile(hhrFile, ProtCidSettings.tempDir);
                        deleteHhrFile = true;
                    }
                    try
                    {
                        ParseHhrFile(hhrFile, hhAlignTable);
                        dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.alignmentDbConnection, hhAlignTable);
                    }
                    catch (Exception ex)
                    {
                        ProtCidSettings.logWriter.WriteLine(seqCode + " Parsing hhr file error: " + ex.Message);
                        ProtCidSettings.logWriter.Flush();
                    }
                    // start the next file with an empty table, whether or not the insert succeeded
                    hhAlignTable.Clear();

                    if (deleteHhrFile)
                    {
                        File.Delete(hhrFile);
                    }
                }
            }
            ProtCidSettings.logWriter.Flush();
        }
Пример #5
0
        /* Modified on Feb. 02, 2009
         * The alignments will be added at the next step,
         * because the best alignment is chosen from the PSIBLAST, CE and FATCAT alignments.
         * Missing alignments will be added outside of the project (FATCAT/CE have no Windows executable).
         */
        /// <summary>
        /// For every row of the HomoSeqInfo table, looks up the entry list keyed by
        /// "SpaceGroup_ASU", removes the row's own PdbID from it and, when entries
        /// remain, adds them to the HomoRepEntryAlign table for that PdbID. The
        /// HomoRepEntryAlign table is then batch-inserted and cleared, and the
        /// distinct PdbIDs of all rows are passed to <c>AddDataToRepEntryAlignTable</c>.
        /// </summary>
        /// <param name="sgEntryHash">
        /// Entry lists keyed by "SpaceGroup_ASU". The lists are modified in place:
        /// the PdbID of each HomoSeqInfo row is removed from its list.
        /// </param>
        public void AddGroupChainPairsToTables(Dictionary <string, List <string> > sgEntryHash)
        {
            List <string> repPdbList = new List <string> ();

#if DEBUG
            // only assigned for inspection in the debugger
            string repEntryString     = "";
            string repHomoEntryString = "";
            int    subGroupNum        = 1;
#endif
            foreach (DataRow dRow in HomoGroupTables.homoGroupTables[HomoGroupTables.HomoSeqInfo].Rows)
            {
                string repPdbId = dRow["PdbID"].ToString();
                string groupKey = dRow["SpaceGroup"].ToString() + "_" + dRow["ASU"].ToString();

                // the remaining entries of the group, without the row's own entry
                List <string> pdbList = sgEntryHash[groupKey];
                pdbList.Remove(repPdbId);
#if DEBUG
                repEntryString += repPdbId;
                repEntryString += ",";
#endif
                if (!repPdbList.Contains(repPdbId))
                {
                    repPdbList.Add(repPdbId);
                }
                if (pdbList.Count == 0)
                {
                    continue;
                }

                AddDataToHomoRepEntryAlignTable(repPdbId, pdbList, groupSeqNum);
#if DEBUG
                repHomoEntryString = repPdbId;
                foreach (string pdbId in pdbList)
                {
                    repHomoEntryString += ",";
                    repHomoEntryString += pdbId;
                }
                subGroupNum++;
#endif
            }
            dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.protcidDbConnection, HomoGroupTables.homoGroupTables[HomoGroupTables.HomoRepEntryAlign]);
            HomoGroupTables.homoGroupTables[HomoGroupTables.HomoRepEntryAlign].Clear();

            // retrieve pairwise entries information for each group
            AddDataToRepEntryAlignTable(repPdbList, groupSeqNum);
        }
Пример #6
0
        /// <summary>
        /// Builds one summary row per clan accession returned by the
        /// PfamInPdb/PfamClanFamily query (ClanAcc, NumPfams, NumPfamsPdb,
        /// NumPfamsPeptide, NumPfamsDnaRna, NumEntries, NumUniProts) and
        /// batch-inserts the resulting table through the pdbfam connection.
        /// </summary>
        public void GenerateClansInPdbTable()
        {
            DbInsert  inserter         = new DbInsert();
            DataTable clanSummaryTable = CreateTable(false);
            DataTable clanAccTable     = ProtCidSettings.pdbfamQuery.Query(
                "Select Distinct Clan_Acc From PfamInPdb, PfamClanFamily Where PfamInPdb.PfamAcc = PfamClanFamily.Pfam_Acc;");

            for (int rowIdx = 0; rowIdx < clanAccTable.Rows.Count; rowIdx++)
            {
                string  clan       = clanAccTable.Rows[rowIdx]["Clan_Acc"].ToString().TrimEnd();
                DataRow summaryRow = clanSummaryTable.NewRow();
                summaryRow["ClanAcc"] = clan;

                // gather all counts first; the Pfams found in the PDB feed the
                // peptide and DNA/RNA counts
                string[] pfamsInPdb      = null;
                int      pfamCount       = GetClanNumPfams(clan);
                int      pfamPdbCount    = GetClanNumPfamsInPdb(clan, out pfamsInPdb);
                int      pfamPeptCount   = GetNumPfamsWithPeptides(pfamsInPdb);
                int      pfamDnaRnaCount = GetNumPfamsWithDnaRna(pfamsInPdb);
                int      entryCount      = GetClanNumEntries(clan);
                int      uniProtCount    = GetClanNumUniProtsInPdb(clan);

                summaryRow["NumPfams"]        = pfamCount;
                summaryRow["NumPfamsPdb"]     = pfamPdbCount;
                summaryRow["NumPfamsPeptide"] = pfamPeptCount;
                summaryRow["NumPfamsDnaRna"]  = pfamDnaRnaCount;
                summaryRow["NumEntries"]      = entryCount;
                summaryRow["NumUniProts"]     = uniProtCount;
                clanSummaryTable.Rows.Add(summaryRow);
            }

            inserter.BatchInsertDataIntoDBtables(ProtCidSettings.pdbfamDbConnection, clanSummaryTable);
        }
Пример #7
0
        /// <summary>
        /// Parses one FATCAT alignment output file and inserts the alignments it
        /// contains into the database.
        /// </summary>
        /// <remarks>
        /// The file is read line by line. A line starting with "Align" opens a new
        /// alignment row; "Score", "P-value", "Chain 1: " and "Chain 2:" lines fill
        /// it; a "Note:" line completes the row and adds it to
        /// <c>FatcatTables.fatcatAlignTable</c>. After the whole file is read the
        /// table is batch-inserted, cleared, and a "Commit;" is issued. Insert
        /// errors are written to the log; the file name is always logged first.
        /// </remarks>
        /// <param name="alignFile">Path of the FATCAT output file to parse.</param>
        public void ParseFatcatAlignmentFile(string alignFile)
        {
            /* modified on April 5, 2010, change the input files of FATCAT from Guoli's regular files
             *       into my regular file with XML sequential numbers and asymID
             *       instead of PDB sequence numbers.
             *       so no sequence numbers conversion needed
             */
            if (logWriter == null)
            {
                logWriter = new StreamWriter("fatcatAlignmentsLog.txt", true);
            }
            logWriter.WriteLine(alignFile);

            string       line           = "";
            int          scoreIdx       = -1;
            int          alignLenIdx    = -1;
            int          gapIdx         = -1;
            int          gapEndIdx      = -1;
            string       alignSequence1 = "";
            string       alignSequence2 = "";
            int          alignStart1    = -1;
            int          alignEnd1      = -1;
            int          alignStart2    = -1;
            int          alignEnd2      = -1;

            string[]     fields        = null;
            bool         chain1Started = false;
            bool         chain2Started = false;
            AlignSeqInfo alignInfo1    = new AlignSeqInfo();
            AlignSeqInfo alignInfo2    = new AlignSeqInfo();
            DataRow      dataRow       = null;
            Dictionary <string, string> entryAuthChainHash = new Dictionary <string, string> ();

            // "using" closes the file even when a malformed line makes the parsing throw.
            using (StreamReader dataReader = new StreamReader(alignFile))
            {
                while ((line = dataReader.ReadLine()) != null)
                {
                    if (line == "")
                    {
                        continue;
                    }

                    if (line.StartsWith("Align", StringComparison.Ordinal))
                    {
                        // the fatcat format: Align 3v2dX.pdb 70 with 4garZ.pdb 58
                        fields = ParseHelper.SplitPlus(line, ' ');

                        // get the pdbid and chain id from the fileName
                        string[] entryChainFields1 = GetEntryChainFields(fields[1]);
                        string[] entryChainFields2 = GetEntryChainFields(fields[4]);

                        dataRow = FatcatTables.fatcatAlignTable.NewRow();

                        dataRow["QueryEntry"]  = entryChainFields1[0];
                        dataRow["QueryLength"] = fields[2];
                        dataRow["HitEntry"]    = entryChainFields2[0];
                        dataRow["HitLength"]   = fields[5];

                        alignInfo1.pdbId          = entryChainFields1[0];
                        alignInfo1.asymChainId    = entryChainFields1[1];
                        alignInfo1.chainId        = GetAuthorChainFromAsymID(alignInfo1.pdbId, alignInfo1.asymChainId, ref entryAuthChainHash);
                        dataRow["QueryChain"]     = alignInfo1.chainId;
                        dataRow["QueryAsymChain"] = alignInfo1.asymChainId;

                        alignInfo2.pdbId        = entryChainFields2[0];
                        alignInfo2.asymChainId  = entryChainFields2[1];
                        alignInfo2.chainId      = GetAuthorChainFromAsymID(alignInfo2.pdbId, alignInfo2.asymChainId, ref entryAuthChainHash);
                        dataRow["HitAsymChain"] = alignInfo2.asymChainId;
                        dataRow["HitChain"]     = alignInfo2.chainId;

                        // reset the per-alignment state; the next "Chain" lines
                        // supply the start positions
                        alignSequence1 = "";
                        alignSequence2 = "";
                        chain1Started  = true;
                        chain2Started  = true;
                    }
                    scoreIdx = line.IndexOf("Score");
                    if (scoreIdx > -1)
                    {
                        // from opt-equ, equivalent positions
                        // each value is the text between its label and the next marker on the line
                        alignLenIdx          = line.IndexOf("align-len");
                        gapIdx               = line.IndexOf("gaps");
                        gapEndIdx            = line.LastIndexOf("(");
                        dataRow["Score"]     = line.Substring(scoreIdx + "Score".Length + 1, alignLenIdx - scoreIdx - "Score".Length - 1);
                        dataRow["Align_Len"] = line.Substring(alignLenIdx + "align-len".Length + 1,
                                                              gapIdx - alignLenIdx - "align-len".Length - 2);
                        dataRow["Gaps"] = line.Substring(gapIdx + "gaps".Length + 1, gapEndIdx - gapIdx - "gaps".Length - 2);
                    }
                    if (line.IndexOf("P-value") > -1)
                    {
                        fields                = ParseHelper.SplitPlus(line, ' ');
                        dataRow["E_Value"]    = Convert.ToDouble(fields[1]);
                        dataRow["Identity"]   = fields[5].TrimEnd('%');
                        dataRow["Similarity"] = fields[7].TrimEnd('%');
                    }
                    // NOTE(review): unlike the "Chain 2:" branch, this one requires a space
                    // after the colon and does not replace ':' before splitting — confirm
                    // whether the asymmetry is intended.
                    if (line.IndexOf("Chain 1: ") > -1)
                    {
                        fields = ParseHelper.SplitPlus(line, ' ');
                        if (chain1Started)
                        {
                            // only the first "Chain 1" line of an alignment gives the start
                            alignStart1   = ConvertSeqToInt(fields[2]);
                            chain1Started = false;
                        }
                        alignSequence1 += fields[3];
                        alignEnd1       = ConvertSeqToInt(fields[2]) + GetNonGapAlignedString(fields[3]).Length - 1;
                    }
                    if (line.IndexOf("Chain 2:") > -1)
                    {
                        line   = line.Replace(':', ' ');
                        fields = ParseHelper.SplitPlus(line, ' ');
                        if (chain2Started)
                        {
                            // only the first "Chain 2" line of an alignment gives the start
                            alignStart2   = ConvertSeqToInt(fields[2]);
                            chain2Started = false;
                        }

                        alignSequence2 += fields[3];
                        alignEnd2       = ConvertSeqToInt(fields[2]) + GetNonGapAlignedString(fields[3]).Length - 1;
                    }
                    if (line.IndexOf("Note:") > -1)
                    {
                        // the "Note:" line ends an alignment: finish the row and queue it
                        alignInfo1.alignStart    = alignStart1;
                        alignInfo1.alignEnd      = alignEnd1;
                        alignInfo1.alignSequence = alignSequence1;
                        alignInfo2.alignStart    = alignStart2;
                        alignInfo2.alignEnd      = alignEnd2;
                        alignInfo2.alignSequence = alignSequence2;

                        /*  if (IsAlignmentInDb(alignInfo1.pdbId, alignInfo1.chainId, alignInfo2.pdbId, alignInfo2.chainId))
                         *  {
                         *      continue;
                         *  }
                         */
                        // Convert aligned sequences to xml sequences
                        // add these residues with no-coordinate and no -Calpha to the alignment
                        // modified on August 31, 2012
                        try
                        {
                            seqConverter.AddDisorderResiduesToAlignment(ref alignInfo1, ref alignInfo2);
                        }
                        catch (Exception ex)
                        {
                            // best effort: the row is still stored with whatever the
                            // align infos hold after the failed call
                            logWriter.WriteLine(alignInfo1.pdbId + alignInfo1.asymChainId + " " +
                                                alignInfo2.pdbId + alignInfo2.asymChainId + " filling out disorder residues failed: " + ex.Message);
                            logWriter.Flush();
                        }

                        dataRow["AlignmentLength"] = GetAlignmentLength(alignSequence1, alignSequence2);
                        dataRow["QuerySequence"]   = alignInfo1.alignSequence;
                        dataRow["HitSequence"]     = alignInfo2.alignSequence;
                        // modified on April 10, 2010. Since input files for FATCAT use XML sequential numbers
                        dataRow["QueryStart"] = alignInfo1.alignStart;
                        dataRow["QueryEnd"]   = alignInfo1.alignEnd;
                        dataRow["HitStart"]   = alignInfo2.alignStart;
                        dataRow["HitEnd"]     = alignInfo2.alignEnd;
                        //              DeleteDbData(alignInfo1.pdbId, alignInfo1.chainId, alignInfo2.pdbId, alignInfo2.chainId);

                        FatcatTables.fatcatAlignTable.Rows.Add(dataRow);

                        /*             try
                         *           {
                         *               dbInsert.InsertDataIntoDb(alignmentDbConnection, dataRow);
                         *           }
                         *           catch (Exception ex)
                         *           {
                         *               logWriter.WriteLine(alignFile + ": error " + ex.Message + "\r\n" + ParseHelper.FormatDataRow (dataRow) + " ");
                         *               logWriter.Flush();
                         *           }*/
                    }
                }
            }
            try
            {
                dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.alignmentDbConnection, FatcatTables.fatcatAlignTable);
                FatcatTables.fatcatAlignTable.Clear();
                // "too many open handles to database", try to close the handles before leave by commit or rollback
                dbUpdate.Update(ProtCidSettings.alignmentDbConnection, "Commit;");
            }
            catch (Exception ex)
            {
                // dataRow is the last row parsed from the file (null when none was started)
                logWriter.WriteLine(alignFile + ": error " + ex.Message + "\r\n" + ParseHelper.FormatDataRow(dataRow) + " ");
                logWriter.Flush();
            }
        }
Пример #8
0
 /// <summary>
 /// Compares the ligands of every pair of distinct entries of one Pfam family:
 /// for each pair of ligands (one from each entry) the interacting Pfam HMM
 /// position lists are scored with <c>CalculateJaccardScore</c>, and a row is
 /// stored in <c>ligandComHmmTable</c> whenever the two lists share at least
 /// one position. Rows are batch-inserted and the table cleared after each
 /// first entry of the pairs has been processed.
 /// </summary>
 /// <param name="pfamId">Pfam identifier used to select rows from PfamLigands and written to every result row.</param>
 public void CalculatePfamLigandComHmmPos(string pfamId)
 {
     if (ligandComHmmTable == null)
     {
         InitializeComHmmTable ();
     }

     string ligandQuery = string.Format("Select PdbID, ChainDomainID, AsymChain, LigandChain, LigandSeqID, SeqID, HmmSeqID From PfamLigands" +
         " Where PfamID = '{0}' Order By PdbID, ChainDomainID, LigandChain;", pfamId);
     DataTable ligandSeqTable = ProtCidSettings.protcidQuery.Query( ligandQuery);

     // distinct entries, in the order they first appear in the query result
     List<string> entries = new List<string>();
     foreach (DataRow ligandRow in ligandSeqTable.Rows)
     {
         string entry = ligandRow["PdbID"].ToString().TrimEnd();
         if (entries.Contains (entry))
         {
             continue;
         }
         entries.Add(entry);
     }

     for (int first = 0; first < entries.Count; first++)
     {
         string entryA = entries[first];
         Dictionary<int, List<string>> ligandsByDomainA = GetChainDomainLigandList(entryA, ligandSeqTable);

         // only pairs (first, second) with second > first, so each pair is scored once
         for (int second = first + 1; second < entries.Count; second++)
         {
             string entryB = entries[second];
             Dictionary<int, List<string>> ligandsByDomainB = GetChainDomainLigandList(entryB, ligandSeqTable);

             foreach (KeyValuePair<int, List<string>> domainA in ligandsByDomainA)
             {
                 foreach (KeyValuePair<int, List<string>> domainB in ligandsByDomainB)
                 {
                     foreach (string ligandA in domainA.Value)
                     {
                         List<int> hmmSitesA = GetLigandInteractingPfamHmmList(entryA, domainA.Key, ligandA, ligandSeqTable);
                         foreach (string ligandB in domainB.Value)
                         {
                             List<int> hmmSitesB = GetLigandInteractingPfamHmmList(entryB, domainB.Key, ligandB, ligandSeqTable);

                             int sharedSites = 0;
                             double score = CalculateJaccardScore(hmmSitesA, hmmSitesB, out sharedSites);
                             if (sharedSites <= 0)
                             {
                                 // no shared interacting Pfam HMM positions: nothing to store
                                 continue;
                             }

                             DataRow pairRow = ligandComHmmTable.NewRow();
                             pairRow["PfamId"] = pfamId;
                             pairRow["PdbID1"] = entryA;
                             pairRow["ChainDomainID1"] = domainA.Key;
                             pairRow["LigandChain1"] = ligandA;
                             pairRow["PdbID2"] = entryB;
                             pairRow["ChainDomainID2"] = domainB.Key;
                             pairRow["LigandChain2"] = ligandB;
                             pairRow["NumOfHmmSites1"] = hmmSitesA.Count;
                             pairRow["NumOfHmmSites2"] = hmmSitesB.Count;
                             pairRow["NumOfComHmmSites"] = sharedSites;
                             pairRow["Jscore"] = score;
                             ligandComHmmTable.Rows.Add(pairRow);
                         }
                     }
                 }
             }
         }

         // flush the rows collected for this first entry before moving on
         dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.protcidDbConnection, ligandComHmmTable);
         ligandComHmmTable.Clear();
     }
 }
Пример #9
0
        /// <summary>
        /// Fills the pdb_entry table: selects the entry columns from PdbEntry joined
        /// with BamEntryKeywords over the pdbfam connection and batch-inserts the
        /// result, named "pdb_entry", through <c>bamDbConnect</c>.
        /// </summary>
        public void CreatePdbEntryTable()
        {
            DataTable entryRows = dbQuery.Query(
                ProtCidSettings.pdbfamDbConnection,
                "Select Upper(PdbEntry.PdbID) As pdb_id, title, descript as description, method as expdta, keywords, keywords_text, " +
                " cast(depositfiledate as varchar(10)) as pdb_ori_date, resolution From PdbEntry, BamEntryKeywords " +
                " where PdbEntry.PdbID = BamEntryKeywords.PdbID;");

            // the table name selects the destination table of the batch insert
            entryRows.TableName = "pdb_entry";
            dbInsert.BatchInsertDataIntoDBtables(bamDbConnect, entryRows);
        }