Exemplo n.º 1
0
 /// <summary>
 /// Looks up the sequences of one chain in the sequence table.
 /// The chain is searched by PdbID and AsymID first; when no row matches,
 /// the search is repeated by PdbID and AuthorChain.
 /// </summary>
 /// <param name="alignInfo">supplies pdbId, asymChainId and chainId used in the row filters</param>
 /// <param name="seqTable">table queried with DataTable.Select; read columns are
 /// PdbID, AsymID, AuthorChain, SequenceInCoord and Sequence</param>
 /// <returns>
 /// a two-element array taken from the first matching row:
 /// [0] = SequenceInCoord, [1] = Sequence; null when no row matches either filter
 /// </returns>
 private string[] GetChainSequences(AlignSeqInfo alignInfo, DataTable seqTable)
 {
     string asymFilter = string.Format("PdbID = '{0}' AND AsymID = '{1}'",
                                       alignInfo.pdbId, alignInfo.asymChainId);
     DataRow[] matchedRows = seqTable.Select(asymFilter);

     if (matchedRows.Length == 0)
     {
         // fall back to the author chain id when the asymmetric id finds nothing
         string authorFilter = string.Format("PdbID = '{0}' AND AuthorChain = '{1}'",
                                             alignInfo.pdbId, alignInfo.chainId);
         matchedRows = seqTable.Select(authorFilter);
     }

     if (matchedRows.Length == 0)
     {
         return null;
     }

     DataRow firstRow = matchedRows[0];
     return new string[]
     {
         firstRow["SequenceInCoord"].ToString(),
         firstRow["Sequence"].ToString()
     };
 }
Exemplo n.º 2
0
        /// <summary>
        /// Add residues with no-coordinates or no-Calpha to the alignment.
        /// Loads the sequence table for the two entries, and for each chain whose
        /// aligned sequence has missing residues (per HasMissingResidues) maps the
        /// aligned residues to indexes in the coordinate sequence and fills the
        /// missing residues in. Returns without changes when neither coordinate
        /// sequence is reported by IsSequenceWithDisorderResidues.
        /// </summary>
        /// <param name="alignInfo1">alignment info of the first chain; may be updated in place</param>
        /// <param name="alignInfo2">alignment info of the second chain; may be updated in place</param>
        /// <exception cref="InvalidOperationException">
        /// a required chain sequence is not found in the sequence table, or the aligned
        /// residues of a chain cannot be located in its coordinate sequence
        /// </exception>
        public void AddDisorderResiduesToAlignment(ref AlignSeqInfo alignInfo1, ref AlignSeqInfo alignInfo2)
        {
            List<string> pdbList = new List<string>();

            pdbList.Add(alignInfo1.pdbId);
            if (!pdbList.Contains(alignInfo2.pdbId))
            {
                pdbList.Add(alignInfo2.pdbId);
            }
            DataTable seqTable = GetSequenceTable(pdbList, asuSeqInfoTable);

            // [0] = sequence in coordinates, [1] = full sequence; null when the chain is not in the table
            string[] chainSequences1 = GetChainSequences(alignInfo1, seqTable);
            string[] chainSequences2 = GetChainSequences(alignInfo2, seqTable);

            // The null checks below sit exactly where a missing sequence would previously
            // have surfaced as a NullReferenceException, so the set of failing inputs is unchanged.
            if (chainSequences1 == null)
            {
                throw new InvalidOperationException(
                    "No sequence found for " + alignInfo1.pdbId + alignInfo1.asymChainId);
            }

            // no disorder residues in the middle of the chain
            if (!IsSequenceWithDisorderResidues(chainSequences1[0]))
            {
                if (chainSequences2 == null)
                {
                    throw new InvalidOperationException(
                        "No sequence found for " + alignInfo2.pdbId + alignInfo2.asymChainId);
                }
                if (!IsSequenceWithDisorderResidues(chainSequences2[0]))
                {
                    return;
                }
            }

            // Exceptions from the helpers propagate unchanged (no catch-and-"throw ex",
            // which would discard the original stack trace).
            if (HasMissingResidues(alignInfo1.alignSequence, alignInfo1.alignStart, alignInfo1.alignEnd))
            {
                int[] alignXmlSeqIndexes1 = GetXmlSeqIndexes(ref alignInfo1, chainSequences1[0]);
                if (alignXmlSeqIndexes1 == null)
                {
                    throw new InvalidOperationException(
                        "Aligned residues cannot be matched to the coordinate sequence for " +
                        alignInfo1.pdbId + alignInfo1.asymChainId);
                }
                FillMissingResidues(ref alignInfo1, alignXmlSeqIndexes1, chainSequences1[1], ref alignInfo2);
            }

            if (HasMissingResidues(alignInfo2.alignSequence, alignInfo2.alignStart, alignInfo2.alignEnd))
            {
                if (chainSequences2 == null)
                {
                    throw new InvalidOperationException(
                        "No sequence found for " + alignInfo2.pdbId + alignInfo2.asymChainId);
                }
                int[] alignXmlSeqIndexes2 = GetXmlSeqIndexes(ref alignInfo2, chainSequences2[0]);
                if (alignXmlSeqIndexes2 == null)
                {
                    throw new InvalidOperationException(
                        "Aligned residues cannot be matched to the coordinate sequence for " +
                        alignInfo2.pdbId + alignInfo2.asymChainId);
                }
                FillMissingResidues(ref alignInfo2, alignXmlSeqIndexes2, chainSequences2[1], ref alignInfo1);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Maps the non-gap residues of the aligned sequence to indexes in the
        /// coordinate sequence (via GetXmlIndexes) and resets the alignment
        /// start and end from the first and last index found.
        /// </summary>
        /// <param name="alignInfo">alignment info; alignStart and alignEnd are overwritten
        /// when at least one index is found, otherwise it is left untouched</param>
        /// <param name="coordSequence">sequence the aligned residues are matched against</param>
        /// <returns>the indexes returned by GetXmlIndexes, or null when that array is empty</returns>
        private int[] GetXmlSeqIndexes(ref AlignSeqInfo alignInfo, string coordSequence)
        {
            int[] matchedIndexes = GetXmlIndexes(GetNonGapSequenceString(alignInfo.alignSequence), coordSequence);

            // No Blast fallback here: the inputs already use XML sequential numbers.
            int matchCount = matchedIndexes.Length;
            if (matchCount > 0)
            {
                // +1 was added on January 4, 2017
                // (presumably turning a 0-based index into a 1-based number - TODO confirm)
                alignInfo.alignStart = matchedIndexes[0] + 1;
                alignInfo.alignEnd   = matchedIndexes[matchCount - 1] + 1;
                return matchedIndexes;
            }
            return null;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parse one fatcat alignment output file and insert the parsed alignments into the database.
        /// </summary>
        /// <remarks>
        /// A record starts at a line beginning with "Align" (which creates a new row of
        /// FatcatTables.fatcatAlignTable) and is completed by a line containing "Note:"
        /// (which adds the row to the table). After the whole file is read, the table is
        /// batch-inserted, cleared and committed; a failure in that step is written to the log
        /// instead of being thrown. The file name is appended to "fatcatAlignmentsLog.txt".
        /// </remarks>
        /// <param name="alignFile">path of the fatcat alignment output file</param>
        public void ParseFatcatAlignmentFile(string alignFile)
        {
            /* modified on April 5, 2010: the FATCAT input files use XML sequential numbers
             * and asymID instead of PDB sequence numbers, so no sequence number conversion is needed.
             */
            if (logWriter == null)
            {
                logWriter = new StreamWriter("fatcatAlignmentsLog.txt", true);
            }
            logWriter.WriteLine(alignFile);

            int          scoreIdx       = -1;
            int          alignLenIdx    = -1;
            int          gapIdx         = -1;
            int          gapEndIdx      = -1;
            string       alignSequence1 = "";
            string       alignSequence2 = "";
            int          alignStart1    = -1;
            int          alignEnd1      = -1;
            int          alignStart2    = -1;
            int          alignEnd2      = -1;

            string[]     fields        = null;
            bool         chain1Started = false;
            bool         chain2Started = false;
            AlignSeqInfo alignInfo1    = new AlignSeqInfo();
            AlignSeqInfo alignInfo2    = new AlignSeqInfo();
            DataRow      dataRow       = null;
            Dictionary<string, string> entryAuthChainHash = new Dictionary<string, string>();

            // "using" guarantees the file handle is released even when a malformed line makes parsing throw
            using (StreamReader dataReader = new StreamReader(alignFile))
            {
                string line;
                while ((line = dataReader.ReadLine()) != null)
                {
                    if (line == "")
                    {
                        continue;
                    }

                    if (line.StartsWith("Align", StringComparison.Ordinal))
                    {
                        // the fatcat format: Align 3v2dX.pdb 70 with 4garZ.pdb 58
                        fields = ParseHelper.SplitPlus(line, ' ');

                        // get the pdbid and chain id from the fileName
                        string[] entryChainFields1 = GetEntryChainFields(fields[1]);
                        string[] entryChainFields2 = GetEntryChainFields(fields[4]);

                        dataRow = FatcatTables.fatcatAlignTable.NewRow();

                        dataRow["QueryEntry"]  = entryChainFields1[0];
                        dataRow["QueryLength"] = fields[2];
                        dataRow["HitEntry"]    = entryChainFields2[0];
                        dataRow["HitLength"]   = fields[5];

                        alignInfo1.pdbId          = entryChainFields1[0];
                        alignInfo1.asymChainId    = entryChainFields1[1];
                        alignInfo1.chainId        = GetAuthorChainFromAsymID(alignInfo1.pdbId, alignInfo1.asymChainId, ref entryAuthChainHash);
                        dataRow["QueryChain"]     = alignInfo1.chainId;
                        dataRow["QueryAsymChain"] = alignInfo1.asymChainId;

                        alignInfo2.pdbId        = entryChainFields2[0];
                        alignInfo2.asymChainId  = entryChainFields2[1];
                        alignInfo2.chainId      = GetAuthorChainFromAsymID(alignInfo2.pdbId, alignInfo2.asymChainId, ref entryAuthChainHash);
                        dataRow["HitAsymChain"] = alignInfo2.asymChainId;
                        dataRow["HitChain"]     = alignInfo2.chainId;

                        // a new record: reset the per-record accumulators
                        alignSequence1 = "";
                        alignSequence2 = "";
                        chain1Started  = true;
                        chain2Started  = true;
                    }

                    if (dataRow == null)
                    {
                        // nothing before the first "Align" header belongs to a record;
                        // skipping avoids dereferencing a row that does not exist yet
                        continue;
                    }

                    scoreIdx = line.IndexOf("Score");
                    if (scoreIdx > -1)
                    {
                        // from opt-equ, equivalent positions
                        alignLenIdx          = line.IndexOf("align-len");
                        gapIdx               = line.IndexOf("gaps");
                        gapEndIdx            = line.LastIndexOf("(");
                        dataRow["Score"]     = line.Substring(scoreIdx + "Score".Length + 1, alignLenIdx - scoreIdx - "Score".Length - 1);
                        dataRow["Align_Len"] = line.Substring(alignLenIdx + "align-len".Length + 1,
                                                              gapIdx - alignLenIdx - "align-len".Length - 2);
                        dataRow["Gaps"] = line.Substring(gapIdx + "gaps".Length + 1, gapEndIdx - gapIdx - "gaps".Length - 2);
                    }
                    if (line.IndexOf("P-value") > -1)
                    {
                        fields                = ParseHelper.SplitPlus(line, ' ');
                        dataRow["E_Value"]    = Convert.ToDouble(fields[1]);
                        dataRow["Identity"]   = fields[5].TrimEnd('%');
                        dataRow["Similarity"] = fields[7].TrimEnd('%');
                    }
                    if (line.IndexOf("Chain 1: ") > -1)
                    {
                        fields = ParseHelper.SplitPlus(line, ' ');
                        if (chain1Started)
                        {
                            // the first "Chain 1" line of a record gives the alignment start
                            alignStart1   = ConvertSeqToInt(fields[2]);
                            chain1Started = false;
                        }
                        alignSequence1 += fields[3];
                        // end = start number of this line + number of non-gap residues on it - 1
                        alignEnd1       = ConvertSeqToInt(fields[2]) + GetNonGapAlignedString(fields[3]).Length - 1;
                    }
                    if (line.IndexOf("Chain 2:") > -1)
                    {
                        line   = line.Replace(':', ' ');
                        fields = ParseHelper.SplitPlus(line, ' ');
                        if (chain2Started)
                        {
                            // the first "Chain 2" line of a record gives the alignment start
                            alignStart2   = ConvertSeqToInt(fields[2]);
                            chain2Started = false;
                        }

                        alignSequence2 += fields[3];
                        alignEnd2       = ConvertSeqToInt(fields[2]) + GetNonGapAlignedString(fields[3]).Length - 1;
                    }
                    if (line.IndexOf("Note:") > -1)
                    {
                        // end of the record: finalize the row and add it to the table
                        alignInfo1.alignStart    = alignStart1;
                        alignInfo1.alignEnd      = alignEnd1;
                        alignInfo1.alignSequence = alignSequence1;
                        alignInfo2.alignStart    = alignStart2;
                        alignInfo2.alignEnd      = alignEnd2;
                        alignInfo2.alignSequence = alignSequence2;

                        // Convert aligned sequences to xml sequences
                        // add these residues with no-coordinate and no -Calpha to the alignment
                        // modified on August 31, 2012
                        try
                        {
                            seqConverter.AddDisorderResiduesToAlignment(ref alignInfo1, ref alignInfo2);
                        }
                        catch (Exception ex)
                        {
                            // best effort: the alignment is still stored when the fill-in fails
                            logWriter.WriteLine(alignInfo1.pdbId + alignInfo1.asymChainId + " " +
                                                alignInfo2.pdbId + alignInfo2.asymChainId + " filling out disorder residues failed: " + ex.Message);
                            logWriter.Flush();
                        }

                        // NOTE(review): the length is computed from the sequences as parsed,
                        // not from the (possibly filled) alignInfo sequences - confirm this is intended
                        dataRow["AlignmentLength"] = GetAlignmentLength(alignSequence1, alignSequence2);
                        dataRow["QuerySequence"]   = alignInfo1.alignSequence;
                        dataRow["HitSequence"]     = alignInfo2.alignSequence;
                        // modified on April 10, 2010. Since input files for FATCAT use XML sequential numbers
                        dataRow["QueryStart"] = alignInfo1.alignStart;
                        dataRow["QueryEnd"]   = alignInfo1.alignEnd;
                        dataRow["HitStart"]   = alignInfo2.alignStart;
                        dataRow["HitEnd"]     = alignInfo2.alignEnd;

                        FatcatTables.fatcatAlignTable.Rows.Add(dataRow);
                    }
                }
            }

            try
            {
                dbInsert.BatchInsertDataIntoDBtables(ProtCidSettings.alignmentDbConnection, FatcatTables.fatcatAlignTable);
                FatcatTables.fatcatAlignTable.Clear();
                // "too many open handles to database", try to close the handles before leave by commit or rollback
                dbUpdate.Update(ProtCidSettings.alignmentDbConnection, "Commit;");
            }
            catch (Exception ex)
            {
                // dataRow is still null when the file contained no "Align" header
                string lastRowText = dataRow == null ? "" : ParseHelper.FormatDataRow(dataRow);
                logWriter.WriteLine(alignFile + ": error " + ex.Message + "\r\n" + lastRowText + " ");
                logWriter.Flush();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Fill out missing residues in aligned sequence.
        /// For every pair of consecutive indexes in <paramref name="xmlSeqIndexes"/> that are
        /// not adjacent, the residues between them are copied from <paramref name="seqString"/>
        /// into the aligned sequence of the first chain. When the aligned sequence has fewer
        /// positions between the two residues than needed, '-' place holders are first inserted
        /// at the same position in both aligned sequences.
        /// </summary>
        /// <param name="alignInfo1">alignment info for first aligned chain; its alignSequence is rewritten</param>
        /// <param name="xmlSeqIndexes">indexes into <paramref name="seqString"/> of the aligned residues
        /// of the first chain; sorted in place by this method</param>
        /// <param name="seqString">sequence the missing residues are taken from</param>
        /// <param name="alignInfo2">alignment info for the second chain; its alignSequence only receives
        /// '-' place holders</param>
        /// <exception cref="ArgumentNullException"><paramref name="xmlSeqIndexes"/> is null</exception>
        /// <exception cref="InvalidOperationException">GetAlignIndex returns a negative position for a residue</exception>
        private void FillMissingResidues(ref AlignSeqInfo alignInfo1, int[] xmlSeqIndexes, string seqString, ref AlignSeqInfo alignInfo2)
        {
            if (xmlSeqIndexes == null)
            {
                throw new ArgumentNullException("xmlSeqIndexes");
            }

            // sequence index of a residue -> its position in the aligned sequence
            Dictionary<int, int> xmlSeqAlignIdxHash = new Dictionary<int, int>();
            int startAlignIdx = 0;
            int alignIdx      = -1;

            Array.Sort(xmlSeqIndexes);
            int seqIdx = 0;

            for (int i = 0; i < xmlSeqIndexes.Length; i++)
            {
                alignIdx = GetAlignIndex(alignInfo1.alignSequence, i, startAlignIdx, ref seqIdx);
                if (alignIdx < 0)
                {
                    throw new InvalidOperationException("Get aligned index error for " + xmlSeqIndexes[i].ToString());
                }
                xmlSeqAlignIdxHash.Add(xmlSeqIndexes[i], alignIdx);
                startAlignIdx = alignIdx;
            }

            // Exceptions below propagate as thrown, keeping their original stack trace.
            for (int i = 0; i < xmlSeqIndexes.Length - 1; i++)
            {
                if (xmlSeqIndexes[i + 1] <= xmlSeqIndexes[i] + 1)
                {
                    // adjacent residues: nothing is missing between them
                    continue;
                }

                int xmlSeqDif = xmlSeqIndexes[i + 1] - xmlSeqIndexes[i];
                startAlignIdx = xmlSeqAlignIdxHash[xmlSeqIndexes[i]];
                int endAlignIdx = xmlSeqAlignIdxHash[xmlSeqIndexes[i + 1]];
                int alignSeqDif = endAlignIdx - startAlignIdx;

                if (xmlSeqDif > alignSeqDif) // need inserted
                {
                    // propagate the difference for following aligned residues
                    int dif = xmlSeqDif - alignSeqDif;
                    for (int j = i + 1; j < xmlSeqIndexes.Length; j++)
                    {
                        xmlSeqAlignIdxHash[xmlSeqIndexes[j]] = xmlSeqAlignIdxHash[xmlSeqIndexes[j]] + dif;
                    }

                    // place holder, inserted in both sequences so they keep the same length change
                    string insertTemp = new string('-', dif);
                    alignInfo1.alignSequence = alignInfo1.alignSequence.Insert(startAlignIdx + 1, insertTemp);
                    alignInfo2.alignSequence = alignInfo2.alignSequence.Insert(startAlignIdx + 1, insertTemp);
                }

                // remove gaps first
                // fill out by real residue names from asymunit
                string missingResidueString = seqString.Substring(xmlSeqIndexes[i] + 1, xmlSeqDif - 1);
                alignInfo1.alignSequence = alignInfo1.alignSequence.Remove(startAlignIdx + 1, missingResidueString.Length);
                alignInfo1.alignSequence = alignInfo1.alignSequence.Insert(startAlignIdx + 1, missingResidueString);
            }
        }