Exemplo n.º 1
0
        /// <summary>
        /// Searches for the gap-free aligned sequence inside the gap-free coordinate
        /// sequence of the chain and, when it is found, stores the matching end
        /// position in <c>alignInfo.alignEnd</c>. Nothing is changed when the aligned
        /// sequence does not occur in the coordinate sequence.
        /// </summary>
        /// <param name="alignInfo">
        /// alignment record: pdbId, asymChainId, alignStart and alignSequence are read,
        /// alignEnd is written
        /// </param>
        private void FindEndPosition(ref AlignSeqInfo alignInfo)
        {
            string ungappedAlignment = GetNonGapAlignedString(alignInfo.alignSequence);
            int    chainNumber       = Convert.ToInt32(alignInfo.asymChainId);
            string coordSequence     = GetChainSequenceInCoordinates(alignInfo.pdbId, chainNumber);

            string    ungappedCoordSeq = "";
            Hashtable coordToSeqId     = GetCoordSeqToSeqHash(coordSequence, out ungappedCoordSeq, alignInfo.alignStart);

            int matchIdx = ungappedCoordSeq.IndexOf(ungappedAlignment);
            if (matchIdx < 0)
            {
                // the aligned sequence is not part of the coordinate sequence
                return;
            }

            // the table key is the index just past the match start plus the aligned length;
            // the stored value is the XML sequential id
            int endKey = matchIdx + ungappedAlignment.Length;
            alignInfo.alignEnd = (int)coordToSeqId[endKey];
        }
Exemplo n.º 2
0
        /// <summary>
        /// The aligned sequence covers only residues that have coordinates, and Fatcat
        /// does not provide the end position that is in the PDB file, so the real start
        /// and end positions have to be found in XML residue sequential numbers.
        /// When the gap-free aligned sequence is located in the gap-free coordinate
        /// sequence, <c>alignInfo.alignStart</c> and <c>alignInfo.alignEnd</c> are
        /// overwritten; otherwise <c>alignInfo</c> is left unchanged.
        /// </summary>
        /// <param name="pdbId">entry passed to GetChainSequenceInCoordinates</param>
        /// <param name="entityID">entity passed to GetChainSequenceInCoordinates</param>
        /// <param name="startPos">start position passed to GetCoordSeqToSeqHash</param>
        /// <param name="alignInfo">alignment record: alignSequence is read, alignStart and alignEnd are written</param>
        private void FindStartEndPosition(string pdbId, int entityID, int startPos, ref AlignSeqInfo alignInfo)
        {
            string ungappedAlignment = GetNonGapAlignedString(alignInfo.alignSequence);
            string coordSequence     = GetChainSequenceInCoordinates(pdbId, entityID);

            string    ungappedCoordSeq = "";
            Hashtable coordToSeqId     = GetCoordSeqToSeqHash(coordSequence, out ungappedCoordSeq, startPos);

            int matchIdx = ungappedCoordSeq.IndexOf(ungappedAlignment);
            if (matchIdx < 0)
            {
                // the aligned sequence is not part of the coordinate sequence
                return;
            }

            // the start is looked up with the match index shifted by one
            int startKey = matchIdx + 1;
            alignInfo.alignStart = (int)coordToSeqId[startKey];

            // get the XML sequential id of the last aligned residue
            int endKey = matchIdx + ungappedAlignment.Length;
            alignInfo.alignEnd = (int)coordToSeqId[endKey];
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parses one Fatcat alignment output file and inserts the alignments it
        /// contains into the database.
        /// </summary>
        /// <remarks>
        /// The file is processed line by line. A line starting with "Align" opens a new
        /// alignment record, the "Score", "P-value", "Chain 1:" and "Chain 2:" lines fill
        /// it in, and a "Note:" line completes the record and triggers the insert.
        /// Reading stops at a line containing "#Time used". An exception raised while
        /// handling a line is written to the log together with the text collected for
        /// the current record, and parsing continues with the next line.
        /// </remarks>
        /// <param name="alignFile">path of the Fatcat alignment output file</param>
        /// <param name="isUpdate">
        /// when true, the stored alignment for the same query/hit domain pair is deleted
        /// before the new row is inserted
        /// </param>
        public void ParsePfamFatcatAlignmentFile(string alignFile, bool isUpdate)
        {
            string       line           = "";
            int          scoreIdx       = -1;
            int          alignLenIdx    = -1;
            int          gapIdx         = -1;
            int          gapEndIdx      = -1;
            string       alignSequence1 = "";
            string       alignSequence2 = "";
            int          alignStart1    = -1;
            int          alignEnd1      = -1;
            int          alignStart2    = -1;
            int          alignEnd2      = -1;

            string[]     fields        = null;
            bool         chain1Started = false;
            bool         chain2Started = false;
            AlignSeqInfo alignInfo1    = new AlignSeqInfo();
            AlignSeqInfo alignInfo2    = new AlignSeqInfo();
            DataRow      dataRow       = FatcatTables.fatcatAlignTable.NewRow();
            string       dataLine      = "";
            // the asymchain and startpos for this domain
            Hashtable domainChainInfoHash = new Hashtable();

            // the using statement releases the file handle even when reading fails
            using (StreamReader dataReader = new StreamReader(alignFile))
            {
                while ((line = dataReader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        continue;
                    }

                    try
                    {
                        // keep the raw text of the current record for the error log
                        dataLine += (line + "\r\n");
                        if (line.StartsWith("Align", StringComparison.Ordinal))
                        {
                            fields = ParseHelper.SplitPlus(line, ' ');
                            // domain 1
                            string[] domainInfo1 = ParseDomainName(fields[1], ref domainChainInfoHash);
                            dataRow["QueryEntry"]       = domainInfo1[0];
                            dataRow["QueryDomainID"]    = domainInfo1[1];
                            dataRow["QueryEntity"]      = domainInfo1[2];
                            dataRow["QueryDomainStart"] = domainInfo1[3];
                            dataRow["QueryLength"]      = fields[2];
                            // domain 2
                            string[] domainInfo2 = ParseDomainName(fields[4], ref domainChainInfoHash);
                            dataRow["HitEntry"]       = domainInfo2[0];
                            dataRow["HitDomainID"]    = domainInfo2[1];
                            dataRow["HitEntity"]      = domainInfo2[2];
                            dataRow["HitDomainStart"] = domainInfo2[3];
                            dataRow["HitLength"]      = fields[5];
                            alignInfo1.pdbId          = fields[1].Substring(0, 4);
                            alignInfo1.asymChainId    = domainInfo1[2];
                            alignInfo2.pdbId          = fields[4].Substring(0, 4);
                            alignInfo2.asymChainId    = domainInfo2[2];
                            // a new record starts: drop the sequences collected so far and
                            // take the start positions from the next "Chain" lines
                            alignSequence1            = "";
                            alignSequence2            = "";
                            chain1Started             = true;
                            chain2Started             = true;
                        }
                        scoreIdx = line.IndexOf("Score");
                        if (scoreIdx > -1)
                        {
                            // the values sit between the "Score", "align-len" and "gaps" labels
                            alignLenIdx          = line.IndexOf("align-len");
                            gapIdx               = line.IndexOf("gaps");
                            gapEndIdx            = line.LastIndexOf("(");
                            dataRow["Score"]     = line.Substring(scoreIdx + "Score".Length + 1, alignLenIdx - scoreIdx - "Score".Length - 1);
                            dataRow["Align_Len"] = line.Substring(alignLenIdx + "align-len".Length + 1,
                                                                  gapIdx - alignLenIdx - "align-len".Length - 2);
                            dataRow["Gaps"] = line.Substring(gapIdx + "gaps".Length + 1, gapEndIdx - gapIdx - "gaps".Length - 2);
                        }
                        if (line.IndexOf("P-value") > -1)
                        {
                            fields                = ParseHelper.SplitPlus(line, ' ');
                            // the file is machine generated: parse the number independently
                            // of the culture the program runs under
                            dataRow["E_Value"]    = Convert.ToDouble(fields[1], System.Globalization.CultureInfo.InvariantCulture);
                            dataRow["Identity"]   = fields[5].TrimEnd('%');
                            dataRow["Similarity"] = fields[7].TrimEnd('%');
                        }
                        if (line.IndexOf("Chain 1:") > -1)
                        {
                            // contain alignStart and aligned sequence
                            fields = ParseChainAlignSeqLine(line);
                            if (chain1Started)
                            {
                                // only the first "Chain 1:" line of a record gives the start
                                alignStart1   = ConvertSeqToInt(fields[0]);
                                chain1Started = false;
                            }
                            alignSequence1 += fields[1];
                            // the end position is recomputed on every line, so the last line wins
                            alignEnd1       = ConvertSeqToInt(fields[0]) + GetNonGapAlignedString(fields[1]).Length - 1;
                        }
                        if (line.IndexOf("Chain 2:") > -1)
                        {
                            fields = ParseChainAlignSeqLine(line);
                            if (chain2Started)
                            {
                                // only the first "Chain 2:" line of a record gives the start
                                alignStart2   = ConvertSeqToInt(fields[0]);
                                chain2Started = false;
                            }

                            alignSequence2 += fields[1];
                            alignEnd2       = ConvertSeqToInt(fields[0]) + GetNonGapAlignedString(fields[1]).Length - 1;
                        }
                        if (line.IndexOf("Note:") > -1)
                        {
                            if (alignSequence1 == "")
                            {
                                // no aligned sequence was collected for this record
                                continue;
                            }
                            alignInfo1.alignStart    = alignStart1;
                            alignInfo1.alignEnd      = alignEnd1;
                            alignInfo1.alignSequence = alignSequence1;
                            if (alignInfo1.alignStart < 0)
                            {
                                // the start position parameter is an int, so convert to Int32;
                                // Int16 overflowed for positions above 32767
                                FindStartEndPosition(dataRow["QueryEntry"].ToString(),
                                                     Convert.ToInt32(dataRow["QueryEntity"].ToString()),
                                                     Convert.ToInt32(dataRow["QueryDomainStart"].ToString()), ref alignInfo1);
                            }
                            alignInfo2.alignStart    = alignStart2;
                            alignInfo2.alignEnd      = alignEnd2;
                            alignInfo2.alignSequence = alignSequence2;
                            if (alignInfo2.alignStart < 0)
                            {
                                FindStartEndPosition(dataRow["HitEntry"].ToString(),
                                                     Convert.ToInt32(dataRow["HitEntity"].ToString()),
                                                     Convert.ToInt32(dataRow["HitDomainStart"].ToString()), ref alignInfo2);
                            }

                            dataRow["AlignmentLength"] = GetAlignmentLength(alignSequence1, alignSequence2);
                            dataRow["QuerySequence"]   = alignInfo1.alignSequence;
                            dataRow["HitSequence"]     = alignInfo2.alignSequence;
                            dataRow["QueryStart"]      = alignInfo1.alignStart;
                            dataRow["QueryEnd"]        = alignInfo1.alignEnd;
                            dataRow["HitStart"]        = alignInfo2.alignStart;
                            dataRow["HitEnd"]          = alignInfo2.alignEnd;
                            // delete the previous data
                            if (isUpdate)
                            {
                                DeletePfamAlignment(dataRow["QueryEntry"].ToString(), Convert.ToInt64(dataRow["QueryDomainID"].ToString()),
                                                    dataRow["HitEntry"].ToString(), Convert.ToInt64(dataRow["HitDomainID"].ToString()));
                            }

                            if (FatcatTables.fatcatAlignTable.Columns.Contains("QuerySeqNumbers"))
                            {
                                AddQueryHitSeqNumbers(dataRow);
                            }
                            dbInsert.InsertDataIntoDb(AppSettings.alignmentDbConnection, dataRow);
                            alignSequence1 = "";
                            alignSequence2 = "";
                            dataLine       = "";
                        }
                        if (line.IndexOf("#Time used") > -1)
                        {
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        // log the failure with the offending line and the record text, then go on
                        logWriter.WriteLine(ex.Message);
                        logWriter.WriteLine(line);
                        logWriter.WriteLine(dataLine);
                        logWriter.Flush();
                        dataLine = "";
                    }
                }
            }
        }