/// <summary>
/// Tars the domain fasta files of a fixed list of relation sequence ids and moves
/// each archive into the web domain fasta folder.
/// </summary>
/// <remarks>
/// Side effects visible here: <c>ProtCidSettings.dirSettings.seqFastaPath</c> is rewritten
/// ("\fasta" replaced by "\DomainFasta") and <c>webFastaFileDir</c> is derived from it.
/// Exceptions raised by the file system calls or by <c>RunTar</c> are not caught.
/// </remarks>
public void CompressBigRelations()
{
    int[] relSeqIds = { 2055, 2124, 2178, 11619, 11642, 14647, 14811 };

    ProtCidSettings.dirSettings.seqFastaPath = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\fasta", "\\DomainFasta");
    webFastaFileDir = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\DomainFasta", "\\WebDomainFasta");
    // File.Move cannot create the target folder, so make sure it exists first.
    if (!Directory.Exists(webFastaFileDir))
    {
        Directory.CreateDirectory(webFastaFileDir);
    }

    List<string> seqFastaFileList = new List<string>();
    foreach (int relSeqId in relSeqIds)
    {
        seqFastaFileList.Clear();

        // Per-cluster fasta files for both sides (A and B) of the relation.
        string[] relFastaFiles = Directory.GetFiles(ProtCidSettings.dirSettings.seqFastaPath, "Cluster" + relSeqId.ToString() + "A_*.fasta");
        foreach (string fastaFile in relFastaFiles)
        {
            seqFastaFileList.Add(new FileInfo(fastaFile).Name);
        }
        relFastaFiles = Directory.GetFiles(ProtCidSettings.dirSettings.seqFastaPath, "Cluster" + relSeqId.ToString() + "B_*.fasta");
        foreach (string fastaFile in relFastaFiles)
        {
            seqFastaFileList.Add(new FileInfo(fastaFile).Name);
        }

        // The A group file is always listed; the B group file only when it is on disk.
        seqFastaFileList.Add("Group" + relSeqId.ToString() + "A.fasta");
        if (File.Exists(Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, "Group" + relSeqId.ToString() + "B.fasta")))
        {
            seqFastaFileList.Add("Group" + relSeqId.ToString() + "B.fasta");
        }
        string[] seqFastaFiles = seqFastaFileList.ToArray();

        string relationName = DownloadableFileName.GetDomainRelationName(relSeqId);
        string seqTarFile = "Seq_" + relationName + ".tar.gz";
        fileCompress.RunTar(seqTarFile, seqFastaFiles, ProtCidSettings.dirSettings.seqFastaPath, true);

        // Move the tar file to the web folder. The archive name must be used here:
        // combining with an empty name would yield the folders themselves.
        string srcRelSeqFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, seqTarFile);
        string destRelSeqFile = Path.Combine(webFastaFileDir, seqTarFile);
        if (File.Exists(destRelSeqFile))
        {
            File.Delete(destRelSeqFile);
        }
        File.Move(srcRelSeqFile, destRelSeqFile);
    }
}
/// <summary>
/// Writes a tab-separated "RelSeqID, relation name" line for every distinct RelSeqID
/// in PfamDomainFamilyRelation to DomainGroupIdNameMap.txt in the interface file folder.
/// </summary>
/// <returns>The full path of the map file.</returns>
public string GenerateDomainGroupIdFileNameMatch()
{
    string domainGroupIdNameLsFile = Path.Combine(ProtCidSettings.dirSettings.interfaceFilePath, "DomainGroupIdNameMap.txt");

    // The using block releases the file handle even when the query or a name lookup throws.
    using (StreamWriter lsFileWriter = new StreamWriter(domainGroupIdNameLsFile))
    {
        string queryString = "Select Distinct RelSeqID From PfamDomainFamilyRelation;";
        DataTable domainGroupTable = ProtCidSettings.protcidQuery.Query(queryString);
        foreach (DataRow domainGroupRow in domainGroupTable.Rows)
        {
            int relSeqId = Convert.ToInt32(domainGroupRow["RelSeqID"].ToString());
            string groupName = DownloadableFileName.GetDomainRelationName(relSeqId);
            // Lines are terminated with "\n" explicitly rather than the platform newline.
            lsFileWriter.Write(relSeqId + "\t" + groupName + "\n");
        }
    }
    return domainGroupIdNameLsFile;
}
/// <summary>
/// Writes a tab-separated "SuperGroupSeqID, name" line for every distinct SuperGroupSeqID
/// in PfamSuperGroups to ChainGroupIdNameMap.txt in the interface file folder. The name is
/// the value returned by <c>DownloadableFileName.GetChainGroupTarGzFileName</c>.
/// </summary>
/// <returns>The full path of the map file.</returns>
public string GenerateChainGroupIdFileNameMatch()
{
    string chainGroupIdNameLsFile = Path.Combine(ProtCidSettings.dirSettings.interfaceFilePath, "ChainGroupIdNameMap.txt");

    // The using block releases the file handle even when the query or a name lookup throws.
    using (StreamWriter lsFileWriter = new StreamWriter(chainGroupIdNameLsFile))
    {
        string queryString = "Select Distinct SuperGroupSeqID From PfamSuperGroups;";
        DataTable chainGroupTable = ProtCidSettings.protcidQuery.Query(queryString);
        foreach (DataRow chainGroupRow in chainGroupTable.Rows)
        {
            int chainGroupId = Convert.ToInt32(chainGroupRow["SuperGroupSeqID"].ToString());
            string groupName = DownloadableFileName.GetChainGroupTarGzFileName(chainGroupId);
            // Lines are terminated with "\n" explicitly rather than the platform newline.
            lsFileWriter.Write(chainGroupId + "\t" + groupName + "\n");
        }
    }
    return chainGroupIdNameLsFile;
}
/// <summary>
/// Compresses the interface files of every cluster returned by <c>GetClustersForGroup</c>
/// for the given group, then tars the per-cluster results into one ".tar" file named
/// after the group.
/// </summary>
/// <remarks>
/// A failure on one cluster is reported to the progress queue and the log, and the
/// remaining clusters are still processed. Clusters for which the compression helper
/// returns an empty string are left out of the group tar.
/// </remarks>
/// <param name="superGroupId">Id of the group whose clusters are compressed.</param>
public void CompressGroupClustersFiles(int superGroupId)
{
    ProtCidSettings.progressInfo.currentOperationNum++;
    ProtCidSettings.progressInfo.currentStepNum++;
    ProtCidSettings.progressInfo.currentFileName = superGroupId.ToString();

    int[] clusterIds = GetClustersForGroup(superGroupId);
    string groupName = DownloadableFileName.GetChainGroupTarGzFileName(superGroupId);
    List<string> compressedClusterFiles = new List<string>();

    for (int i = 0; i < clusterIds.Length; i++)
    {
        int clusterId = clusterIds[i];
        string clusterLabel = superGroupId.ToString() + "_" + clusterId.ToString();
        ProtCidSettings.progressInfo.currentFileName = clusterLabel;
        try
        {
            string compressedFile = CompressGroupClusterInterfaceFiles(superGroupId, clusterId, groupName);
            if (compressedFile == "")
            {
                continue;
            }
            compressedClusterFiles.Add(compressedFile);
        }
        catch (Exception ex)
        {
            string errorLine = clusterLabel + "Compress cluster interface files errors: " + ex.Message;
            ProtCidSettings.progressInfo.progStrQueue.Enqueue(errorLine);
            ProtCidSettings.logWriter.WriteLine(errorLine);
            ProtCidSettings.logWriter.Flush();
        }
    }

    // bundle the per-cluster files into the group archive
    fileCompress.RunTar(groupName + ".tar", compressedClusterFiles.ToArray(), clusterFileDir, false);
}
/// <summary>
/// For each given group: writes the per-cluster fasta files derived from the group's
/// fasta files, tars them together with the group fasta files, and moves the archive
/// into the web chain fasta folder.
/// </summary>
/// <remarks>
/// <c>webFastaFileDir</c> is set from the sequence fasta path ("\ChainFasta" replaced by
/// "\webChainFasta") and created when missing. Errors while writing cluster files or while
/// tarring/moving are written to <c>logWriter</c> and do not stop the loop.
/// </remarks>
/// <param name="superGroupIds">Ids of the groups to process.</param>
public void UpdateClusterFastaFiles(int[] superGroupIds)
{
    webFastaFileDir = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\ChainFasta", "\\webChainFasta");
    if (!Directory.Exists(webFastaFileDir))
    {
        Directory.CreateDirectory(webFastaFileDir);
    }

    List<string> archiveMembers = new List<string>();
    // Kept outside the loop: the error message below reports whatever name was assigned last.
    string tarFile = "";

    ProtCidSettings.progressInfo.ResetCurrentProgressInfo();
    ProtCidSettings.progressInfo.totalOperationNum = superGroupIds.Length;
    ProtCidSettings.progressInfo.totalStepNum = superGroupIds.Length;
    ProtCidSettings.progressInfo.currentOperationLabel = "Tar Seq Files";

    for (int groupIndex = 0; groupIndex < superGroupIds.Length; groupIndex++)
    {
        int groupId = superGroupIds[groupIndex];
        ProtCidSettings.progressInfo.currentFileName = groupId.ToString();
        ProtCidSettings.progressInfo.currentStepNum++;
        ProtCidSettings.progressInfo.currentOperationNum++;

        archiveMembers.Clear();
        string[] groupFastaFiles = GetGroupSeqFastaFiles(groupId);
        foreach (string groupFastaFile in groupFastaFiles)
        {
            string typedGroupId = GetGroupIdWithTypeFromFileName(groupFastaFile);
            archiveMembers.Add("Group" + typedGroupId + ".fasta");
            try
            {
                string clusterInfoPath = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, "Cluster" + typedGroupId + ".txt");
                Dictionary<string, string> entitySequences = null;
                Dictionary<string, string> entityAnnotations = null;
                Dictionary<int, string[]> clusterEntities = GetClusterEntityHash(clusterInfoPath);
                ReadEntitySequenceHash(groupFastaFile, out entitySequences, out entityAnnotations);
                foreach (KeyValuePair<int, string[]> cluster in clusterEntities)
                {
                    // The file is written by the helper; only its expected name is recorded here.
                    WriteClusterEntitySequencesToFile(typedGroupId, cluster.Key, cluster.Value, entitySequences, entityAnnotations);
                    archiveMembers.Add("Cluster" + typedGroupId + "_" + cluster.Key.ToString() + ".fasta");
                }
            }
            catch (Exception ex)
            {
                logWriter.WriteLine("Output cluster fasta files errors: " + groupFastaFile + " " + ex.Message);
                logWriter.Flush();
            }
        }

        string[] fastaFiles = archiveMembers.ToArray();
        string chainGroupName = DownloadableFileName.GetChainGroupTarGzFileName(groupId);
        try
        {
            DeleteObsoleteWebFastaFiles(chainGroupName);
            tarFile = "Seq_" + chainGroupName + ".tar.gz";
            tarFile = fileCompress.RunTar(tarFile, fastaFiles, ProtCidSettings.dirSettings.seqFastaPath, true);
            string tarSource = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, tarFile);
            string tarTarget = Path.Combine(webFastaFileDir, tarFile);
            File.Move(tarSource, tarTarget);
        }
        catch (Exception ex)
        {
            logWriter.WriteLine("Tar and move file error for " + tarFile + " : " + ex.Message);
            logWriter.Flush();
        }
    }

    ProtCidSettings.progressInfo.progStrQueue.Enqueue("Tar sequence files done!");
    ProtCidSettings.logWriter.WriteLine("Tar sequence files done!");
}
/// <summary>
/// For every group found among the "group*.fasta" files in the sequence fasta folder:
/// writes the per-cluster fasta files, tars them together with the group fasta files,
/// and moves the archive into <c>webFastaFileDir</c>.
/// </summary>
/// <remarks>
/// Groups are processed in ascending id order. <c>webFastaFileDir</c> is not assigned by this
/// method; it uses whatever value the field already holds. Errors while writing cluster
/// files or while tarring/moving are written to <c>logWriter</c> and do not stop the loop.
/// </remarks>
public void PrintClusterFastaFiles()
{
    string[] allGroupFastaFiles = Directory.GetFiles(ProtCidSettings.dirSettings.seqFastaPath, "group*.fasta");
    Dictionary<int, List<string>> groupFastaFilesHash = GetGroupIDsFromFiles(allGroupFastaFiles);

    List<string> fastaFileList = new List<string>();
    List<int> groupIdList = new List<int>(groupFastaFilesHash.Keys);
    groupIdList.Sort();

    ProtCidSettings.progressInfo.ResetCurrentProgressInfo();
    ProtCidSettings.progressInfo.currentOperationLabel = "Tar Seq Files";
    ProtCidSettings.progressInfo.totalOperationNum = groupIdList.Count;
    ProtCidSettings.progressInfo.totalStepNum = groupIdList.Count;

    foreach (int chainGroupId in groupIdList)
    {
        ProtCidSettings.progressInfo.currentFileName = chainGroupId.ToString();
        ProtCidSettings.progressInfo.currentOperationNum++;
        ProtCidSettings.progressInfo.currentStepNum++;

        fastaFileList.Clear();
        foreach (string groupFastaFile in groupFastaFilesHash[chainGroupId])
        {
            string groupIdWithType = GetGroupIdWithTypeFromFileName(groupFastaFile);
            fastaFileList.Add("Group" + groupIdWithType + ".fasta");
            try
            {
                string clusterInfoFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, "Cluster" + groupIdWithType + ".txt");
                Dictionary<string, string> groupEntitySequenceHash = null;
                Dictionary<string, string> groupEntityAnnotationHash = null;
                Dictionary<int, string[]> clusterEntityHash = GetClusterEntityHash(clusterInfoFile);
                ReadEntitySequenceHash(groupFastaFile, out groupEntitySequenceHash, out groupEntityAnnotationHash);
                foreach (int clusterId in clusterEntityHash.Keys)
                {
                    string[] clusterEntities = clusterEntityHash[clusterId];
                    // The file is written by the helper; only its expected name is recorded here.
                    WriteClusterEntitySequencesToFile(groupIdWithType, clusterId, clusterEntities, groupEntitySequenceHash, groupEntityAnnotationHash);
                    fastaFileList.Add("Cluster" + groupIdWithType + "_" + clusterId.ToString() + ".fasta");
                }
            }
            catch (Exception ex)
            {
                logWriter.WriteLine("Output cluster fasta files errors: " + groupFastaFile + " " + ex.Message);
                logWriter.Flush();
            }
        }

        string[] fastaFiles = fastaFileList.ToArray();
        string chainGroupName = DownloadableFileName.GetChainGroupTarGzFileName(chainGroupId);

        // Declared outside the try so the error message can name the archive that failed.
        string fastaTarFile = "Seq_" + chainGroupName + ".tar.gz";
        try
        {
            fastaTarFile = fileCompress.RunTar(fastaTarFile, fastaFiles, ProtCidSettings.dirSettings.seqFastaPath, true);
            File.Move(Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, fastaTarFile), Path.Combine(webFastaFileDir, fastaTarFile));
        }
        catch (Exception ex)
        {
            logWriter.WriteLine("Tar and move file error for " + fastaTarFile + " : " + ex.Message);
            logWriter.Flush();
        }
    }
    ProtCidSettings.progressInfo.progStrQueue.Enqueue("Tar sequence files done!");
}
/// <summary>
/// Tars the cluster sequence fasta files of the given relations and moves each archive
/// into the web domain fasta folder, replacing an archive of the same name if present.
/// </summary>
/// <remarks>
/// The fasta file names are obtained from <c>GetRelationClusterSeqFastaFiles</c>.
/// <c>ProtCidSettings.dirSettings.seqFastaPath</c> is rewritten ("\fasta" replaced by
/// "\DomainFasta"), <c>webFastaFileDir</c> is derived from it, and both folders are created
/// when missing. Each archive name is appended to relSeq-newls.txt in the web folder.
/// A failure on one relation is reported to the progress queue and the log, and the
/// remaining relations are still processed.
/// </remarks>
/// <param name="relSeqIds">Relation sequence ids to process.</param>
public void UpdateClusterDomainSequencesForDebug(int[] relSeqIds)
{
    ProtCidSettings.dirSettings.seqFastaPath = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\fasta", "\\DomainFasta");
    if (!Directory.Exists(ProtCidSettings.dirSettings.seqFastaPath))
    {
        Directory.CreateDirectory(ProtCidSettings.dirSettings.seqFastaPath);
    }
    webFastaFileDir = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\DomainFasta", "\\WebDomainFasta");
    if (!Directory.Exists(webFastaFileDir))
    {
        Directory.CreateDirectory(webFastaFileDir);
    }

    // The using block closes the list file even if something outside the per-relation try throws.
    using (StreamWriter lsFileWriter = new StreamWriter(Path.Combine(webFastaFileDir, "relSeq-newls.txt"), true))
    {
        ProtCidSettings.progressInfo.ResetCurrentProgressInfo();
        ProtCidSettings.progressInfo.totalOperationNum = relSeqIds.Length;
        ProtCidSettings.progressInfo.totalStepNum = relSeqIds.Length;

        foreach (int relSeqId in relSeqIds)
        {
            ProtCidSettings.progressInfo.currentFileName = relSeqId.ToString();
            ProtCidSettings.progressInfo.currentOperationNum++;
            ProtCidSettings.progressInfo.currentStepNum++;

            try
            {
                string[] fastaSeqFiles = GetRelationClusterSeqFastaFiles(relSeqId);

                string relationName = DownloadableFileName.GetDomainRelationName(relSeqId);
                string relationSeqFile = "Seq_" + relationName + ".tar.gz";
                fileCompress.RunTar(relationSeqFile, fastaSeqFiles, ProtCidSettings.dirSettings.seqFastaPath, true);

                lsFileWriter.WriteLine(relationSeqFile);
                lsFileWriter.Flush();

                // move the tar file to the web folder
                string srcRelSeqFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, relationSeqFile);
                string destRelSeqFile = Path.Combine(webFastaFileDir, relationSeqFile);
                if (File.Exists(destRelSeqFile))
                {
                    File.Delete(destRelSeqFile);
                }
                File.Move(srcRelSeqFile, destRelSeqFile);
            }
            catch (Exception ex)
            {
                ProtCidSettings.progressInfo.progStrQueue.Enqueue(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.WriteLine(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.Flush();
            }
        }
    }
    ProtCidSettings.progressInfo.progStrQueue.Enqueue("Done!");
}
/// <summary>
/// For every distinct RelSeqID in PfamDomainInterfaceCluster: writes the relation's cluster
/// sequence fasta files, tars them, and moves the archive into the web domain fasta folder,
/// replacing an archive of the same name if present.
/// </summary>
/// <remarks>
/// When an archive with the expected name already exists in the sequence fasta folder the
/// fasta files are not regenerated and the existing archive is moved as-is.
/// <c>ProtCidSettings.dirSettings.seqFastaPath</c> is rewritten ("\fasta" replaced by
/// "\DomainFasta"), <c>webFastaFileDir</c> is derived from it, and both folders are created
/// when missing. Each archive name is appended to relSeq-ls.txt in the web folder.
/// A failure on one relation is reported to the progress queue and the log, and the
/// remaining relations are still processed.
/// </remarks>
public void PrintClusterDomainSequences()
{
    ProtCidSettings.dirSettings.seqFastaPath = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\fasta", "\\DomainFasta");
    if (!Directory.Exists(ProtCidSettings.dirSettings.seqFastaPath))
    {
        Directory.CreateDirectory(ProtCidSettings.dirSettings.seqFastaPath);
    }
    webFastaFileDir = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\DomainFasta", "\\WebDomainFasta");
    if (!Directory.Exists(webFastaFileDir))
    {
        Directory.CreateDirectory(webFastaFileDir);
    }

    // The using block closes the list file even if one of the queries below throws.
    using (StreamWriter lsFileWriter = new StreamWriter(Path.Combine(webFastaFileDir, "relSeq-ls.txt"), true))
    {
        string queryString = "Select Distinct RelSeqID From PfamDomainInterfaceCluster;";
        DataTable relSeqIdTable = ProtCidSettings.protcidQuery.Query(queryString);

        ProtCidSettings.progressInfo.ResetCurrentProgressInfo();
        ProtCidSettings.progressInfo.totalOperationNum = relSeqIdTable.Rows.Count;
        ProtCidSettings.progressInfo.totalStepNum = relSeqIdTable.Rows.Count;

        // Both tables are loaded once and handed to the fasta writer for every relation.
        queryString = "Select PdbID, EntityID, AsymID, Sequence From AsymUnit WHere PolymerType = 'polypeptide';";
        DataTable entitySeqTable = ProtCidSettings.pdbfamQuery.Query(queryString);
        queryString = "Select PdbID, DomainID, EntityID, SeqStart, SeqEnd, Pfam_ID, Pfam_Acc From PdbPfam;";
        DataTable domainTable = ProtCidSettings.pdbfamQuery.Query(queryString);

        foreach (DataRow relSeqIdRow in relSeqIdTable.Rows)
        {
            int relSeqId = Convert.ToInt32(relSeqIdRow["RelSeqID"].ToString());
            string relationName = DownloadableFileName.GetDomainRelationName(relSeqId);

            ProtCidSettings.progressInfo.currentFileName = relSeqId.ToString();
            ProtCidSettings.progressInfo.currentOperationNum++;
            ProtCidSettings.progressInfo.currentStepNum++;

            try
            {
                string relationSeqFile = "Seq_" + relationName + ".tar.gz";
                // Check in the folder the archive is created in; a bare file name would be
                // resolved against the process working directory instead.
                string existingTarFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, relationSeqFile);
                if (!File.Exists(existingTarFile))
                {
                    string[] fastaSeqFiles = PrintRelationClusterSeqFasta(relSeqId, entitySeqTable, domainTable);
                    relationSeqFile = fileCompress.RunTar(relationSeqFile, fastaSeqFiles, ProtCidSettings.dirSettings.seqFastaPath, true);
                }
                lsFileWriter.WriteLine(relationSeqFile);
                lsFileWriter.Flush();

                // move the tar file to the web folder
                string srcRelSeqFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, relationSeqFile);
                string destRelSeqFile = Path.Combine(webFastaFileDir, relationSeqFile);
                if (File.Exists(destRelSeqFile))
                {
                    File.Delete(destRelSeqFile);
                }
                File.Move(srcRelSeqFile, destRelSeqFile);
            }
            catch (Exception ex)
            {
                ProtCidSettings.progressInfo.progStrQueue.Enqueue(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.WriteLine(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.Flush();
            }
        }
    }
    ProtCidSettings.progressInfo.progStrQueue.Enqueue("Done!");
}
/// <summary>
/// For each given relation: deletes its old sequence files, rewrites its cluster sequence
/// fasta files, tars them, and moves the archive into the web domain fasta folder.
/// </summary>
/// <remarks>
/// <c>ProtCidSettings.dirSettings.seqFastaPath</c> is rewritten ("\fasta" replaced by
/// "\DomainFasta") and created when missing. <c>webFastaFileDir</c> is derived from it and is
/// deleted recursively and recreated, so it only holds archives produced by this call.
/// Each archive name is written to relSeq-newls.txt in the web folder.
/// A failure on one relation is reported to the progress queue and the log, and the
/// remaining relations are still processed.
/// </remarks>
/// <param name="relSeqIds">Relation sequence ids to update.</param>
public void UpdateClusterDomainSequences(int[] relSeqIds)
{
    ProtCidSettings.dirSettings.seqFastaPath = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\fasta", "\\DomainFasta");
    if (!Directory.Exists(ProtCidSettings.dirSettings.seqFastaPath))
    {
        Directory.CreateDirectory(ProtCidSettings.dirSettings.seqFastaPath);
    }
    webFastaFileDir = ProtCidSettings.dirSettings.seqFastaPath.Replace("\\DomainFasta", "\\WebDomainFasta");
    // Start from an empty web folder.
    if (Directory.Exists(webFastaFileDir))
    {
        Directory.Delete(webFastaFileDir, true);
    }
    Directory.CreateDirectory(webFastaFileDir);

    // The using block closes the list file even if one of the queries below throws.
    using (StreamWriter lsFileWriter = new StreamWriter(Path.Combine(webFastaFileDir, "relSeq-newls.txt"), true))
    {
        ProtCidSettings.progressInfo.ResetCurrentProgressInfo();
        ProtCidSettings.progressInfo.totalOperationNum = relSeqIds.Length;
        ProtCidSettings.progressInfo.totalStepNum = relSeqIds.Length;

        // Both tables are loaded once and handed to the fasta writer for every relation.
        string queryString = "Select PdbID, EntityID, AsymID, Sequence From AsymUnit WHere PolymerType = 'polypeptide';";
        DataTable entitySeqTable = ProtCidSettings.pdbfamQuery.Query(queryString);
        queryString = "Select PdbID, DomainID, EntityID, SeqStart, SeqEnd, Pfam_ID, Pfam_Acc From PdbPfam;";
        DataTable domainTable = ProtCidSettings.pdbfamQuery.Query(queryString);

        foreach (int relSeqId in relSeqIds)
        {
            ProtCidSettings.progressInfo.currentFileName = relSeqId.ToString();
            ProtCidSettings.progressInfo.currentOperationNum++;
            ProtCidSettings.progressInfo.currentStepNum++;

            try
            {
                DeleteRelationSeqFiles(relSeqId); // delete the old files

                string[] fastaSeqFiles = PrintRelationClusterSeqFasta(relSeqId, entitySeqTable, domainTable);
                // the length of parameters of Cmd.exe cannot be longer than 8191
                string relationName = DownloadableFileName.GetDomainRelationName(relSeqId);
                string relationSeqFile = "Seq_" + relationName + ".tar.gz";
                fileCompress.RunTar(relationSeqFile, fastaSeqFiles, ProtCidSettings.dirSettings.seqFastaPath, true);

                lsFileWriter.WriteLine(relationSeqFile);
                lsFileWriter.Flush();

                // move the tar file to the web folder
                string srcRelSeqFile = Path.Combine(ProtCidSettings.dirSettings.seqFastaPath, relationSeqFile);
                string destRelSeqFile = Path.Combine(webFastaFileDir, relationSeqFile);
                if (File.Exists(destRelSeqFile))
                {
                    File.Delete(destRelSeqFile);
                }
                File.Move(srcRelSeqFile, destRelSeqFile);
            }
            catch (Exception ex)
            {
                ProtCidSettings.progressInfo.progStrQueue.Enqueue(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.WriteLine(relSeqId.ToString() + " Writing sequences to fasta files errors: " + ex.Message);
                ProtCidSettings.logWriter.Flush();
            }
        }
    }
    ProtCidSettings.progressInfo.progStrQueue.Enqueue("Done!");
}