/// <summary>
/// Rewrites an SD file so that only the first record for each InChI key is kept.
/// </summary>
/// <param name="filename">Path of the SD file to de-duplicate in place.</param>
/// <param name="totalreccnt">Number of records read from the file.</param>
/// <param name="dupreccnt">Number of records dropped because their key was already seen.</param>
/// <returns>
/// <c>true</c> when the de-duplicated file replaced the original; <c>false</c> when an
/// exception was logged (the original file is then left untouched).
/// </returns>
public static bool DeleteAllDuplicateStructures(string filename, out int totalreccnt, out int dupreccnt)
{
    bool blStatus = false;
    int intDupRecCnt = 0;
    int intTotalRecCnt = 0;
    string strTempFile = null;

    try
    {
        // The result is written to a scratch file first: opening an output stream on
        // 'filename' itself would truncate the file while it is still being read.
        strTempFile = System.IO.Path.GetTempFileName();

        MolInputStream molInStream = null;
        MolImporter molImp = null;
        DataOutputStream dOutStream = null;
        MolExporter molExpt = null;
        try
        {
            molInStream = new MolInputStream(new FileInputStream(filename));
            molImp = new MolImporter(molInStream);
            dOutStream = new DataOutputStream(new FileOutputStream(strTempFile));
            molExpt = new MolExporter(dOutStream, "sdf");

            // Hash set gives constant-time membership checks for the keys seen so far.
            System.Collections.Generic.HashSet<string> seenKeys = new System.Collections.Generic.HashSet<string>();
            Molecule objMol = new Molecule();
            bool blIsChiral = false;

            while (molImp.read(objMol))
            {
                objMol = StandardizeMolecule(objMol, out blIsChiral);
                string strInchiKey = GetInchiKeyFromInchiString(objMol.toFormat("inchi:key"));

                // Add() returns false when the key is already present.
                if (seenKeys.Add(strInchiKey))
                {
                    molExpt.write(objMol);
                }
                else
                {
                    intDupRecCnt++;
                }
                intTotalRecCnt++;
            }
        }
        finally
        {
            // Close the exporter before its underlying stream so that anything it
            // still buffers reaches the file; then release the input side.
            try
            {
                if (molExpt != null) { molExpt.close(); }
                if (dOutStream != null) { dOutStream.close(); }
            }
            finally
            {
                if (molImp != null) { molImp.close(); }
                if (molInStream != null) { molInStream.close(); }
            }
        }

        // Both files are closed at this point, so the original can be replaced.
        System.IO.File.Copy(strTempFile, filename, true);
        blStatus = true;
    }
    catch (Exception ex)
    {
        ErrorHandling.WriteErrorLog(ex.ToString());
    }
    finally
    {
        if (strTempFile != null)
        {
            try
            {
                System.IO.File.Delete(strTempFile);
            }
            catch (Exception ex)
            {
                // A left-over scratch file must not change the reported outcome.
                ErrorHandling.WriteErrorLog(ex.ToString());
            }
        }
    }

    totalreccnt = intTotalRecCnt;
    dupreccnt = intDupRecCnt;
    return blStatus;
}
/// <summary>
/// Scans an SD file for a record, other than the one at <paramref name="recindex"/>,
/// whose InChI key equals that of the query molecule.
/// </summary>
/// <param name="filename">Path of the SD file to scan.</param>
/// <param name="qrymolfile">Query structure text handed to <c>MolHandler</c>.</param>
/// <param name="recindex">1-based index of the record to ignore (normally the query's own record).</param>
/// <param name="mol_out">The first matching record as read from the file, or <c>null</c> when none matched.</param>
/// <returns><c>true</c> when a duplicate was found; <c>false</c> otherwise or after a logged exception.</returns>
public static bool CheckForDuplicateStructure(string filename, string qrymolfile, int recindex, out Molecule mol_out)
{
    mol_out = null;
    MolInputStream molInStream = null;
    MolImporter molImp = null;

    try
    {
        bool blIsChiral = false;

        MolHandler mHandler = new MolHandler(qrymolfile);
        Molecule qryMol = StandardizeMolecule(mHandler.getMolecule(), out blIsChiral);
        string strQryMolInchi = GetInchiKeyFromInchiString(qryMol.toFormat("inchi:key"));

        molInStream = new MolInputStream(new FileInputStream(filename));
        molImp = new MolImporter(molInStream);

        Molecule objMol = new Molecule();
        int intRecIndx = 0;

        while (molImp.read(objMol))
        {
            intRecIndx++;

            // Derive the key from the molecule returned by StandardizeMolecule, exactly as
            // is done for the query above; otherwise a standardized key could be compared
            // against a non-standardized one.
            Molecule molStandardized = StandardizeMolecule(objMol, out blIsChiral);
            string strInchiKey = GetInchiKeyFromInchiString(molStandardized.toFormat("inchi:key"));

            if (strInchiKey == strQryMolInchi && intRecIndx != recindex)
            {
                mol_out = objMol;
                return true;
            }
        }
    }
    catch (Exception ex)
    {
        ErrorHandling.WriteErrorLog(ex.ToString());
    }
    finally
    {
        // Runs on the early return as well, so the importer is always released.
        try
        {
            if (molImp != null)
            {
                molImp.close();
            }
            else if (molInStream != null)
            {
                // The importer was never created; release the raw stream directly.
                molInStream.close();
            }
        }
        catch (Exception ex)
        {
            ErrorHandling.WriteErrorLog(ex.ToString());
        }
    }

    return false;
}
/// <summary>
/// Removes one record from an SD file by copying every other record to a scratch
/// file and then replacing the original with it.
/// </summary>
/// <param name="_infilename">Path of the SD file to modify.</param>
/// <param name="_recindex">1-based index of the record to drop.</param>
/// <returns><c>true</c> when the file was rewritten; <c>false</c> after a logged exception.</returns>
public static bool DeleteRecordFromSDFile(string _infilename, int _recindex)
{
    bool blStatus = false;
    string strOutFile = null;

    try
    {
        // A unique scratch file cannot collide with the input, which a file of the same
        // name under the application directory would when the input itself lives there.
        strOutFile = System.IO.Path.GetTempFileName();

        MolInputStream molInStream = null;
        MolImporter molImp = null;
        DataOutputStream dOutStream = null;
        MolExporter mExpt = null;
        try
        {
            molInStream = new MolInputStream(new FileInputStream(_infilename));
            molImp = new MolImporter(molInStream);
            dOutStream = new DataOutputStream(new FileOutputStream(strOutFile));
            mExpt = new MolExporter(dOutStream, "sdf");

            Molecule objMol = new Molecule();
            int intRecIndex = 0;
            while (molImp.read(objMol))
            {
                intRecIndex++;
                if (intRecIndex != _recindex)
                {
                    mExpt.write(objMol);
                }
            }
        }
        finally
        {
            // Release everything even when reading or writing failed part-way.
            try
            {
                if (mExpt != null) { mExpt.close(); }
                if (dOutStream != null) { dOutStream.close(); }
            }
            finally
            {
                if (molImp != null) { molImp.close(); }
                if (molInStream != null) { molInStream.close(); }
            }
        }

        System.IO.File.Copy(strOutFile, _infilename, true);
        blStatus = true;
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    finally
    {
        if (strOutFile != null)
        {
            try
            {
                System.IO.File.Delete(strOutFile);
            }
            catch (Exception ex)
            {
                // Failing to remove the scratch file must not change the outcome.
                ErrorHandling_NTS.WriteErrorLog(ex.ToString());
            }
        }
    }

    return blStatus;
}
/// <summary>
/// Loads the solvent/catalyst master SD file into <c>GlobalVariables.SolvCatalystMaster</c>
/// the first time it is called; later calls are no-ops while that table is non-null.
/// </summary>
/// <param name="filePath">Path of the SD file holding the master records.</param>
/// <remarks>
/// Exceptions raised while reading propagate to the caller. The global table is only
/// assigned after the whole file has been read, so a failed import leaves it <c>null</c>
/// and a later call can retry.
/// </remarks>
public static void ImportAgentsMaster(string filePath)
{
    if (GlobalVariables.SolvCatalystMaster != null)
    {
        return;
    }

    DataTable dtAgents = new DataTable();
    dtAgents.Columns.Add("MOL_FILE");
    dtAgents.Columns.Add("IUPAC_NAME");
    dtAgents.Columns.Add("INCHI_KEY");
    dtAgents.Columns.Add("OTHER_NAMES");

    FileInputStream fInStream = null;
    MolImporter molImp = null;
    try
    {
        fInStream = new FileInputStream(filePath);
        molImp = new MolImporter(fInStream, "sdf");
        Molecule objMol = new Molecule();

        while (molImp.read(objMol))
        {
            DataRow dRow = dtAgents.NewRow();
            dRow["MOL_FILE"] = objMol.toFormat("mol");
            dRow["IUPAC_NAME"] = objMol.getProperty("MOL:SYMBOL(1)");
            dRow["INCHI_KEY"] = objMol.getProperty("MOL:INCHIKEY");

            // Every MOL:SYMBOL(n) property other than MOL:SYMBOL(1) is collected
            // into one comma-separated OTHER_NAMES value.
            string strOtherName = "";
            int propCnt = objMol.getPropertyCount();
            for (int i = 0; i < propCnt; i++)
            {
                string propName = objMol.getPropertyKey(i);
                bool isPrimaryName = string.Equals(propName, "MOL:SYMBOL(1)", StringComparison.OrdinalIgnoreCase);
                if (!isPrimaryName && propName.StartsWith("MOL:SYMBOL(", StringComparison.OrdinalIgnoreCase))
                {
                    string propValue = objMol.getPropertyObject(propName).ToString();
                    strOtherName = string.IsNullOrEmpty(strOtherName)
                        ? propValue
                        : strOtherName.Trim() + "," + propValue;
                }
            }
            dRow["OTHER_NAMES"] = strOtherName;

            dtAgents.Rows.Add(dRow);
        }
    }
    finally
    {
        if (molImp != null)
        {
            molImp.close();
        }
        else if (fInStream != null)
        {
            // The importer was never created; release the raw stream directly.
            fInStream.close();
        }
    }

    // Publish only a completely populated table.
    GlobalVariables.SolvCatalystMaster = dtAgents;
}
/// <summary>
/// Counts the records in an SD file whose InChI key repeats that of an earlier record.
/// </summary>
/// <param name="filename">Path of the SD file to scan.</param>
/// <param name="totalreccnt">Number of records read before the scan ended.</param>
/// <returns>
/// The number of duplicate records. If an exception is logged part-way through,
/// both counts reflect only the records processed up to that point.
/// </returns>
public static int GetDuplicateRecordsCount(string filename, out int totalreccnt)
{
    int intDupRecCnt = 0;
    int intTotalRecCnt = 0;
    MolInputStream molInStream = null;
    MolImporter molImp = null;

    try
    {
        molInStream = new MolInputStream(new FileInputStream(filename));
        molImp = new MolImporter(molInStream);

        // Hash set gives constant-time membership checks for the keys seen so far.
        System.Collections.Generic.HashSet<string> seenKeys = new System.Collections.Generic.HashSet<string>();
        Molecule objMol = new Molecule();
        bool blIsChiral = false;

        while (molImp.read(objMol))
        {
            objMol = StandardizeMolecule(objMol, out blIsChiral);
            string strInchiKey = GetInchiKeyFromInchiString(objMol.toFormat("inchi:key"));

            // Add() returns false when the key is already present.
            if (!seenKeys.Add(strInchiKey))
            {
                intDupRecCnt++;
            }
            intTotalRecCnt++;
        }
    }
    catch (Exception ex)
    {
        ErrorHandling.WriteErrorLog(ex.ToString());
    }
    finally
    {
        // Release the file even when reading failed part-way.
        try
        {
            if (molImp != null)
            {
                molImp.close();
            }
            else if (molInStream != null)
            {
                // The importer was never created; release the raw stream directly.
                molInStream.close();
            }
        }
        catch (Exception ex)
        {
            ErrorHandling.WriteErrorLog(ex.ToString());
        }
    }

    totalreccnt = intTotalRecCnt;
    return intDupRecCnt;
}
/// <summary>
/// Reads every remaining record from the importer into a table created by
/// <c>ChemistryOperations.CreateTANDetailsTable()</c>, one row per molecule.
/// </summary>
/// <param name="_molImporter">Open importer to read from; it is not closed here.</param>
/// <returns>
/// The populated table. After a logged exception the rows added so far are returned,
/// or <c>null</c> if the table itself could not be created.
/// </returns>
private DataTable ReadAllMoleculesIntoTable(MolImporter _molImporter)
{
    DataTable dtMolData = null;
    try
    {
        dtMolData = ChemistryOperations.CreateTANDetailsTable();

        // 'mol' is the class-level molecule instance that each record is read into.
        while (_molImporter.read(mol))
        {
            DataRow dtRow = dtMolData.NewRow();

            dtRow["Structure"] = mol.toFormat("mol");
            dtRow["MolWeight"] = mol.getMass().ToString();
            dtRow["MolFormula"] = mol.getFormula();

            // A null property value is stored as an empty string; calling Trim() on it
            // directly would throw and silently cut the table short at this record.
            dtRow["PageNumber"] = (mol.getProperty("Page Number") ?? "").Trim();
            dtRow["PageLabel"] = (mol.getProperty("Page Label") ?? "").Trim();
            dtRow["ExampleNumber"] = (mol.getProperty("Example Number") ?? "").Trim();
            dtRow["IupacName"] = (mol.getProperty("IUPAC Name") ?? "").Trim();
            dtRow["EnName"] = (mol.getProperty("en name") ?? "").Trim();

            dtRow["IsChiral"] = mol.isAbsStereo() ? "True" : "False";

            dtMolData.Rows.Add(dtRow);
        }
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    return dtMolData;
}
/// <summary>
/// Writes the records of an SD file as a prophetic-substance patent XML document,
/// building the markup line by line.
/// </summary>
/// <param name="infilename">Path of the SD file to read.</param>
/// <param name="tannumber">Value written into the <c>tan</c> element.</param>
/// <param name="outputfilepath">Path of the XML file to create.</param>
/// <returns>
/// <c>true</c> when the complete document was written; <c>false</c> after a logged
/// exception (the output file may then be missing or incomplete).
/// </returns>
public static bool WriteXmlFile(string infilename, string tannumber, string outputfilepath)
{
    bool blStatus = false;
    System.IO.StreamWriter sWriter = null;
    MolInputStream molInStream = null;
    MolImporter molImp = null;

    try
    {
        sWriter = new System.IO.StreamWriter(outputfilepath);

        // XML file header information
        sWriter.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>");
        sWriter.WriteLine("<patent xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"PatentEnhancedPrioritySubstanceIndexing-2.3.xsd\">");
        sWriter.WriteLine("<patentInfo>");
        sWriter.WriteLine("<tan>" + System.Security.SecurityElement.Escape(tannumber) + "</tan>");
        sWriter.WriteLine("<language>en</language>");
        sWriter.WriteLine("</patentInfo>");
        sWriter.WriteLine("<propheticSubstances>");

        molInStream = new MolInputStream(new FileInputStream(infilename));
        molImp = new MolImporter(molInStream);
        Molecule mol = new Molecule();

        while (molImp.read(mol))
        {
            // Text values are XML-escaped so that characters such as '&' or '<'
            // in a name or label cannot produce a malformed document.
            string strPageNo = System.Security.SecurityElement.Escape(mol.getProperty("Page Number").Trim());
            string strPageLbl = System.Security.SecurityElement.Escape(mol.getProperty("Page Label").Trim());
            string strExampleLbl = System.Security.SecurityElement.Escape(mol.getProperty("Example Number").Trim());
            string strEnName = System.Security.SecurityElement.Escape(mol.getProperty("en name").Trim());
            string strIupacName = System.Security.SecurityElement.Escape(mol.getProperty("IUPAC Name").Trim());

            sWriter.WriteLine("<propheticSubstance>");

            sWriter.WriteLine("<patentLocation>");
            sWriter.WriteLine("<pageNumber>" + strPageNo + "</pageNumber>");
            sWriter.WriteLine("<pageLabel>" + strPageLbl + "</pageLabel>");
            sWriter.WriteLine("<exampleLabel>" + strExampleLbl + "</exampleLabel>");
            sWriter.WriteLine("</patentLocation>");

            sWriter.WriteLine("<names>");
            sWriter.WriteLine("<name lang=\"en\">" + strEnName + "</name>");
            sWriter.WriteLine("<IUPACName>" + strIupacName + "</IUPACName>");
            sWriter.WriteLine("</names>");

            // NOTE(review): the molfile is emitted as-is; nothing here verifies that it
            // is V2000 or non-empty even though the element declares "MDL Molfile V2000".
            string strMolBase64 = ConvertToBase64.GetConvertedMolString(mol.toFormat("mol"));
            sWriter.WriteLine("<structureData encoding=\"Base64\" type=\"MDL Molfile V2000\">" + strMolBase64 + "</structureData>");

            sWriter.WriteLine("</propheticSubstance>");
        }

        sWriter.WriteLine("</propheticSubstances>");
        sWriter.WriteLine("</patent>");

        // Flush explicitly so that a write failure is reported as 'false'.
        sWriter.Flush();
        blStatus = true;
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    finally
    {
        // sWriter is still null when its constructor threw, so guard the cleanup.
        try
        {
            if (sWriter != null)
            {
                sWriter.Dispose();
            }
        }
        catch (Exception ex)
        {
            ErrorHandling_NTS.WriteErrorLog(ex.ToString());
            blStatus = false;
        }

        try
        {
            if (molImp != null)
            {
                molImp.close();
            }
            else if (molInStream != null)
            {
                molInStream.close();
            }
        }
        catch (Exception ex)
        {
            ErrorHandling_NTS.WriteErrorLog(ex.ToString());
        }
    }

    return blStatus;
}
/// <summary>
/// Writes the records of an SD file as a prophetic-substance patent XML document by
/// filling the schema-generated classes and serializing them with <c>XmlSerializer</c>.
/// </summary>
/// <param name="infilename">Path of the SD file to read.</param>
/// <param name="tannumber">Value assigned to <c>patentInfo.tan</c>.</param>
/// <param name="outputfilepath">Path of the XML file to create.</param>
/// <returns><c>true</c> when the document was serialized; <c>false</c> after a logged exception.</returns>
public static bool WriteXmlFileUsingXSD(string infilename, string tannumber, string outputfilepath)
{
    bool blStatus = false;
    MolInputStream molInStream = null;
    MolImporter molImp = null;

    try
    {
        patentInfo patentInfo_Obj = new patentInfo();
        patentInfo_Obj.tan = tannumber;
        patentInfo_Obj.language = languageType.en;

        molInStream = new MolInputStream(new FileInputStream(infilename));
        molImp = new MolImporter(molInStream);
        Molecule mol = new Molecule();

        // Collected in a growable list: the result then always has exactly one entry per
        // record read, without relying on a separate record count of the same file.
        System.Collections.Generic.List<propheticSubstance> substances = new System.Collections.Generic.List<propheticSubstance>();

        while (molImp.read(mol))
        {
            patentLocation patLoc = new patentLocation();
            patLoc.pageLabel = mol.getProperty("Page Label");
            patLoc.pageNumber = mol.getProperty("Page Number");
            patLoc.exampleLabel = mol.getProperty("Example Number");

            name name_obj = new name();
            name_obj.lang = languageType.en;
            name_obj.Text = new string[] { mol.getProperty("en name").Trim() };

            nameType namType = new nameType();
            namType.Text = new string[] { mol.getProperty("IUPAC Name").Trim() };

            names names_Obj = new names();
            names_Obj.IUPACName = namType;
            names_Obj.name = new name[] { name_obj };

            // Structure bytes are only supplied for a non-V3000 molfile with non-zero
            // exact mass; otherwise the value stays null.
            string strMol = mol.toFormat("mol");
            byte[] barr_Mol = null;
            if (strMol.IndexOf("V3000") == -1 && mol.getExactMass() != 0)
            {
                barr_Mol = System.Text.ASCIIEncoding.ASCII.GetBytes(strMol);
            }
            structureDataType structDtype = new structureDataType();
            structDtype.Value = barr_Mol;

            propheticSubstance propSubstance_Obj = new propheticSubstance();
            propSubstance_Obj.structureData = structDtype;
            propSubstance_Obj.patentLocation = patLoc;
            propSubstance_Obj.names = names_Obj;
            substances.Add(propSubstance_Obj);
        }

        propheticSubstances propSubstances_Obj = new propheticSubstances();
        propSubstances_Obj.propheticSubstance = substances.ToArray();

        patent patent_Obj = new patent();
        patent_Obj.patentInfo = patentInfo_Obj;
        patent_Obj.propheticSubstances = propSubstances_Obj;

        // Serialization; 'using' releases the output file even when Serialize throws.
        XmlSerializer xmlSer = new XmlSerializer(typeof(patent));
        using (TextWriter txtWriter = new StreamWriter(outputfilepath))
        {
            xmlSer.Serialize(txtWriter, patent_Obj);
        }

        blStatus = true;
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    finally
    {
        // Release the input file on both the success and the failure path.
        try
        {
            if (molInStream != null) { molInStream.close(); }
            if (molImp != null) { molImp.close(); }
        }
        catch (Exception ex)
        {
            ErrorHandling_NTS.WriteErrorLog(ex.ToString());
        }
    }

    return blStatus;
}
/// <summary>
/// Collects every record of an SD file whose InChI key equals that of the query
/// molecule into a TAN-details table extended with an <c>OrigRecIndex</c> column.
/// </summary>
/// <param name="filename">Path of the SD file to scan.</param>
/// <param name="qrymolstring">Query structure text handed to <c>MolHandler</c>.</param>
/// <param name="totalrecs_out">Number of records read before the scan ended.</param>
/// <returns>
/// The table of matching records. After a logged exception it holds the rows found so
/// far, or is <c>null</c> if the table could not be created.
/// </returns>
/// <remarks>
/// <c>OrigRecIndex</c> holds the zero-based position of the record in the file.
/// NOTE(review): other methods in this file compare against 1-based record indexes;
/// confirm that callers convert before passing this value on.
/// </remarks>
public static DataTable GetDuplicateRecords(string filename, string qrymolstring, out int totalrecs_out)
{
    DataTable dtDupRecs = null;
    int totalRecCnt = 0;
    MolInputStream molInStream = null;
    MolImporter molImp = null;

    try
    {
        dtDupRecs = CreateTANDetailsTable();
        dtDupRecs.Columns.Add("OrigRecIndex", typeof(Int32));

        bool blIsChiral = false;

        // Use the molecule returned by StandardizeMolecule for the query, exactly as is
        // done for each target record below, so both keys come from the same form.
        MolHandler mHandler = new MolHandler(qrymolstring);
        Molecule qryMol = StandardizeMolecule(mHandler.getMolecule(), out blIsChiral);
        string InchiKey_Qry = Validations.GetInchiKeyFromInchiString(qryMol.toFormat("inchi:key"));

        molInStream = new MolInputStream(new FileInputStream(filename));
        molImp = new MolImporter(molInStream);
        Molecule objMol = new Molecule();

        while (molImp.read(objMol))
        {
            objMol = StandardizeMolecule(objMol, out blIsChiral);
            string InchiKey_Trgt = Validations.GetInchiKeyFromInchiString(objMol.toFormat("inchi:key"));

            if (InchiKey_Qry == InchiKey_Trgt)
            {
                DataRow dtRow = dtDupRecs.NewRow();

                dtRow["Structure"] = objMol.toFormat("mol");
                dtRow["MolWeight"] = objMol.getMass().ToString();
                dtRow["MolFormula"] = objMol.getFormula();

                // A null property value is stored as an empty string; calling Trim() on
                // it directly would throw and abort the scan at this record.
                dtRow["PageNumber"] = (objMol.getProperty("Page Number") ?? "").Trim();
                dtRow["PageLabel"] = (objMol.getProperty("Page Label") ?? "").Trim();
                dtRow["ExampleNumber"] = (objMol.getProperty("Example Number") ?? "").Trim();
                dtRow["IupacName"] = (objMol.getProperty("IUPAC Name") ?? "").Trim();
                dtRow["EnName"] = (objMol.getProperty("en name") ?? "").Trim();

                dtRow["IsChiral"] = objMol.isAbsStereo() ? "True" : "False";
                dtRow["OrigRecIndex"] = totalRecCnt;

                dtDupRecs.Rows.Add(dtRow);
            }
            totalRecCnt++;
        }
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    finally
    {
        // Release the input file on both the success and the failure path.
        try
        {
            if (molImp != null) { molImp.close(); }
            if (molInStream != null) { molInStream.close(); }
        }
        catch (Exception ex)
        {
            ErrorHandling_NTS.WriteErrorLog(ex.ToString());
        }
    }

    totalrecs_out = totalRecCnt;
    return dtDupRecs;
}
/// <summary>
/// Replaces one record of an SD file with a new structure and property values by
/// copying all records to a scratch file and then overwriting the original with it.
/// </summary>
/// <param name="filename">Path of the SD file to modify.</param>
/// <param name="recindex">1-based index of the record to replace.</param>
/// <param name="molstring">Structure text for the replacement, handed to <c>MolHandler</c>.</param>
/// <param name="pagenum">Value for the "Page Number" property.</param>
/// <param name="pagelabel">Value for the "Page Label" property.</param>
/// <param name="examplenum">Value for the "Example Number" property.</param>
/// <param name="iupacname">Value for the "IUPAC Name" property.</param>
/// <param name="enname">Value for the "en name" property.</param>
/// <returns>
/// <c>true</c> when the record was found and the rewritten file replaced the original;
/// <c>false</c> when no record had that index or an exception was logged.
/// </returns>
public static bool UpdateRecordInSdFile(string filename, int recindex, string molstring, string pagenum, string pagelabel, string examplenum, string iupacname, string enname)
{
    bool blStatus = false;
    string strOutFile = null;

    try
    {
        // A unique scratch file cannot collide with the input, which a file of the same
        // name under the application directory would when the input itself lives there.
        strOutFile = System.IO.Path.GetTempFileName();

        bool blRecordFound = false;
        MolInputStream molInStream = null;
        MolImporter molImp = null;
        DataOutputStream dOutStream = null;
        MolExporter mExpt = null;
        try
        {
            molInStream = new MolInputStream(new FileInputStream(filename));
            molImp = new MolImporter(molInStream);
            dOutStream = new DataOutputStream(new FileOutputStream(strOutFile));
            mExpt = new MolExporter(dOutStream, "sdf");

            Molecule objMolecule = new Molecule();
            int intRecIndex = 0;
            while (molImp.read(objMolecule))
            {
                intRecIndex++;
                if (intRecIndex == recindex)
                {
                    // Swap in the replacement structure and attach the supplied values.
                    MolHandler molHandler = new MolHandler(molstring);
                    objMolecule = molHandler.getMolecule();
                    objMolecule.setProperty("Page Number", pagenum);
                    objMolecule.setProperty("Page Label", pagelabel);
                    objMolecule.setProperty("Example Number", examplenum);
                    objMolecule.setProperty("IUPAC Name", iupacname);
                    objMolecule.setProperty("en name", enname);
                    blRecordFound = true;
                }
                mExpt.write(objMolecule);
            }
        }
        finally
        {
            // Single cleanup point: each stream is closed exactly once, on success or failure.
            try
            {
                if (mExpt != null) { mExpt.close(); }
                if (dOutStream != null) { dOutStream.close(); }
            }
            finally
            {
                if (molImp != null) { molImp.close(); }
                if (molInStream != null) { molInStream.close(); }
            }
        }

        System.IO.File.Copy(strOutFile, filename, true);

        // Report success only once the rewritten file is actually in place.
        blStatus = blRecordFound;
    }
    catch (Exception ex)
    {
        ErrorHandling_NTS.WriteErrorLog(ex.ToString());
    }
    finally
    {
        if (strOutFile != null)
        {
            try
            {
                System.IO.File.Delete(strOutFile);
            }
            catch (Exception ex)
            {
                // Failing to remove the scratch file must not change the outcome.
                ErrorHandling_NTS.WriteErrorLog(ex.ToString());
            }
        }
    }

    return blStatus;
}