/// <inheritdoc />
/// <remarks>
/// Emits the base class contribution first and then, unless suppressed, a "SemiStableHash" entry.
/// </remarks>
protected override void AddWeakFingerprint(IFingerprinter fingerprinter, SealDirectory sealDirectory)
{
    Contract.Requires(fingerprinter != null);
    Contract.Requires(sealDirectory != null);

    base.AddWeakFingerprint(fingerprinter, sealDirectory);

    // Nothing more to add when the semi stable hash is excluded by configuration
    // or when the directory kind is dynamic.
    if (ExcludeSemiStableHashOnFingerprintingSealDirectory || sealDirectory.Kind.IsDynamicKind())
    {
        return;
    }

    // Several distinct instances of a statically sealed directory may coexist (for example,
    // partially sealed directories that share both root and member set). The semi stable hash
    // is what tells those instances apart in the static fingerprint.
    fingerprinter.Add("SemiStableHash", sealDirectory.SemiStableHash);
}
/// <summary>
/// Regression check for bug 934819: every bit set in the substructure fingerprint must also be
/// set in the superstructure fingerprint.
/// </summary>
/// <remarks>
/// The molecules are built inline. Original author's note: this test fails if implicit hydrogens
/// are included, and PubChem fingerprints generally cannot be used as a substructure filter.
/// </remarks>
public virtual void TestBug934819()
{
    IAtomContainer container = Bug934819_2();
    IAtomContainer fragment = Bug934819_1();

    // Prepare both molecules the same way: atom typing first, implicit hydrogens second.
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(container);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(fragment);
    AddImplicitHydrogens(container);
    AddImplicitHydrogens(fragment);

    IFingerprinter fp = GetBitFingerprinter();
    BitArray containerBits = fp.GetBitFingerprint(container).AsBitSet();
    BitArray fragmentBits = fp.GetBitFingerprint(fragment).AsBitSet();

    // fragment == (container AND fragment) holds exactly when fragment's bits are a subset.
    BitArray sharedBits = And(containerBits, fragmentBits);
    bool fragmentIsSubset = BitArrays.Equals(fragmentBits, sharedBits);
    Assert.IsTrue(fragmentIsSubset);
}
/// <summary>
/// Regression check for bug 706786: every bit set in the substructure fingerprint must also be
/// set in the superstructure fingerprint.
/// </summary>
/// <remarks>
/// The molecules are built inline. Original author's note: this test fails if implicit hydrogens
/// are included, and MACCS and ESTATE generally cannot be used as a substructure filter; see the
/// subclasses that check which bits are set.
/// </remarks>
public virtual void TestBug706786()
{
    IAtomContainer container = Bug706786_1();
    IAtomContainer fragment = Bug706786_2();

    // Prepare both molecules the same way: atom typing first, implicit hydrogens second.
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(container);
    AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(fragment);
    AddImplicitHydrogens(container);
    AddImplicitHydrogens(fragment);

    IFingerprinter fp = GetBitFingerprinter();
    BitArray containerBits = fp.GetBitFingerprint(container).AsBitSet();
    BitArray fragmentBits = fp.GetBitFingerprint(fragment).AsBitSet();

    // fragment == (container AND fragment) holds exactly when fragment's bits are a subset.
    BitArray sharedBits = And(containerBits, fragmentBits);
    bool fragmentIsSubset = BitArrays.Equals(fragmentBits, sharedBits);
    Assert.IsTrue(fragmentIsSubset);
}
/// <inheritdoc />
/// <remarks>
/// Writes, in order: the serialized unsafe options (as a hex string), the nested path set, and
/// the collection of observed accessed file names.
/// </remarks>
public void WriteFingerprintInputs(IFingerprinter writer)
{
    // Serialize the unsafe options into an in-memory buffer and record the bytes as text.
    using (var buffer = new MemoryStream())
    using (var optionsWriter = new BuildXLWriter(
        debug: false,
        stream: buffer,
        leaveOpen: false,
        logStats: false))
    {
        UnsafeOptions.Serialize(optionsWriter);

        byte[] serializedOptions = buffer.ToArray();
        writer.Add("SerializedUnsafeOptions", System.BitConverter.ToString(serializedOptions));
    }

    // The lambda below reads path entries through a local copy rather than through 'this'.
    var self = this;
    writer.AddNested(
        ObservedPathEntryConstants.PathSet,
        nested =>
        {
            foreach (var entry in self.PathEntries)
            {
                nested.Add(ObservedPathEntryConstants.Path, entry.Path);

                // Flags and the enumeration pattern are only written when they carry information.
                bool hasFlags = entry.Flags != ObservedPathEntryFlags.None;
                if (hasFlags)
                {
                    nested.Add(ObservedPathEntryConstants.Flags, entry.Flags.ToString());
                }

                bool hasPattern = entry.EnumeratePatternRegex != null;
                if (hasPattern)
                {
                    nested.Add(ObservedPathEntryConstants.EnumeratePatternRegex, entry.EnumeratePatternRegex);
                }
            }
        });

    writer.AddCollection<StringId, ReadOnlyArray<StringId>>(
        "ObservedAccessedFileNames",
        ObservedAccessedFileNames,
        (collectionWriter, fileName) => collectionWriter.Add(fileName));

    // Observed inputs feed directly into the strong fingerprint hash computation,
    // so there is no need to serialize them here.
}
/// <summary>
/// Regression check for bug 771485: loads two MDL mol resources, prepares both identically, and
/// asserts the subset relation between their bit fingerprints.
/// </summary>
public void TestBug771485()
{
    var chemBuilder = CDK.Builder;

    var firstStream = ResourceLoader.GetAsStream("NCDK.Data.MDL.bug771485-1.mol");
    var firstReader = new MDLV2000Reader(firstStream, ChemObjectReaderMode.Strict);
    IAtomContainer structure1 = (IAtomContainer)firstReader.Read(chemBuilder.NewAtomContainer());

    var secondStream = ResourceLoader.GetAsStream("NCDK.Data.MDL.bug771485-2.mol");
    var secondReader = new MDLV2000Reader(secondStream, ChemObjectReaderMode.Strict);
    IAtomContainer structure2 = (IAtomContainer)secondReader.Read(chemBuilder.NewAtomContainer());

    IAtomContainer[] bothStructures = { structure1, structure2 };

    // The two molecules are different resonance forms of the same molecule, so aromaticity
    // has to be detected explicitly. Some fingerprinters do this themselves, but not every
    // implementation can be expected to.
    foreach (IAtomContainer structure in bothStructures)
    {
        AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(structure);
    }

    foreach (IAtomContainer structure in bothStructures)
    {
        Aromaticity.CDKLegacy.Apply(structure);
    }

    foreach (IAtomContainer structure in bothStructures)
    {
        AddImplicitHydrogens(structure);
    }

    foreach (IAtomContainer structure in bothStructures)
    {
        Kekulization.Kekulize(structure);
    }

    // Hydrogens are loaded from MDL mol files only for non-query structures. Structure 2 has
    // query aromatic bonds, so its hydrogen counts are not assigned; assign them here.
    foreach (IAtomContainer structure in bothStructures)
    {
        CDK.HydrogenAdder.AddImplicitHydrogens(structure);
    }

    IFingerprinter fp = GetBitFingerprinter();
    BitArray bitsOfStructure2 = fp.GetBitFingerprint(structure2).AsBitSet();
    BitArray bitsOfStructure1 = fp.GetBitFingerprint(structure1).AsBitSet();

    Assert.IsTrue(FingerprinterTool.IsSubset(bitsOfStructure2, bitsOfStructure1));
}
/// <inheritdoc />
/// <remarks>
/// Emits the base class contribution, then the producer fingerprint for non-composite shared
/// opaque directories, then (unless suppressed) a "SemiStableHash" entry.
/// </remarks>
protected override void AddWeakFingerprint(IFingerprinter fingerprinter, SealDirectory sealDirectory)
{
    Contract.Requires(fingerprinter != null);
    Contract.Requires(sealDirectory != null);

    base.AddWeakFingerprint(fingerprinter, sealDirectory);

    // A non-composite shared opaque directory always has empty contents and no composed
    // directories, so its static fingerprint alone is too weak: several shared opaques may
    // share one directory root. The producer's fingerprint is added to tell them apart.
    bool isNonCompositeSharedOpaque =
        sealDirectory.Kind == SealDirectoryKind.SharedOpaque && !sealDirectory.IsComposite;
    if (isNonCompositeSharedOpaque)
    {
        DirectoryArtifact root = sealDirectory.Directory;
        fingerprinter.Add(root, m_directoryProducerFingerprintLookup(root).Hash);
    }

    // Nothing more to add when the semi stable hash is excluded by configuration
    // or when the directory kind is dynamic.
    if (ExcludeSemiStableHashOnFingerprintingSealDirectory || sealDirectory.Kind.IsDynamicKind())
    {
        return;
    }

    // Several distinct instances of a statically sealed directory may coexist (for example,
    // partially sealed directories that share both root and member set). The semi stable hash
    // is what tells those instances apart in the static fingerprint.
    fingerprinter.Add("SemiStableHash", sealDirectory.SemiStableHash);
}
/// <summary> /// Compute fingerprint associated with this unsafe options. /// </summary> public void ComputeFingerprint(IFingerprinter fingerprinter) { fingerprinter.Add(nameof(UnsafeConfiguration.SandboxKind), UnsafeConfiguration.SandboxKind.ToString()); fingerprinter.Add(nameof(UnsafeConfiguration.ExistingDirectoryProbesAsEnumerations), getBoolString(UnsafeConfiguration.ExistingDirectoryProbesAsEnumerations)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreGetFinalPathNameByHandle), getBoolString(UnsafeConfiguration.IgnoreGetFinalPathNameByHandle)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreNonCreateFileReparsePoints), getBoolString(UnsafeConfiguration.IgnoreNonCreateFileReparsePoints)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreReparsePoints), getBoolString(UnsafeConfiguration.IgnoreReparsePoints)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreSetFileInformationByHandle), getBoolString(UnsafeConfiguration.IgnoreSetFileInformationByHandle)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreZwOtherFileInformation), getBoolString(UnsafeConfiguration.IgnoreZwOtherFileInformation)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreZwRenameFileInformation), getBoolString(UnsafeConfiguration.IgnoreZwRenameFileInformation)); fingerprinter.Add(nameof(UnsafeConfiguration.MonitorFileAccesses), getBoolString(UnsafeConfiguration.MonitorFileAccesses)); fingerprinter.Add(nameof(UnsafeConfiguration.MonitorNtCreateFile), getBoolString(UnsafeConfiguration.MonitorNtCreateFile)); fingerprinter.Add(nameof(UnsafeConfiguration.MonitorZwCreateOpenQueryFile), getBoolString(UnsafeConfiguration.MonitorZwCreateOpenQueryFile)); fingerprinter.Add(nameof(UnsafeConfiguration.PreserveOutputs), UnsafeConfiguration.PreserveOutputs.ToString()); fingerprinter.Add(nameof(UnsafeConfiguration.UnexpectedFileAccessesAreErrors), getBoolString(UnsafeConfiguration.UnexpectedFileAccessesAreErrors)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnorePreloadedDlls), 
getBoolString(UnsafeConfiguration.IgnorePreloadedDlls)); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreDynamicWritesOnAbsentProbes), UnsafeConfiguration.IgnoreDynamicWritesOnAbsentProbes.ToString()); fingerprinter.Add(nameof(UnsafeConfiguration.DoubleWritePolicy), UnsafeConfiguration.DoubleWritePolicy.HasValue ? UnsafeConfiguration.DoubleWritePolicy.Value.ToString() : string.Empty); fingerprinter.Add(nameof(UnsafeConfiguration.IgnoreUndeclaredAccessesUnderSharedOpaques), getBoolString(UnsafeConfiguration.IgnoreUndeclaredAccessesUnderSharedOpaques)); if (m_preservedOutputInfo.HasValue) { fingerprinter.AddNested("PreserveOutputInfo", fp => m_preservedOutputInfo.Value.ComputeFingerprint(fp)); }
/// <summary>
/// Adds this instance's salt (in hex form) and preserve-output trust level to the given fingerprinter.
/// </summary>
/// <param name="fingerprinter">Fingerprinter that receives the two named entries.</param>
public void ComputeFingerprint(IFingerprinter fingerprinter)
{
    var saltAsHex = Salt.ToHex();
    fingerprinter.Add(nameof(Salt), saltAsHex);

    var trustLevel = PreserveOutputTrustLevel;
    fingerprinter.Add(nameof(PreserveOutputTrustLevel), trustLevel);
}
/// <summary>
/// Build a bit-set fingerprint for a molecule using the fingerprinter selected by <paramref name="fpType"/>.
/// </summary>
/// <param name="mol">Molecule to fingerprint. Must not be null.</param>
/// <param name="fpType">Selects which fingerprinter implementation is used.</param>
/// <param name="fpSubtype">
/// Only read for <c>FingerprintType.Circular</c>: cast to <c>CircularFingerprinterClass</c>.
/// Values outside ECFP0..ECFP6 are replaced by <c>CircularFingerprintType.DefaultCircularClass</c>.
/// </param>
/// <param name="fpLen">
/// Only read for <c>FingerprintType.Circular</c>: fingerprint length. A negative value selects
/// <c>CircularFingerprintType.DefaultCircularLength</c>.
/// </param>
/// <returns>The selected fingerprinter's bit fingerprint for <paramref name="mol"/>, cast to <c>BitSetFingerprint</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="mol"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="fpType"/> is not one of the handled fingerprint types.</exception>
public static BitSetFingerprint BuildBitSetFingerprint(
    IAtomContainer mol,
    FingerprintType fpType,
    int fpSubtype = -1,
    int fpLen = -1)
{
    // Data for Tanimoto similarity using various fingerprint types for CorpId 123456 query.
    // Cart - Standard MDL Oracle Cartridge scores
    //
    //                         Similarity Score
    //         ------------------------------------------------------
    // Size ->        192    896    1024   1024   128     1024   320
    // CorpId  Cart   MACCS  PbChm  ECFP4  EXT    EState  Basic  Sbstr
    // ------  ----   -----  -----  -----  ----   ------  -----  -----
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00    1.00   1.00
    // 123456  0.99   0.98   0.96   0.77   0.95   1.00    0.95   1.00
    // 123456  0.99   0.98   0.96   0.77   0.95   1.00    0.94   1.00
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00    1.00   1.00
    // 123456  0.99   1.00   1.00   1.00   1.00   1.00    1.00   1.00
    // 123456  0.99   0.91   1.00   0.81   1.00   1.00    1.00   1.00
    // 123456  0.98   0.95   1.00   0.74   0.92   1.00    0.93   0.94
    // 123456  0.98   1.00   1.00   1.00   1.00   1.00    1.00   1.00
    // 123456  0.98   1.00   1.00   1.00   1.00   1.00    1.00   1.00
    // 123456  0.98   1.00   0.83   0.76   0.77   0.90    0.76   0.94
    //
    // LSH Bin Count - The number of LSH bins (of 25) that match the query bin values
    // --------------
    // CorpId  MAC  PbC  ECFP  EX
    // ------  ---  ---  ----  ---
    // 123456  25   25   25    25
    // 123456  25   20   7     16
    // 123456  25   20   9     19
    // 123456  25   25   25    25
    // 123456  25   25   25    25
    // 123456  20   25   9     25
    // 123456  21   25   11    17
    // 123456  25   25   25    25
    // 123456  25   25   25    25
    // 123456  25   9    6     11
    //
    // Data for Tanimoto similarity using various Circular fingerprint types.
    // Using 2 molecules where the 2nd just has an added methyl group.
    //
    // Measure   Score
    // --------  -----
    // ECFP0     1.00
    // ECFP2      .88
    // ECFP4      .75
    // ECFP6      .64
    // FCFP0     1.00
    // FCFP2      .92
    // FCFP4      .84
    // FCFP6      .74

    if (mol == null)
    {
        throw new ArgumentNullException(nameof(mol));
    }

    IFingerprinter ifptr;

    if (fpType == FingerprintType.Basic) // size = 1024
    {
        ifptr = new Fingerprinter();
    }
    else if (fpType == FingerprintType.Circular) // size variable
    {
        CircularFingerprinterClass cfpClass = (CircularFingerprinterClass)fpSubtype;

        // NOTE(review): any subtype outside ECFP0..ECFP6 silently falls back to the default class.
        // Whether the FCFP classes fall inside this range depends on the enum's ordering - confirm.
        if (cfpClass < CircularFingerprinterClass.ECFP0 || cfpClass > CircularFingerprinterClass.ECFP6)
        {
            cfpClass = (CircularFingerprinterClass)CircularFingerprintType.DefaultCircularClass; // default class
        }

        if (fpLen < 0)
        {
            fpLen = CircularFingerprintType.DefaultCircularLength; // default length
        }

        ifptr = new CircularFingerprinter(cfpClass, fpLen);
    }
    else if (fpType == FingerprintType.Extended) // size = 1024
    {
        ifptr = new ExtendedFingerprinter(); // use DEFAULT_SIZE and DEFAULT_SEARCH_DEPTH
    }
    else if (fpType == FingerprintType.EState) // size = 128
    {
        ifptr = new EStateFingerprinter(); // use DEFAULT_SIZE and DEFAULT_SEARCH_DEPTH
    }
    else if (fpType == FingerprintType.MACCS) // size = 192
    {
        // The MACCS fingerprinter is created once and then reused through the MACCSFp member.
        if (MACCSFp == null)
        {
            MACCSFp = new MACCSFingerprinter();
        }

        ifptr = MACCSFp;
    }
    else if (fpType == FingerprintType.PubChem) // size = 896
    {
        ifptr = new PubchemFingerprinter();
    }
    else if (fpType == FingerprintType.ShortestPath) // size =
    {
        ifptr = new ShortestPathFingerprinter(); // fails with atom type issue for many structures (e.g. 123456)
    }
    else if (fpType == FingerprintType.Signature) // size =
    {
        ifptr = new SignatureFingerprinter(); // can't convert array fingerprint to bitsetfingerprint
    }
    else if (fpType == FingerprintType.Substructure) // size = 320
    {
        ifptr = new SubstructureFingerprinter();
    }
    else
    {
        // A specific argument exception instead of bare System.Exception; it still derives from
        // Exception, so existing catch blocks keep working.
        throw new ArgumentOutOfRangeException(nameof(fpType), fpType, "Invalid CdkFingerprintType: " + fpType);
    }

    // The cast throws InvalidCastException if the fingerprinter returns another IBitFingerprint implementation.
    return (BitSetFingerprint)ifptr.GetBitFingerprint(mol);
}