Пример #1
0
        /// <summary>Create a classifier that maps a character to the ID of its associated minterm.</summary>
        /// <param name="minterms">
        /// The minterm BDDs, one per minterm. The index of each BDD in the array is used as that minterm's ID.
        /// Must contain at least one element.
        /// </param>
        public MintermClassifier(BDD[] minterms)
        {
            Debug.Assert(minterms.Length > 0, "Requires at least one minterm");

            CharSetSolver solver = CharSetSolver.Instance;

            if (minterms.Length == 1)
            {
                // With only a single minterm, the mapping is trivial: everything maps to it (ID 0).
                // For ASCII, use the shared all-zeros array.  For non-ASCII, use a BDD that maps everything to 0.
                _ascii = AllAsciiIsZeroMintermArray;
                _nonAscii = solver.ReplaceTrue(BDD.True, 0);
                return;
            }

            // Build a multi-terminal BDD that maps any character to its associated minterm ID.
            BDD anyCharacterToMintermId = BDD.False;

            for (int i = 0; i < minterms.Length; i++)
            {
                // Each supplied minterm BDD decides whether a given character belongs to it or not.
                // Replacing its True result with the minterm's ID (its index in the array) yields a BDD
                // that returns that ID for every character that would previously have returned True.
                BDD charToTargetMintermId = solver.ReplaceTrue(minterms[i], i);

                // Union this BDD into the multi-terminal BDD built up so far.  Unioning is valid because
                // every character belongs to exactly one minterm and thus maps to an ID (rather than False)
                // in exactly one of the input BDDs.
                anyCharacterToMintermId = solver.Or(anyCharacterToMintermId, charToTargetMintermId);
            }

            // Optimize for ASCII inputs: rather than consulting the BDD for every ASCII character at match
            // time, precompute a lookup table indexed by the character's value that yields its minterm ID.
            var ascii = new int[128];

            for (int i = 0; i < ascii.Length; i++)
            {
                ascii[i] = anyCharacterToMintermId.Find(i);
            }

            _ascii = ascii;

            // The BDD can be optimized further in two ways:
            // 1. Remove the ASCII characters from it, as the lookup table is always consulted first for ASCII
            //    inputs and the BDD is therefore never used for them.  This is optional (skipping it does not
            //    affect correctness), but it reduces the size of the multi-terminal BDD.
            // 2. Check whether every remaining character now maps to the same minterm ID (the same terminal in
            //    the multi-terminal BDD).  This can be relatively common after (1), as many patterns don't
            //    distinguish between any non-ASCII characters (e.g. "[0-9]*").  In that case the BDD is
            //    replaced with a much simpler/faster/smaller one.
            BDD nonAsciiBDD = solver.And(anyCharacterToMintermId, solver._nonAscii);

            nonAsciiBDD = nonAsciiBDD.IsEssentiallyBoolean(out BDD? singleTerminalBDD) ? singleTerminalBDD : nonAsciiBDD;
            _nonAscii = nonAsciiBDD;
        }