Beispiel #1
0
        /// <summary>
        /// Builds DFAs for several languages in a single pass.
        ///
        /// A language is described by a set of <typeparamref name="TResult"/> values; its DFA matches the patterns
        /// registered for every result in that set.
        ///
        /// Languages built together are minimized globally and share as many states as possible.
        /// </summary>
        /// <param name="languages">One result set per language to build</param>
        /// <param name="ambiguityResolver">Called to merge the results when patterns for more than one result match
        /// the same string. If this is null, then a DfaAmbiguityException will be thrown in that case.</param>
        /// <returns>Start states for the DFAs matching the given languages: same length as
        /// <paramref name="languages"/>, with corresponding start states in corresponding positions. An empty
        /// list is returned when no languages are given.</returns>
        public IList <DfaState <TResult> > Build(IList <ISet <TResult> > languages, DfaAmbiguityResolver <TResult> ambiguityResolver)
        {
            //nothing requested -> nothing to build
            if (languages.Count < 1)
            {
                return new List<DfaState<TResult>>();
            }

            //no cache configured: always build from scratch
            if (cache == null)
            {
                return _build(languages, ambiguityResolver).GetStartStates();
            }

            //otherwise reuse a previously stored DFA when the cache has one for this key
            var key = GetCacheKey(DfaTypeMatcher, languages, ambiguityResolver);
            var dfa = (SerializableDfa<TResult>)cache.GetCachedItem(key);
            if (dfa == null)
            {
                dfa = _build(languages, ambiguityResolver);
                cache.MaybeCacheItem(key, dfa);
            }

            return dfa.GetStartStates();
        }
Beispiel #2
0
        /// <summary>
        /// Builds the combined DFA for the given languages without consulting the cache.
        ///
        /// An NFA is assembled with one start state per language, each connected by epsilon transitions to the
        /// patterns of every result the language contains; the NFA is then run through
        /// <c>DfaFromNfa</c> and <c>DfaMinimizer</c>.
        /// </summary>
        /// <param name="languages">One result set per language to build</param>
        /// <param name="ambiguityResolver">Resolver handed to <c>DfaFromNfa</c>; when null,
        /// <c>DefaultAmbiguityResolver</c> is used instead</param>
        /// <returns>The minimized DFA wrapped in a <c>SerializableDfa</c></returns>
        private SerializableDfa <TResult> _build(IList <ISet <TResult> > languages, DfaAmbiguityResolver <TResult> ambiguityResolver)
        {
            ambiguityResolver ??= DefaultAmbiguityResolver;

            var nfa = new Nfa<TResult>();

            //one NFA start state per language, allocated before any pattern states
            var startStates = new int[languages.Count];
            for (var lang = 0; lang < startStates.Length; ++lang)
            {
                startStates[lang] = nfa.AddState();
            }

            foreach (var entry in patterns)
            {
                var result   = entry.Key;
                var matchers = entry.Value;
                if (matchers == null || matchers.Count < 1)
                {
                    continue;
                }

                //entry state of the sub-NFA for this result; created the first time a language needs it
                var entryState = -1;
                for (var lang = 0; lang < languages.Count; ++lang)
                {
                    if (languages[lang].Contains(result))
                    {
                        if (entryState < 0)
                        {
                            //final state accepting this result
                            var accepting = nfa.AddState(result);
                            if (matchers.Count == 1)
                            {
                                //a single pattern needs no union state
                                entryState = matchers[0].AddToNfa(nfa, accepting);
                            }
                            else
                            {
                                //several patterns: fan out from a fresh union state
                                entryState = nfa.AddState();
                                foreach (var matcher in matchers)
                                {
                                    var branchStart = matcher.AddToNfa(nfa, accepting);
                                    nfa.AddEpsilon(entryState, branchStart);
                                }
                            }
                        }

                        //this language matches the patterns of this result
                        nfa.AddEpsilon(startStates[lang], entryState);
                    }
                }
            }

            var dfa     = new DfaFromNfa<TResult>(nfa, startStates, ambiguityResolver).GetDfa();
            var minimal = new DfaMinimizer<TResult>(dfa).GetMinimizedDfa();

            return new SerializableDfa<TResult>(minimal);
        }
Beispiel #3
0
        /// <summary>
        /// Builds a DFA over the reversed patterns of every result used by at least one of the given languages.
        ///
        /// The NFA accepts (with result <c>true</c>) any input that ends in the reverse of one of those
        /// patterns: the empty string is excluded via <c>Disemptify</c> and an arbitrary prefix is allowed via
        /// <c>Pattern.MaybeRepeat(CharRange.All)</c>.
        /// </summary>
        /// <param name="languages">Result sets whose patterns take part in the reverse finder</param>
        /// <returns>The minimized reverse-finder DFA wrapped in a <c>SerializableDfa</c></returns>
        private SerializableDfa <bool> _buildReverseFinders(IList <ISet <TResult> > languages)
        {
            var nfa = new Nfa <bool>();

            var startState = nfa.AddState();
            var endState   = nfa.AddState(true);
            DfaAmbiguityResolver <bool> ambiguityResolver = DefaultAmbiguityResolver;

            //First, make an NFA that matches the reverse of all the patterns
            foreach (var patEntry in patterns)
            {
                var patList = patEntry.Value;
                if (patList == null || patList.Count < 1)
                {
                    continue;
                }

                //A result shared by several languages must only contribute its patterns once:
                //adding them again per language creates duplicate branches that accept the same
                //strings and merely inflate the NFA before minimization.
                var used = false;
                foreach (var language in languages)
                {
                    if (language.Contains(patEntry.Key))
                    {
                        used = true;
                        break;
                    }
                }

                if (!used)
                {
                    continue;
                }

                foreach (var pat in patList)
                {
                    var st = pat.Reversed.AddToNfa(nfa, endState);
                    nfa.AddEpsilon(startState, st);
                }
            }

            //omit the empty string
            startState = nfa.Disemptify(startState);

            //allow anything first
            startState = Pattern.MaybeRepeat(CharRange.All).AddToNfa(nfa, startState);

            //build the DFA
            var rawDfa          = new DfaFromNfa <bool>(nfa, new[] { startState }, ambiguityResolver).GetDfa();
            var minimalDfa      = new DfaMinimizer <bool>(rawDfa).GetMinimizedDfa();
            var serializableDfa = new SerializableDfa <bool>(minimalDfa);

            return(serializableDfa);
        }
Beispiel #4
0
        /// <summary>
        /// Computes the cache key for a DFA built from this builder's patterns.
        ///
        /// The key is the Base32 digest of a SHA-256 hash over, in order: the DFA type, the number of languages,
        /// every non-empty pattern list used by at least one language (its size, a bit mask of the languages
        /// using it when there is more than one language, its patterns and its result), a terminating 0, and
        /// the ambiguity resolver.
        /// </summary>
        /// <param name="dfaType">Discriminator for the kind of DFA being built; hashed first</param>
        /// <param name="languages">Result sets defining the languages</param>
        /// <param name="ambiguityResolver">Resolver that takes part in the key; null is hashed as the integer 0</param>
        /// <returns>The Base32-encoded digest of the hashed data</returns>
        private string GetCacheKey(int dfaType, IList <ISet <TResult> > languages, DfaAmbiguityResolver <TResult> ambiguityResolver)
        {
            string cacheKey;

            using var hashAlg = new SHA256Managed();

            //Everything has to be written THROUGH the CryptoStream: only bytes passing through it reach the
            //hash.  The hashed bytes themselves are not needed afterwards, so they are sent to Stream.Null.
            using (var cs = new CryptoStream(Stream.Null, hashAlg, CryptoStreamMode.Write))
            {
                //NOTE(review): BinaryFormatter is deprecated in current .NET; here it is only used to
                //produce bytes for hashing (Serialize), never to read data back.
                var bf = new BinaryFormatter();
                bf.Serialize(cs, dfaType);
                var numLangs = languages.Count;
                bf.Serialize(cs, numLangs);

                //write key stuff out in the iteration order of the pattern map, for deterministic serialization
                foreach (var patEntry in patterns)
                {
                    var patList = patEntry.Value;

                    //same guard as _build: entries without patterns contribute nothing
                    if (patList == null || patList.Count < 1)
                    {
                        continue;
                    }

                    var included = false;
                    for (var i = 0; i < numLangs; ++i)
                    {
                        if (languages[i].Contains(patEntry.Key))
                        {
                            included = true;
                            break;
                        }
                    }

                    if (!included)
                    {
                        continue;
                    }

                    bf.Serialize(cs, patList.Count);
                    if (numLangs > 1)
                    {
                        //membership bit mask, emitted in 32-bit words: bit (i mod 32) is set when language i uses this result
                        var bits = languages[0].Contains(patEntry.Key) ? 1 : 0;
                        for (var i = 1; i < languages.Count; ++i)
                        {
                            if ((i & 31) == 0)
                            {
                                bf.Serialize(cs, bits);
                                bits = 0;
                            }

                            if (languages[i].Contains(patEntry.Key))
                            {
                                bits |= 1 << (i & 31);
                            }
                        }

                        bf.Serialize(cs, bits);
                    }

                    foreach (var pat in patList)
                    {
                        bf.Serialize(cs, pat);
                    }

                    bf.Serialize(cs, patEntry.Key);
                }

                bf.Serialize(cs, 0); //0-size pattern list terminates pattern map
                bf.Serialize(cs, ambiguityResolver ?? (object)0);

                //finalize the hash before reading it
                cs.FlushFinalBlock();

                cacheKey = Base32.GetDigest(hashAlg.Hash);
            }

            return(cacheKey);
        }
Beispiel #5
0
        /// <summary>
        /// Build DFAs from a provided NFA
        ///
        /// This method is used when you want to build the NFA yourself instead of letting this class do it.
        ///
        /// Languages built simultaneously will be globally minimized and will share as many states as possible.
        /// </summary>
        /// <param name="nfa">The NFA</param>
        /// <param name="nfaStartStates">The return value will include the DFA states corresponding to these NFA states, in the same order</param>
        /// <param name="ambiguityResolver">When patterns for multiple results match the same string, this is called to
        /// combine the multiple results into one.  If this is null, then a DfaAmbiguityException will be thrown in that case.
        /// For the cache key a null resolver is hashed as the integer 0.</param>
        /// <param name="cache">If this cache is non-null, it will be checked for a memoized result for this NFA, and will be populated
        /// with a memoized result when the call is complete.</param>
        /// <returns>DFA start states that are equivalent to the given NFA start states.  This will have the same length as nfaStartStates, with
        /// corresponding start states in corresponding positions.</returns>
        public static IList <DfaState <TResult> > BuildFromNfa(Nfa <TResult> nfa, int[] nfaStartStates, DfaAmbiguityResolver <TResult> ambiguityResolver,
                                                               IBuilderCache cache)
        {
            string cacheKey = null;
            SerializableDfa <TResult> serializableDfa = null;

            if (cache != null)
            {
                //The key is a SHA-256 digest of the serialized inputs.  The data must be written THROUGH the
                //CryptoStream for the hash to see it; the bytes themselves are discarded into Stream.Null.
                using (var hashAlg = new SHA256Managed())
                using (var cs = new CryptoStream(Stream.Null, hashAlg, CryptoStreamMode.Write))
                {
                    //NOTE(review): BinaryFormatter is deprecated in current .NET; it is only used here to
                    //produce bytes for hashing (Serialize), never to read data back.
                    var bf = new BinaryFormatter();
                    bf.Serialize(cs, nfaStartStates);
                    bf.Serialize(cs, nfa);

                    //Serialize rejects a null graph, but a null resolver is a documented input
                    bf.Serialize(cs, ambiguityResolver ?? (object)0);

                    //finalize the hash before reading it
                    cs.FlushFinalBlock();

                    cacheKey = Base32.GetDigest(hashAlg.Hash);
                }

                serializableDfa = (SerializableDfa <TResult>)cache.GetCachedItem(cacheKey);
            }

            if (serializableDfa == null)
            {
                var rawDfa     = new DfaFromNfa <TResult>(nfa, nfaStartStates, ambiguityResolver).GetDfa();
                var minimalDfa = new DfaMinimizer <TResult>(rawDfa).GetMinimizedDfa();
                serializableDfa = new SerializableDfa <TResult>(minimalDfa);

                //cacheKey is only set when a cache was supplied
                if (cacheKey != null)
                {
                    cache.MaybeCacheItem(cacheKey, serializableDfa);
                }
            }

            return(serializableDfa.GetStartStates());
        }
Beispiel #6
0
 /// <summary>
 /// Creates a <see cref="StringSearcher{TResult}"/> covering every pattern that has been added to this builder.
 /// </summary>
 /// <param name="ambiguityResolver">Called to merge the results when patterns for more than one result match
 /// the same string.  If this is null, then a <see cref="DfaAmbiguityException{TResult}"/> will be thrown
 /// in that case.</param>
 /// <returns>A <see cref="StringSearcher{TResult}"/> for all the patterns in this builder</returns>
 public StringSearcher <TResult> BuildStringSearcher(DfaAmbiguityResolver <TResult> ambiguityResolver)
 {
     //evaluated in constructor-argument order: the all-patterns DFA first, then the reverse finder
     var matcherStart  = Build(ambiguityResolver);
     var reverseFinder = BuildReverseFinder();

     return new StringSearcher<TResult>(matcherStart, reverseFinder);
 }
Beispiel #7
0
 /// <summary>
 /// Builds the DFA for one language.
 ///
 /// The language is given as a set of <typeparamref name="TResult"/> values and includes the patterns
 /// for each result in that set.
 /// </summary>
 /// <param name="language">Set defining the language to build</param>
 /// <param name="ambiguityResolver">Called to merge the results when patterns for more than one result match
 /// the same string.  If this is null, then a DfaAmbiguityException will be thrown in that case.</param>
 /// <returns>The start state for a DFA that matches the set of patterns in language</returns>
 public DfaState <TResult> Build(ISet <TResult> language, DfaAmbiguityResolver <TResult> ambiguityResolver)
 {
     //wrap the single language so the multi-language overload does the work
     var languages = new List<ISet<TResult>>();
     languages.Add(language);

     var startStates = Build(languages, ambiguityResolver);

     return startStates[0];
 }
Beispiel #8
0
 /// <summary>
 /// Builds the DFA for one language consisting of ALL patterns that have been added to this builder.
 /// </summary>
 /// <param name="ambiguityResolver">Called to merge the results when patterns for more than one result match
 /// the same string.  If this is null, then a <see cref="DfaAmbiguityException{TResult}"/> will be thrown
 /// in that case.</param>
 /// <returns>The start state for a DFA that matches every pattern in this builder</returns>
 public DfaState <TResult> Build(DfaAmbiguityResolver <TResult> ambiguityResolver)
 {
     //the language is the set of every result that has patterns registered
     var allResults = new HashSet<TResult>(patterns.Keys);

     var languages = new List<ISet<TResult>>();
     languages.Add(allResults);

     var startStates = Build(languages, ambiguityResolver);

     return startStates[0];
 }