// Below are the original, unoptimized implementations of DFA operations, kept for testing.
// These are from brics automaton, full license (BSD) below:

/*
 * dk.brics.automaton
 *
 * Copyright (c) 2001-2009 Anders Moeller
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/// <summary>
/// Simple, original brics implementation of Brzozowski Minimize():
/// the automaton is reversed and handed to <c>DeterminizeSimple</c>, twice in a row.
/// </summary>
/// <remarks>
/// An automaton for which <c>IsSingleton</c> is true is returned untouched.
/// </remarks>
public static void MinimizeSimple(Automaton a)
{
    if (!a.IsSingleton)
    {
        // Two identical reverse-then-determinize passes.
        for (int pass = 0; pass < 2; pass++)
        {
            DeterminizeSimple(a, SpecialOperations.Reverse(a));
        }
    }
}
// Below are the original, unoptimized implementations of DFA operations, kept for testing.
// These are from brics automaton, full license (BSD) below:

/*
 * dk.brics.automaton
 *
 * Copyright (c) 2001-2009 Anders Moeller
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/// <summary>
/// Simple, original brics implementation of Brzozowski minimize():
/// the automaton is reversed and passed to <c>DeterminizeSimple</c>, twice in a row.
/// </summary>
/// <param name="a">Automaton to minimize.</param>
public static void MinimizeSimple(Automaton a)
{
    // A non-null Singleton -- including the empty string -- means the automaton
    // is held in singleton form and needs no minimization. The previous
    // IsNullOrEmpty test wrongly sent the empty-string singleton through the
    // reverse/determinize passes.
    // NOTE(review): mirrors upstream isSingleton() (singleton != null) -- confirm
    // against the Automaton class in this port.
    if (a.Singleton != null)
    {
        return;
    }

    DeterminizeSimple(a, SpecialOperations.Reverse(a));
    DeterminizeSimple(a, SpecialOperations.Reverse(a));
}
public virtual void TestIsFinite()
{
    // For each random automaton, SpecialOperations.IsFinite (run on a clone)
    // must agree with AutomatonTestUtil.IsFiniteSlow (run on the original).
    int iterations = AtLeast(200);
    int round = 0;
    while (round < iterations)
    {
        Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
        Automaton copy = (Automaton)original.Clone();
        Assert.AreEqual(AutomatonTestUtil.IsFiniteSlow(original), SpecialOperations.IsFinite(copy));
        round++;
    }
}
/// <summary>
/// Builds the Lev automata of <paramref name="s"/> for every distance from 0 up to
/// <paramref name="maxDistance"/>, using both builder variants (second constructor
/// argument <c>false</c> and <c>true</c>), and checks each of them.
/// </summary>
private void AssertLev(string s, int maxDistance)
{
    LevenshteinAutomata plainBuilder = new LevenshteinAutomata(s, false);
    LevenshteinAutomata transBuilder = new LevenshteinAutomata(s, true);
    Automaton[] lev = new Automaton[maxDistance + 1];
    Automaton[] levT = new Automaton[maxDistance + 1];

    for (int n = 0; n < lev.Length; n++)
    {
        Automaton current = plainBuilder.ToAutomaton(n);
        Automaton currentT = transBuilder.ToAutomaton(n);
        lev[n] = current;
        levT[n] = currentT;

        // Structural sanity: present, deterministic, finite, no detached states.
        Assert.IsNotNull(current);
        Assert.IsNotNull(currentT);
        Assert.IsTrue(current.Deterministic);
        Assert.IsTrue(currentT.Deterministic);
        Assert.IsTrue(SpecialOperations.IsFinite(current));
        Assert.IsTrue(SpecialOperations.IsFinite(currentT));
        AutomatonTestUtil.AssertNoDetachedStates(current);
        AutomatonTestUtil.AssertNoDetachedStates(currentT);

        // The DFA for n-1 must accept a subset of the DFA for n.
        if (n > 0)
        {
            Automaton previous = lev[n - 1];
            Automaton previousT = levT[n - 1];
            Assert.IsTrue(previous.SubsetOf(current));
            Assert.IsTrue(previous.SubsetOf(currentT));
            Assert.IsTrue(previousT.SubsetOf(current));
            Assert.IsTrue(previousT.SubsetOf(currentT));
            Assert.AreNotSame(previous, current);
        }

        // Lev(N) must be a subset of LevT(N).
        Assert.IsTrue(current.SubsetOf(currentT));

        // Distance-specific checks.
        if (n == 0)
        {
            // Easy: matches the string itself.
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), current));
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), currentT));
        }
        else if (n == 1)
        {
            // Compare against naively generated distance-1 automata.
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), current));
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), currentT));
        }
        else
        {
            AssertBruteForce(s, current, n);
            AssertBruteForceT(s, currentT, n);
        }
    }
}
/// <summary>
/// Computes the longest string that every accepted string ends with,
/// visiting each state at most once.
/// </summary>
/// <returns> The common suffix. </returns>
public static string GetCommonSuffix(Automaton a)
{
    if (!a.IsSingleton)
    {
        // Reverse a copy of the automaton, take the common prefix of the
        // reversed language, then reverse that prefix.
        Automaton reversed = (Automaton)a.Clone();
        Reverse(reversed);
        reversed.Determinize();
        string reversedSuffix = SpecialOperations.GetCommonPrefix(reversed);
        StringBuilder buffer = new StringBuilder(reversedSuffix);
        return buffer.Reverse().ToString();
    }

    // For a singleton the suffix is the string itself.
    return a.singleton;
}
public static BytesRef GetCommonSuffixBytesRef(Automaton a)
{
    if (!a.IsSingleton)
    {
        // Reverse a copy of the automaton, take the common prefix of the
        // reversed language, then reverse the bytes of that prefix.
        Automaton reversed = (Automaton)a.Clone();
        Reverse(reversed);
        reversed.Determinize();
        BytesRef suffix = SpecialOperations.GetCommonPrefixBytesRef(reversed);
        ReverseBytes(suffix);
        return suffix;
    }

    // For a singleton the suffix is the string itself.
    return new BytesRef(a.singleton);
}
public virtual void TestFiniteStrings()
{
    // Union of "dog" and "duck", minimized, must enumerate to exactly those two strings.
    Automaton dogAutomaton = BasicAutomata.MakeString("dog");
    Automaton duckAutomaton = BasicAutomata.MakeString("duck");
    Automaton dogOrDuck = BasicOperations.Union(dogAutomaton, duckAutomaton);
    MinimizationOperations.Minimize(dogOrDuck);

    ISet<Int32sRef> accepted = SpecialOperations.GetFiniteStrings(dogOrDuck, -1);
    Assert.AreEqual(2, accepted.Count);

    foreach (string word in new string[] { "dog", "duck" })
    {
        Int32sRef expected = new Int32sRef();
        Util.ToInt32sRef(new BytesRef(word), expected);
        Assert.IsTrue(accepted.Contains(expected));
    }
}
public void AssertLexicon(List<Automaton> a, List<string> terms)
{
    // Union the shuffled automata into one determinized lexicon; it must be finite.
    var shuffled = CollectionsHelper.Shuffle(a);
    var lexicon = BasicOperations.Union(shuffled);
    lexicon.Determinize();
    Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

    // Every term must be accepted when run as a string...
    for (int i = 0; i < terms.Count; i++)
    {
        Assert.IsTrue(BasicOperations.Run(lexicon, terms[i]));
    }

    // ...and when run as UTF-8 bytes through a ByteRunAutomaton built from the lexicon.
    var byteLexicon = new ByteRunAutomaton(lexicon);
    for (int i = 0; i < terms.Count; i++)
    {
        sbyte[] utf8 = terms[i].GetBytes(Encoding.UTF8);
        Assert.IsTrue(byteLexicon.Run(utf8, 0, utf8.Length));
    }
}
public void AssertLexicon()
{
    // Shuffle the automata, union them into one determinized lexicon; it must be finite.
    Collections.Shuffle(automata, Random());
    var lexicon = BasicOperations.Union(automata);
    lexicon.Determinize();
    Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

    // Every term must be accepted when run as a string...
    foreach (string term in terms)
    {
        assertTrue(BasicOperations.Run(lexicon, term));
    }

    // ...and when run as UTF-8 bytes through a ByteRunAutomaton built from the lexicon.
    var byteLexicon = new ByteRunAutomaton(lexicon);
    foreach (string term in terms)
    {
        var utf8 = term.GetBytes(Encoding.UTF8);
        assertTrue(byteLexicon.Run(utf8, 0, utf8.Length));
    }
}
public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
{
    if (simplify)
    {
        // Test whether the automaton has a "simple" form and, if so, skip
        // creating a run automaton. Note that on a large automaton these
        // tests could be costly.

        if (BasicOperations.IsEmpty(automaton))
        {
            // Matches nothing.
            Type = AUTOMATON_TYPE.NONE;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.IsTotal(automaton))
        {
            // Matches all possible strings.
            Type = AUTOMATON_TYPE.ALL;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        string commonPrefix;
        string singleton;
        if (automaton.Singleton != null)
        {
            // Already in singleton representation; no prefix is computed.
            commonPrefix = null;
            singleton = automaton.Singleton;
        }
        else
        {
            // Expanded representation: it is still a single string when the
            // non-empty common prefix alone has the same language.
            commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
            bool prefixIsWholeLanguage = commonPrefix.Length > 0
                && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix));
            singleton = prefixIsWholeLanguage ? commonPrefix : null;
        }

        if (singleton != null)
        {
            // Matches a fixed string, in singleton or expanded representation.
            Type = AUTOMATON_TYPE.SINGLE;
            Term = new BytesRef(singleton);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
        {
            // Matches a constant prefix followed by anything.
            Type = AUTOMATON_TYPE.PREFIX;
            Term = new BytesRef(commonPrefix);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }
    }

    // General case: build the byte-level run automaton.
    Type = AUTOMATON_TYPE.NORMAL;
    Term = null;

    // Use the caller's finiteness answer when given, otherwise compute it.
    this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

    Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

    // The common suffix is only computed when the automaton is not known to be finite.
    if (this.Finite != true)
    {
        CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
    }
    else
    {
        CommonSuffixRef = null;
    }

    RunAutomaton = new ByteRunAutomaton(utf8, true);
    sortedTransitions = utf8.GetSortedTransitions();
}
/// <summary>
/// Looks up the character class of the given codepoint by delegating to
/// <c>SpecialOperations.FindIndex</c> over <c>_points</c>.
/// </summary>
internal int GetCharClass(int c)
{
    int classIndex = SpecialOperations.FindIndex(c, _points);
    return classIndex;
}