Example #1
0
 /// <summary>
 /// Creates a new (deterministic and minimal) automaton whose language is the union
 /// of the supplied <see cref="BytesRef"/> terms, each holding a UTF-8 encoded string.
 /// </summary>
 /// <param name="utf8Strings">
 ///          UTF-8 encoded input terms. The collection must already be in sorted
 ///          order.
 /// </param>
 /// <returns> An <see cref="Automaton"/> that accepts every input string. The
 ///         automaton is codepoint based (transitions carry full unicode
 ///         codepoints). </returns>
 public static Automaton MakeStringUnion(ICollection<BytesRef> utf8Strings)
 {
     // Non-empty input is handed to the Daciuk-Mihov builder.
     if (utf8Strings.Count != 0)
     {
         return DaciukMihovAutomatonBuilder.Build(utf8Strings);
     }

     // Nothing to accept: fall back to MakeEmpty().
     return MakeEmpty();
 }
        /// <summary>
        /// Wraps each given string in a <see cref="BytesRef"/>, sorts the resulting terms, builds
        /// an automaton from them via <see cref="DaciukMihovAutomatonBuilder"/>, and returns it
        /// wrapped in a <see cref="CompiledAutomaton"/>.
        /// </summary>
        /// <param name="strings"> The strings the automaton is built from. </param>
        /// <returns> The <see cref="CompiledAutomaton"/> created from the built automaton. </returns>
        private CompiledAutomaton Build(params string[] strings)
        {
            List<BytesRef> sortedTerms = new List<BytesRef>();
            for (int i = 0; i < strings.Length; i++)
            {
                sortedTerms.Add(new BytesRef(strings[i]));
            }

            // Sort before handing the terms to the builder.
            sortedTerms.Sort();

            Automaton union = DaciukMihovAutomatonBuilder.Build(sortedTerms);
            return new CompiledAutomaton(union, true, false);
        }
Example #3
0
        /// <summary>
        /// Constructs a minimal, deterministic automaton from <see cref="BytesRef"/> terms holding
        /// UTF-8 encoded strings. The terms must be supplied in binary-sorted order.
        /// </summary>
        /// <param name="input"> The binary-sorted, UTF-8 encoded terms. </param>
        /// <returns> An <see cref="Automaton"/> flagged as deterministic. </returns>
        public static Automaton Build(ICollection<BytesRef> input)
        {
            var automatonBuilder = new DaciukMihovAutomatonBuilder();

            // A single scratch buffer is reused for the UTF-16 form of every term.
            var utf16 = new CharsRef();
            foreach (BytesRef term in input)
            {
                UnicodeUtil.UTF8toUTF16(term, utf16);
                automatonBuilder.Add(utf16);
            }

            Automaton result = new Automaton();

            // Convert the completed builder state, using a map keyed with the identity comparer.
            var root = automatonBuilder.Complete();
            var stateMap = new JCG.Dictionary<State, Lucene.Net.Util.Automaton.State>(IdentityEqualityComparer<State>.Default);
            result.initial = Convert(root, stateMap);
            result.deterministic = true;

            return result;
        }
Example #4
0
        /// <summary>
        /// Produces a minimal, deterministic automaton from <see cref="BytesRef"/> terms that carry
        /// UTF-8 encoded strings. The terms must be in binary-sorted order.
        /// </summary>
        /// <param name="input"> The binary-sorted, UTF-8 encoded terms. </param>
        /// <returns> An <see cref="Automaton"/> flagged as deterministic. </returns>
        public static Automaton Build(ICollection<BytesRef> input)
        {
            var automatonBuilder = new DaciukMihovAutomatonBuilder();

            // The same scratch buffer receives the UTF-16 form of each term in turn.
            var utf16 = new CharsRef();
            foreach (BytesRef term in input)
            {
                UnicodeUtil.UTF8toUTF16(term, utf16);
                automatonBuilder.Add(utf16);
            }

            // Convert the completed builder state and mark the automaton deterministic.
            return new Automaton
            {
                initial = Convert(automatonBuilder.Complete(), new IdentityHashMap<State, Lucene.Net.Util.Automaton.State>()),
                deterministic = true
            };
        }
        /// <summary>
        /// Creates a minimal, deterministic automaton out of <see cref="BytesRef"/> terms containing
        /// UTF-8 encoded strings. The terms must arrive in binary-sorted order.
        /// </summary>
        /// <param name="input"> The binary-sorted, UTF-8 encoded terms. </param>
        /// <returns> An <see cref="Automaton"/> flagged as deterministic. </returns>
        public static Automaton Build(ICollection<BytesRef> input)
        {
            var automatonBuilder = new DaciukMihovAutomatonBuilder();
            var utf16 = new CharsRef();

            // Decode each term into the shared scratch buffer, then add it to the builder.
            foreach (BytesRef term in input)
            {
                UnicodeUtil.UTF8toUTF16(term, utf16);
                automatonBuilder.Add(utf16);
            }

            Automaton result = new Automaton();

            // Convert the completed builder state with a fresh identity map.
            var root = automatonBuilder.Complete();
            var stateMap = new IdentityHashMap<State, Lucene.Net.Util.Automaton.State>();
            result.Initial = Convert(root, stateMap);
            result.deterministic = true;

            return result;
        }