Example #1
0
 /// <summary>
 /// Creates a new (deterministic and minimal) automaton whose language is the
 /// union of the supplied <see cref="BytesRef"/> instances, each holding a UTF-8
 /// encoded string.
 /// </summary>
 /// <param name="utf8Strings">
 ///          The UTF-8 encoded input strings. The collection must already be
 ///          in sorted order.
 /// </param>
 /// <returns> An <see cref="Automaton"/> that accepts every input string. Its
 ///         transitions are codepoint based (full unicode codepoints).
 ///         For an empty collection the result of <c>MakeEmpty()</c> is returned. </returns>
 public static Automaton MakeStringUnion(ICollection <BytesRef> utf8Strings)
 {
     // With nothing to accept, skip the builder entirely and hand back MakeEmpty();
     // otherwise delegate the whole construction to DaciukMihovAutomatonBuilder.
     return utf8Strings.Count == 0
         ? MakeEmpty()
         : DaciukMihovAutomatonBuilder.Build(utf8Strings);
 }
Example #2
0
        /// <summary>
        /// Constructs a minimal, deterministic automaton from a sorted collection of
        /// <see cref="BytesRef"/> instances holding UTF-8 encoded strings. The strings
        /// must be binary-sorted.
        /// </summary>
        /// <param name="input">The binary-sorted, UTF-8 encoded strings to accept.</param>
        /// <returns>An <see cref="Automaton"/> flagged as deterministic whose initial state
        /// is converted from the builder's completed state graph.</returns>
        public static Automaton Build(ICollection <BytesRef> input)
        {
            var dfaBuilder = new DaciukMihovAutomatonBuilder();

            // Single scratch buffer reused for every term: each UTF-8 input is decoded
            // into it as UTF-16 and then handed to the builder.
            var utf16Scratch = new CharsRef();

            foreach (BytesRef term in input)
            {
                UnicodeUtil.UTF8toUTF16(term, utf16Scratch);
                dfaBuilder.Add(utf16Scratch);
            }

            // The result object is created before the builder is completed and converted,
            // so construction happens in the same order as with an object initializer.
            Automaton result = new Automaton();

            result.initial = Convert(dfaBuilder.Complete(), new JCG.Dictionary <State, Lucene.Net.Util.Automaton.State>(IdentityEqualityComparer <State> .Default));
            result.deterministic = true;
            return result;
        }
        /// <summary>
        /// Constructs a minimal, deterministic automaton from a sorted collection of
        /// <see cref="BytesRef"/> instances holding UTF-8 encoded strings. The strings
        /// must be binary-sorted.
        /// </summary>
        /// <param name="input">The binary-sorted, UTF-8 encoded strings to accept.</param>
        /// <returns>An <see cref="Automaton"/> flagged as deterministic whose initial state
        /// is converted from the builder's completed state graph.</returns>
        public static Automaton Build(ICollection <BytesRef> input)
        {
            var dfaBuilder = new DaciukMihovAutomatonBuilder();

            // Single scratch buffer reused for every term: each UTF-8 input is decoded
            // into it as UTF-16 and then handed to the builder.
            var utf16Scratch = new CharsRef();

            foreach (BytesRef term in input)
            {
                UnicodeUtil.UTF8toUTF16(term, utf16Scratch);
                dfaBuilder.Add(utf16Scratch);
            }

            // An object initializer constructs the Automaton first and then evaluates the
            // member assignments in order, matching explicit statement-by-statement setup.
            return new Automaton
            {
                initial = Convert(dfaBuilder.Complete(), new IdentityHashMap <State, Lucene.Net.Util.Automaton.State>()),
                deterministic = true
            };
        }