/// <summary>
/// Creates a new (deterministic and minimal) automaton whose language is the union
/// of the supplied <see cref="BytesRef"/>s, each of which holds a UTF-8 encoded string.
/// </summary>
/// <param name="utf8Strings">
/// The UTF-8 encoded input strings. The collection must already be in sorted order.
/// </param>
/// <returns>
/// The result of <c>MakeEmpty()</c> when <paramref name="utf8Strings"/> contains no
/// elements; otherwise an <see cref="Automaton"/> accepting all input strings. The
/// resulting automaton is codepoint based (full unicode codepoints on transitions).
/// </returns>
public static Automaton MakeStringUnion(ICollection<BytesRef> utf8Strings)
{
    // An empty input never reaches the builder; it maps straight to the empty automaton.
    if (utf8Strings.Count == 0)
    {
        return MakeEmpty();
    }

    return DaciukMihovAutomatonBuilder.Build(utf8Strings);
}
/// <summary>
/// Constructs a minimal, deterministic automaton from a sorted collection of
/// <see cref="BytesRef"/> instances holding UTF-8 encoded strings.
/// The strings must be binary-sorted.
/// </summary>
/// <param name="input">The binary-sorted, UTF-8 encoded strings to add to the automaton.</param>
/// <returns>
/// A new <see cref="Automaton"/> whose initial state is the converted result of the
/// builder and whose <c>deterministic</c> flag is set to <c>true</c>.
/// </returns>
public static Automaton Build(ICollection<BytesRef> input)
{
    DaciukMihovAutomatonBuilder automatonBuilder = new DaciukMihovAutomatonBuilder();

    // A single scratch buffer is reused for every term: each input is decoded
    // from UTF-8 into it and then handed to the builder.
    CharsRef utf16Scratch = new CharsRef();
    foreach (BytesRef utf8Term in input)
    {
        UnicodeUtil.UTF8toUTF16(utf8Term, utf16Scratch);
        automatonBuilder.Add(utf16Scratch);
    }

    Automaton result = new Automaton();

    // Finish the builder, then translate its states into automaton states,
    // tracking already-visited builder states through a map that uses the identity comparer.
    var builderRoot = automatonBuilder.Complete();
    var visited = new JCG.Dictionary<State, Lucene.Net.Util.Automaton.State>(IdentityEqualityComparer<State>.Default);
    result.initial = Convert(builderRoot, visited);
    result.deterministic = true;
    return result;
}
/// <summary>
/// Constructs a minimal, deterministic automaton from a sorted collection of
/// <see cref="BytesRef"/> instances holding UTF-8 encoded strings.
/// The strings must be binary-sorted.
/// </summary>
/// <param name="input">The binary-sorted, UTF-8 encoded strings to add to the automaton.</param>
/// <returns>
/// A new <see cref="Automaton"/> whose initial state is the converted result of the
/// builder and whose <c>deterministic</c> flag is set to <c>true</c>.
/// </returns>
public static Automaton Build(ICollection<BytesRef> input)
{
    DaciukMihovAutomatonBuilder automatonBuilder = new DaciukMihovAutomatonBuilder();

    // A single scratch buffer is reused for every term: each input is decoded
    // from UTF-8 into it and then handed to the builder.
    CharsRef utf16Scratch = new CharsRef();
    foreach (BytesRef utf8Term in input)
    {
        UnicodeUtil.UTF8toUTF16(utf8Term, utf16Scratch);
        automatonBuilder.Add(utf16Scratch);
    }

    // The initializer runs after the Automaton is constructed: the builder is
    // completed, its states are converted (with a fresh IdentityHashMap passed
    // along to Convert), and the automaton is flagged as deterministic.
    return new Automaton
    {
        initial = Convert(
            automatonBuilder.Complete(),
            new IdentityHashMap<State, Lucene.Net.Util.Automaton.State>()),
        deterministic = true
    };
}