/// <summary>
/// Creates a new (deterministic and minimal) automaton that accepts the union
/// of the supplied <see cref="BytesRef"/>s, each representing a UTF-8 encoded
/// string.
/// </summary>
/// <param name="utf8Strings">
/// The input strings, UTF-8 encoded. The collection must be in sorted
/// order.
/// </param>
/// <returns>
/// An <see cref="Automaton"/> accepting all input strings. The resulting
/// automaton is codepoint based (full unicode codepoints on transitions).
/// </returns>
public static Automaton MakeStringUnion(ICollection<BytesRef> utf8Strings)
{
    // Any non-empty collection is delegated to the Daciuk-Mihov builder.
    if (utf8Strings.Count != 0)
    {
        return DaciukMihovAutomatonBuilder.Build(utf8Strings);
    }

    // An empty collection never reaches the builder; MakeEmpty() supplies the result.
    return MakeEmpty();
}
/// <summary>
/// Wraps each of the given strings in a <see cref="BytesRef"/>, sorts the resulting
/// terms, builds an automaton from them via
/// <see cref="DaciukMihovAutomatonBuilder"/> and returns it as a
/// <see cref="CompiledAutomaton"/>.
/// </summary>
/// <param name="strings">The strings the automaton is built from; they may be given in any order.</param>
/// <returns>A <see cref="CompiledAutomaton"/> constructed from the built automaton.</returns>
private CompiledAutomaton Build(params string[] strings)
{
    List<BytesRef> sortedTerms = new List<BytesRef>();
    for (int i = 0; i < strings.Length; i++)
    {
        sortedTerms.Add(new BytesRef(strings[i]));
    }

    // Sort using the list's default ordering before handing the terms to the builder.
    sortedTerms.Sort();

    // NOTE(review): the two boolean arguments are passed through unchanged (true, false);
    // their meaning is defined by the CompiledAutomaton constructor, which is not visible here.
    return new CompiledAutomaton(DaciukMihovAutomatonBuilder.Build(sortedTerms), true, false);
}
/// <summary>
/// Build a minimal, deterministic automaton from a sorted list of <see cref="BytesRef"/> representing
/// strings in UTF-8. These strings must be binary-sorted.
/// </summary>
/// <param name="input">The UTF-8 encoded terms, already binary-sorted.</param>
/// <returns>An <see cref="Automaton"/> whose initial state is converted from the builder's completed state graph and which is flagged as deterministic.</returns>
public static Automaton Build(ICollection<BytesRef> input)
{
    DaciukMihovAutomatonBuilder automatonBuilder = new DaciukMihovAutomatonBuilder();

    // A single scratch buffer is reused for every term: each one is decoded
    // from UTF-8 into it and then added to the builder.
    CharsRef utf16Scratch = new CharsRef();
    foreach (BytesRef utf8Term in input)
    {
        UnicodeUtil.UTF8toUTF16(utf8Term, utf16Scratch);
        automatonBuilder.Add(utf16Scratch);
    }

    Automaton result = new Automaton();

    // Complete() yields the builder's finished state graph; Convert() turns it into
    // automaton states, using a map constructed with the identity equality comparer.
    var builderRoot = automatonBuilder.Complete();
    var convertedStates = new JCG.Dictionary<State, Lucene.Net.Util.Automaton.State>(IdentityEqualityComparer<State>.Default);
    result.initial = Convert(builderRoot, convertedStates);
    result.deterministic = true;
    return result;
}
/// <summary>
/// Build a minimal, deterministic automaton from a sorted list of <see cref="BytesRef"/> representing
/// strings in UTF-8. These strings must be binary-sorted.
/// </summary>
/// <param name="input">The UTF-8 encoded terms, already binary-sorted.</param>
/// <returns>An <see cref="Automaton"/> whose initial state is converted from the builder's completed state graph and which is flagged as deterministic.</returns>
public static Automaton Build(ICollection<BytesRef> input)
{
    DaciukMihovAutomatonBuilder automatonBuilder = new DaciukMihovAutomatonBuilder();

    // One scratch buffer is shared across all terms: decode UTF-8 into it,
    // then feed the decoded characters to the builder.
    CharsRef utf16Scratch = new CharsRef();
    foreach (BytesRef utf8Term in input)
    {
        UnicodeUtil.UTF8toUTF16(utf8Term, utf16Scratch);
        automatonBuilder.Add(utf16Scratch);
    }

    // Convert() translates the builder's completed state graph into automaton states,
    // recording the states it has produced in the IdentityHashMap passed to it.
    return new Automaton
    {
        initial = Convert(automatonBuilder.Complete(), new IdentityHashMap<State, Lucene.Net.Util.Automaton.State>()),
        deterministic = true
    };
}
/// <summary>
/// Build a minimal, deterministic automaton from a sorted list of <see cref="BytesRef"/> representing
/// strings in UTF-8. These strings must be binary-sorted.
/// </summary>
/// <param name="input">The UTF-8 encoded terms, already binary-sorted.</param>
/// <returns>An <see cref="Automaton"/> whose initial state is converted from the builder's completed state graph and which is flagged as deterministic.</returns>
public static Automaton Build(ICollection<BytesRef> input)
{
    DaciukMihovAutomatonBuilder termBuilder = new DaciukMihovAutomatonBuilder();

    // The same scratch buffer receives the UTF-16 form of every term
    // before that term is added to the builder.
    CharsRef decoded = new CharsRef();
    foreach (BytesRef term in input)
    {
        UnicodeUtil.UTF8toUTF16(term, decoded);
        termBuilder.Add(decoded);
    }

    Automaton automaton = new Automaton();

    // Finish the builder first, then convert its state graph into automaton states;
    // the IdentityHashMap is handed to Convert() to track the states it maps.
    var completedRoot = termBuilder.Complete();
    var stateMapping = new IdentityHashMap<State, Lucene.Net.Util.Automaton.State>();
    automaton.Initial = Convert(completedRoot, stateMapping);
    automaton.deterministic = true;
    return automaton;
}