/// <summary>
/// Depth-first enumeration of the strings accepted on paths that start at
/// <paramref name="s"/>. Every accepted string is added to <paramref name="strings"/>.
/// </summary>
/// <param name="s">State to expand.</param>
/// <param name="pathstates">States on the current search path; a transition back into one of them aborts the search.</param>
/// <param name="strings">Receives a deep copy of <paramref name="path"/> each time an accepting state is reached.</param>
/// <param name="path">Scratch buffer holding the labels followed so far.</param>
/// <param name="limit">Maximum number of strings to collect; a value &lt; 0 means "infinite".</param>
/// <returns>
/// <c>false</c> if a transition leads back to a state on the current path or if more than
/// <paramref name="limit"/> strings were found; <c>true</c> otherwise. On a <c>false</c> return
/// <paramref name="pathstates"/> and <paramref name="path"/> are left as they were at the point of failure.
/// </returns>
private static bool GetFiniteStrings(State s, HashSet<State> pathstates, HashSet<IntsRef> strings, IntsRef path, int limit)
{
    pathstates.Add(s);

    foreach (Transition transition in s.Transitions)
    {
        State target = transition.To;

        // A transition back onto the current path means the set of strings is not finite.
        if (pathstates.Contains(target))
        {
            return false;
        }

        for (int label = transition.Min_Renamed; label <= transition.Max_Renamed; label++)
        {
            // Push the label onto the path.
            path.Grow(path.Length + 1);
            path.Ints[path.Length] = label;
            path.Length++;

            if (target.accept)
            {
                strings.Add(IntsRef.DeepCopyOf(path));
                bool overLimit = limit >= 0 && strings.Count > limit;
                if (overLimit)
                {
                    return false;
                }
            }

            bool finite = GetFiniteStrings(target, pathstates, strings, path, limit);
            if (!finite)
            {
                return false;
            }

            // Pop the label again before trying the next one.
            path.Length--;
        }
    }

    pathstates.Remove(s);
    return true;
}
/// <summary>
/// Increments the byte buffer to the next String in binary order after s that will not put
/// the machine into a reject state. If such a string does not exist, returns false.
/// <para>
/// The correctness of this method depends upon the automaton being deterministic,
/// and having no transitions to dead states.
/// </para>
/// </summary>
/// <returns> true if more possible solutions exist for the DFA </returns>
private bool NextString()
{
    int pos = 0;
    SavedStates.Grow(SeekBytesRef.Length + 1);
    int[] states = SavedStates.Ints;
    states[0] = RunAutomaton.InitialState;

    for (;;)
    {
        CurGen++;
        Linear_Renamed = false;

        // Walk the automaton over the seek bytes until one of them is rejected.
        int state = states[pos];
        while (pos < SeekBytesRef.Length)
        {
            Visited[state] = CurGen;
            int nextState = RunAutomaton.Step(state, SeekBytesRef.Bytes[pos] & 0xff);
            if (nextState == -1)
            {
                // Rejected: pos stays on the offending byte.
                break;
            }

            states[pos + 1] = nextState;

            // A state seen twice in this generation is a loop; record it for faster enumeration.
            if ((Finite == false) && !Linear_Renamed && Visited[nextState] == CurGen)
            {
                Linear = pos;
            }

            state = nextState;
            pos++;
        }

        // Take the useful portion and the last non-reject state, and try to
        // append characters that will match.
        if (NextString(state, pos))
        {
            return true;
        }

        // No more solutions exist from this useful portion: backtrack.
        pos = Backtrack(pos);
        if (pos < 0)
        {
            // No more solutions at all.
            return false;
        }

        int newState = RunAutomaton.Step(states[pos], SeekBytesRef.Bytes[pos] & 0xff);
        if (newState >= 0 && RunAutomaton.IsAccept(newState))
        {
            // String is good to go as-is.
            return true;
        }

        // Otherwise advance further.
        // TODO: paranoia? if we backtrack thru an infinite DFA, the loop detection is important!
        // For now, restart from scratch for all infinite DFAs.
        if (Finite == false)
        {
            pos = 0;
        }
    }
}
/// <summary>
/// Just takes unsigned byte values from the BytesRef and
/// converts into an IntsRef.
/// </summary>
/// <param name="input">Source bytes; the slice starting at <c>input.Offset</c> with <c>input.Length</c> bytes is read.</param>
/// <param name="scratch">Destination that is overwritten from index 0 and returned.</param>
/// <returns><paramref name="scratch"/>, holding one int in the range 0-255 per input byte.</returns>
public static IntsRef ToIntsRef(BytesRef input, IntsRef scratch)
{
    // The values are written starting at index 0, so the returned slice must
    // start there too even if the caller reuses a scratch with a stale offset.
    scratch.Offset = 0;
    scratch.Grow(input.Length);

    for (int i = 0; i < input.Length; i++)
    {
        // Mask so each byte is taken as an unsigned value.
        scratch.Ints[i] = input.Bytes[i + input.Offset] & 0xFF;
    }

    scratch.Length = input.Length;
    return scratch;
}
/// <summary>
/// Copies the bytes of <paramref name="br"/>, each taken as an unsigned value,
/// into <paramref name="ir"/>.
/// </summary>
/// <param name="br">Source bytes; the slice starting at <c>br.Offset</c> with <c>br.Length</c> bytes is read.</param>
/// <param name="ir">Destination that is overwritten from index 0 and returned.</param>
/// <returns><paramref name="ir"/>, holding one int in the range 0-255 per input byte.</returns>
internal static IntsRef ToIntsRef(BytesRef br, IntsRef ir)
{
    // The values are written starting at index 0, so the returned slice must
    // start there too even if the caller reuses a buffer with a stale offset.
    ir.Offset = 0;

    if (br.Length > ir.Ints.Length)
    {
        ir.Grow(br.Length);
    }

    for (int i = 0; i < br.Length; i++)
    {
        // Mask so each byte is taken as an unsigned value.
        ir.Ints[i] = br.Bytes[br.Offset + i] & 0xFF;
    }

    ir.Length = br.Length;
    return ir;
}
/// <summary>
/// Stores every UTF-16 code unit (char) of <paramref name="s"/> as one int in
/// <paramref name="scratch"/>, without combining surrogate pairs.
/// </summary>
/// <param name="s">Text to copy.</param>
/// <param name="scratch">Destination; its offset is reset to 0 and its length set to <c>s.Length</c>.</param>
/// <returns><paramref name="scratch"/>.</returns>
public static IntsRef ToUTF16(string s, IntsRef scratch)
{
    int unitCount = s.Length;

    scratch.Offset = 0;
    scratch.Length = unitCount;
    scratch.Grow(unitCount);

    int slot = 0;
    foreach (char unit in s)
    {
        scratch.Ints[slot] = unit;
        slot++;
    }

    return scratch;
}
/// <summary>
/// Walks <paramref name="fst"/> from its first arc, choosing a random outgoing arc at each
/// node until an arc labelled <c>END_LABEL</c> is chosen.
/// </summary>
/// <param name="fst">FST to walk.</param>
/// <param name="in">Cleared on entry (length and offset set to 0), then filled with the labels of the chosen arcs.</param>
/// <returns>The outputs of all chosen arcs, combined with <c>fst.Outputs.Add</c>.</returns>
private T RandomAcceptedWord(FST<T> fst, IntsRef @in)
{
    FST.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
    var candidates = new List<FST.Arc<T>>();

    @in.Length = 0;
    @in.Offset = 0;

    T output = fst.Outputs.NoOutput;
    FST.BytesReader fstReader = fst.BytesReader;

    for (;;)
    {
        // Collect a private copy of every arc leaving the current node.
        fst.ReadFirstTargetArc(arc, arc, fstReader);
        for (;;)
        {
            var snapshot = new FST.Arc<T>();
            candidates.Add(snapshot.CopyFrom(arc));
            if (arc.Last)
            {
                break;
            }
            fst.ReadNextArc(arc, fstReader);
        }

        // Pick one of them at random.
        int choice = Random.Next(candidates.Count);
        arc = candidates[choice];
        candidates.Clear();

        // Accumulate its output.
        output = fst.Outputs.Add(output, arc.Output);

        // The end label terminates the word and is not appended.
        if (arc.Label == FST<T>.END_LABEL)
        {
            return output;
        }

        // Append the label, growing the buffer when it is full.
        if (@in.Ints.Length == @in.Length)
        {
            @in.Grow(1 + @in.Length);
        }
        @in.Ints[@in.Length] = arc.Label;
        @in.Length++;
    }
}
/// <summary>
/// Decodes the Unicode codepoints from the provided
/// string and places them in the provided scratch
/// IntsRef, which must not be null, returning it.
/// </summary>
/// <param name="s">Text to decode.</param>
/// <param name="scratch">Destination that is overwritten from index 0 and returned.</param>
/// <returns><paramref name="scratch"/>, with its length set to the number of codepoints decoded.</returns>
public static IntsRef ToUTF32(string s, IntsRef scratch)
{
    // The codepoints are written starting at index 0, so the returned slice must
    // start there too even if the caller reuses a scratch with a stale offset.
    scratch.Offset = 0;

    int charLimit = s.Length;
    int charIdx = 0;
    int intIdx = 0;

    while (charIdx < charLimit)
    {
        scratch.Grow(intIdx + 1);
        int utf32 = Character.CodePointAt(s, charIdx);
        scratch.Ints[intIdx] = utf32;
        intIdx++;

        // Advance by however many chars this codepoint occupied.
        charIdx += Character.CharCount(utf32);
    }

    scratch.Length = intIdx;
    return scratch;
}
/// <summary>
/// Decodes the Unicode codepoints from the provided
/// char[] and places them in the provided scratch
/// IntsRef, which must not be null, returning it.
/// </summary>
/// <param name="s">Characters to decode.</param>
/// <param name="offset">Index of the first char to decode.</param>
/// <param name="length">Number of chars to decode.</param>
/// <param name="scratch">Destination that is overwritten from index 0 and returned.</param>
/// <returns><paramref name="scratch"/>, with its length set to the number of codepoints decoded.</returns>
public static IntsRef ToUTF32(char[] s, int offset, int length, IntsRef scratch)
{
    // The codepoints are written starting at index 0, so the returned slice must
    // start there too even if the caller reuses a scratch with a stale offset.
    scratch.Offset = 0;

    int charLimit = offset + length;
    int charIdx = offset;
    int intIdx = 0;

    while (charIdx < charLimit)
    {
        scratch.Grow(intIdx + 1);
        int utf32 = Character.CodePointAt(s, charIdx, charLimit);
        scratch.Ints[intIdx] = utf32;
        intIdx++;

        // Advance by however many chars this codepoint occupied.
        charIdx += Character.CharCount(utf32);
    }

    scratch.Length = intIdx;
    return scratch;
}
/// <summary>
/// Decodes the Unicode codepoints of <paramref name="s"/> into <paramref name="ir"/>.
/// </summary>
/// <param name="s">Text to decode.</param>
/// <param name="ir">Destination that is overwritten from index 0 and returned.</param>
/// <returns><paramref name="ir"/>, with its length set to the number of codepoints decoded.</returns>
internal static IntsRef ToIntsRefUTF32(string s, IntsRef ir)
{
    // The codepoints are written starting at index 0, so the returned slice must
    // start there too even if the caller reuses a buffer with a stale offset.
    ir.Offset = 0;

    int charLength = s.Length;
    int charIdx = 0;
    int intIdx = 0;

    while (charIdx < charLength)
    {
        // Grow only when the backing array is full.
        if (intIdx == ir.Ints.Length)
        {
            ir.Grow(intIdx + 1);
        }

        int utf32 = Character.CodePointAt(s, charIdx);
        ir.Ints[intIdx] = utf32;
        intIdx++;

        // Advance by however many chars this codepoint occupied.
        charIdx += Character.CharCount(utf32);
    }

    ir.Length = intIdx;
    return ir;
}
/// <summary>
/// Collects into <paramref name="strings"/> every string that can be produced from
/// <paramref name="s"/>, recursing through its transitions.
/// </summary>
/// <param name="s">State whose outgoing transitions are explored.</param>
/// <param name="pathstates">States currently being explored; used to detect a transition back onto the path.</param>
/// <param name="strings">Result set; receives deep copies of <paramref name="path"/>.</param>
/// <param name="path">Scratch buffer with the labels from the initial call down to the current state.</param>
/// <param name="limit">Upper bound on the number of strings; a value &lt; 0 means "infinite".</param>
/// <returns>
/// <c>true</c> when the whole subtree was enumerated; <c>false</c> if a transition re-enters a state
/// on the current path or more than <paramref name="limit"/> strings are found.
/// </returns>
private static bool GetFiniteStrings(State s, HashSet<State> pathstates, HashSet<IntsRef> strings, IntsRef path, int limit)
{
    pathstates.Add(s);

    foreach (Transition edge in s.Transitions)
    {
        // Re-entering a state that is still being explored: give up.
        if (pathstates.Contains(edge.To))
        {
            return false;
        }

        int label = edge.Min_Renamed;
        while (label <= edge.Max_Renamed)
        {
            // Append this label to the path.
            path.Grow(path.Length + 1);
            path.Ints[path.Length] = label;
            path.Length++;

            if (edge.To.accept)
            {
                strings.Add(IntsRef.DeepCopyOf(path));
                if (limit >= 0 && strings.Count > limit)
                {
                    return false;
                }
            }

            if (!GetFiniteStrings(edge.To, pathstates, strings, path, limit))
            {
                return false;
            }

            // Drop the label again and move on to the next one in the range.
            path.Length--;
            label++;
        }
    }

    pathstates.Remove(s);
    return true;
}
/// <summary>
/// Expert: like <c>Util.GetByOutput(FST, long)</c> except reusing
/// BytesReader, initial and scratch Arc, and result.
/// <para>
/// Follows arcs starting from <paramref name="arc"/>, summing their outputs, until a final arc
/// is reached whose accumulated output equals <paramref name="targetOutput"/>.
/// </para>
/// </summary>
/// <param name="fst">FST to search.</param>
/// <param name="targetOutput">Output value to look up.</param>
/// <param name="in">Reader used for all arc reads; it is repositioned during the search.</param>
/// <param name="arc">Starting arc; overwritten as the search descends.</param>
/// <param name="scratchArc">Scratch arc; holds a copy of the previously scanned arc during the linear scan.</param>
/// <param name="result">Receives the labels of the followed path; its Length is set only when a match is found.</param>
/// <returns><paramref name="result"/> on a match, or <c>null</c> when the search gives up.</returns>
public static IntsRef GetByOutput(FST <long?> fst, long targetOutput, FST <long?> .BytesReader @in, FST <long?> .Arc <long?> arc, FST <long?> .Arc <long?> scratchArc, IntsRef result)
{
    // Running sum of the outputs on the path followed so far.
    long output = arc.Output.Value;
    // Number of labels written to result.Ints so far.
    int upto = 0;

    while (true)
    {
        if (arc.Final)
        {
            long finalOutput = output + arc.NextFinalOutput.Value;
            if (finalOutput == targetOutput)
            {
                // Found: publish how many labels were collected.
                result.Length = upto;
                return(result);
            }
            else if (finalOutput > targetOutput)
            {
                // Already past the target; treated as not found.
                // NOTE(review): presumably relies on outputs only growing along a path - confirm.
                return(null);
            }
        }

        if (FST <long?> .TargetHasArcs(arc))
        {
            // Make room for the label appended below.
            if (result.Ints.Length == upto)
            {
                result.Grow(1 + upto);
            }

            fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

            if (arc.BytesPerArc != 0)
            {
                // Arcs are addressable by index (PosArcsStart + BytesPerArc * index),
                // so binary-search them by the output each one would give.
                // NOTE(review): assumes arc outputs are ordered within the node - confirm.
                int low = 0;
                int high = arc.NumArcs - 1;
                int mid = 0;
                bool exact = false;
                while (low <= high)
                {
                    // Unsigned shift keeps the midpoint valid even if low + high overflows.
                    mid = (int)((uint)(low + high) >> 1);
                    @in.Position = arc.PosArcsStart;
                    @in.SkipBytes(arc.BytesPerArc * mid);
                    var flags = (sbyte)@in.ReadByte();
                    // The label is read only to move the reader past it.
                    fst.ReadLabel(@in);
                    long minArcOutput;
                    if ((flags & FST <long> .BIT_ARC_HAS_OUTPUT) != 0)
                    {
                        long arcOutput = fst.Outputs.Read(@in).Value;
                        minArcOutput = output + arcOutput;
                    }
                    else
                    {
                        // No output stored on this arc: following it adds nothing.
                        minArcOutput = output;
                    }
                    if (minArcOutput == targetOutput)
                    {
                        exact = true;
                        break;
                    }
                    else if (minArcOutput < targetOutput)
                    {
                        low = mid + 1;
                    }
                    else
                    {
                        high = mid - 1;
                    }
                }

                if (high == -1)
                {
                    // Every probed arc, down to index 0, overshoots the target.
                    return(null);
                }
                else if (exact)
                {
                    // NOTE(review): ReadNextRealArc presumably advances ArcIdx before
                    // reading, so mid - 1 makes it load arc mid - confirm.
                    arc.ArcIdx = mid - 1;
                }
                else
                {
                    // After the loop, low - 1 is the last arc whose output is below the
                    // target; low - 2 positions the next read on it (same assumption as above).
                    arc.ArcIdx = low - 2;
                }

                fst.ReadNextRealArc(arc, @in);
                result.Ints[upto++] = arc.Label;
                output += arc.Output.Value;
            }
            else
            {
                // Arcs are not index-addressable here: scan them one by one,
                // remembering the previous arc in scratchArc.
                FST <long?> .Arc <long?> prevArc = null;

                while (true)
                {
                    // this is the min output we'd hit if we follow
                    // this arc:
                    long minArcOutput = output + arc.Output.Value;

                    if (minArcOutput == targetOutput)
                    {
                        // Recurse on this arc:
                        output = minArcOutput;
                        result.Ints[upto++] = arc.Label;
                        break;
                    }
                    else if (minArcOutput > targetOutput)
                    {
                        if (prevArc == null)
                        {
                            // Output doesn't exist
                            return(null);
                        }
                        else
                        {
                            // Recurse on previous arc:
                            arc.CopyFrom(prevArc);
                            result.Ints[upto++] = arc.Label;
                            output += arc.Output.Value;
                            break;
                        }
                    }
                    else if (arc.Last)
                    {
                        // Recurse on this arc:
                        output = minArcOutput;
                        result.Ints[upto++] = arc.Label;
                        break;
                    }
                    else
                    {
                        // Read next arc in this node, keeping a copy of the current one:
                        prevArc = scratchArc;
                        prevArc.CopyFrom(arc);
                        fst.ReadNextRealArc(arc, @in);
                    }
                }
            }
        }
        else
        {
            // Nothing left to follow and no final arc matched the target.
            return(null);
        }
    }
}
/// <summary>
/// For every index field in <paramref name="byField"/>, registers each facet's category with the
/// taxonomy writer, adds the drill-down terms to <paramref name="doc"/>, and finally adds one
/// binary doc-values field holding the collected ordinals.
/// </summary>
/// <param name="taxoWriter">Taxonomy writer used to add categories and look up their parents.</param>
/// <param name="byField">Facet fields grouped by the name of the index field they go into.</param>
/// <param name="doc">Document that receives the generated fields.</param>
/// <exception cref="System.ArgumentException">
/// A facet field has more than one path component but its dimension is not configured as hierarchical.
/// </exception>
private void ProcessFacetFields(TaxonomyWriter taxoWriter, IDictionary<string, IList<FacetField>> byField, Document doc)
{
    foreach (KeyValuePair<string, IList<FacetField>> entry in byField)
    {
        string indexFieldName = entry.Key;
        IntsRef ordinals = new IntsRef(32);

        foreach (FacetField facetField in entry.Value)
        {
            FacetsConfig.DimConfig dimConfig = GetDimConfig(facetField.dim);
            if (facetField.path.Length > 1 && !dimConfig.Hierarchical)
            {
                throw new System.ArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.Length + " components");
            }

            FacetLabel label = new FacetLabel(facetField.dim, facetField.path);

            checkTaxoWriter(taxoWriter);
            int ordinal = taxoWriter.AddCategory(label);

            // Append the category's own ordinal, growing the buffer when it is full.
            if (ordinals.Length == ordinals.Ints.Length)
            {
                ordinals.Grow(ordinals.Length + 1);
            }
            ordinals.Ints[ordinals.Length] = ordinal;
            ordinals.Length++;

            if (dimConfig.MultiValued && (dimConfig.Hierarchical || dimConfig.RequireDimCount))
            {
                // Add all parents too:
                for (int parent = taxoWriter.GetParent(ordinal); parent > 0; parent = taxoWriter.GetParent(parent))
                {
                    if (ordinals.Ints.Length == ordinals.Length)
                    {
                        ordinals.Grow(ordinals.Length + 1);
                    }
                    ordinals.Ints[ordinals.Length] = parent;
                    ordinals.Length++;
                }

                if (!dimConfig.RequireDimCount)
                {
                    // Remove last (dimension) ord:
                    ordinals.Length--;
                }
            }

            // Drill down: one term per path prefix, from the shortest to the full path.
            for (int prefixLength = 1; prefixLength <= label.Length; prefixLength++)
            {
                string term = PathToString(label.Components, prefixLength);
                doc.Add(new StringField(indexFieldName, term, Field.Store.NO));
            }
        }

        // Facet counts:
        // DocValues are considered stored fields:
        doc.Add(new BinaryDocValuesField(indexFieldName, DedupAndEncode(ordinals)));
    }
}