Пример #1
0
            /// <summary>
            /// Seeds the queue with one path per arc leaving <paramref name="node"/>.
            /// The 'finished' arc (label <c>FST.END_LABEL</c>) is only offered when
            /// <paramref name="allowEmptyString"/> is <c>true</c>.
            /// </summary>
            /// <param name="node"> The arc whose target's outgoing arcs are enumerated. </param>
            /// <param name="startOutput"> The output the new paths start with. </param>
            /// <param name="allowEmptyString"> Whether an arc labelled <c>FST.END_LABEL</c> may be offered. </param>
            /// <param name="input"> The input passed on to the created path. </param>
            public virtual void AddStartPaths(FST.Arc<T> node, T startOutput, bool allowEmptyString, Int32sRef input)
            {
                // NO_OUTPUT must be a singleton, so swap an equal value for the canonical instance.
                if (startOutput.Equals(fst.Outputs.NoOutput))
                {
                    startOutput = fst.Outputs.NoOutput;
                }

                FSTPath<T> startPath = new FSTPath<T>(startOutput, node, input);
                fst.ReadFirstTargetArc(node, startPath.Arc, bytesReader);

                // Walk every arc of the node in place, offering each eligible one to the queue.
                for (;;)
                {
                    bool skipEmptyString = !allowEmptyString && startPath.Arc.Label == FST.END_LABEL;
                    if (!skipEmptyString)
                    {
                        AddIfCompetitive(startPath);
                    }

                    if (startPath.Arc.IsLast)
                    {
                        return;
                    }

                    fst.ReadNextArc(startPath.Arc, bytesReader);
                }
            }
Пример #2
0
        /// <summary>
        /// Scans the cached root arcs, beginning at <paramref name="rootArcIndex"/>, and
        /// returns the first one under which <paramref name="utf8"/> is an exact match.
        /// </summary>
        /// <param name="rootArcIndex">
        ///          Index into <see cref="rootArcs"/> of the first root arc to try.
        /// </param>
        /// <param name="utf8">
        ///          The sequence of utf8 bytes to follow.
        /// </param>
        /// <returns> The label of the matching root arc (the bucket number), or
        ///         <c>-1</c> if no root arc produced a match. </returns>
        private int GetExactMatchStartingFromRootArc(int rootArcIndex, BytesRef utf8)
        {
            try
            {
                // A single scratch arc is reused for every descent.
                FST.Arc<object> working = new FST.Arc<object>();
                FST.BytesReader reader = automaton.BytesReader;

                for (int index = rootArcIndex; index < rootArcs.Length; index++)
                {
                    FST.Arc<object> candidate = rootArcs[index];
                    FST.Arc<object> cursor = working.CopyFrom(candidate);

                    // Follow the key bytes below this root arc; try the next root arc on failure.
                    if (!DescendWithPrefix(cursor, utf8))
                    {
                        continue;
                    }

                    // It is an exact match only when the first arc leaving the reached
                    // node is the end-of-input arc.
                    automaton.ReadFirstTargetArc(cursor, cursor, reader);
                    if (cursor.Label == Lucene.Net.Util.Fst.FST.END_LABEL)
                    {
                        return candidate.Label;
                    }
                }
            }
            catch (IOException e)
            {
                // Should never happen, but anyway.
                throw new Exception(e.Message, e);
            }

            // Every root arc was tried without success.
            return -1;
        }
Пример #3
0
        /// <summary>
        /// Copies every arc leaving the automaton's root node and returns the copies in
        /// reverse read order, which is intended to put the completions with the highest
        /// weights first.
        /// </summary>
        /// <param name="automaton"> The automaton whose root arcs are cached. </param>
        /// <returns> Independent copies of the root arcs, last-read arc first. </returns>
        private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
        {
            try
            {
                List<FST.Arc<object>> collected = new List<FST.Arc<object>>();
                FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
                FST.BytesReader reader = automaton.BytesReader;

                // The cursor is overwritten on every read, so each arc is stored as a copy.
                automaton.ReadFirstTargetArc(cursor, cursor, reader);
                collected.Add(new FST.Arc<object>().CopyFrom(cursor));
                while (!cursor.IsLast)
                {
                    automaton.ReadNextArc(cursor, reader);
                    collected.Add(new FST.Arc<object>().CopyFrom(cursor));
                }

                // Flip the read order so the last arc read comes first.
                collected.Reverse();
                return collected.ToArray();
            }
            catch (IOException e)
            {
                throw new Exception(e.Message, e);
            }
        }
Пример #4
0
        /// <summary>
        /// Copies every arc leaving the automaton's root node and returns the copies in
        /// reverse read order, which is intended to put the completions with the highest
        /// weights first.
        /// </summary>
        /// <param name="automaton"> The automaton whose root arcs are cached. </param>
        /// <returns> Independent copies of the root arcs, last-read arc first. </returns>
        private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
        {
            try
            {
                // LUCENENET specific: a stack is used instead of a list because its
                // ToArray() already yields the elements in reverse insertion order.
                Stack<FST.Arc<object>> reversed = new Stack<FST.Arc<object>>();
                FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
                FST.BytesReader reader = automaton.GetBytesReader();

                // The cursor is overwritten on every read, so each arc is stored as a copy.
                automaton.ReadFirstTargetArc(cursor, cursor, reader);
                reversed.Push(new FST.Arc<object>().CopyFrom(cursor));
                while (!cursor.IsLast)
                {
                    automaton.ReadNextArc(cursor, reader);
                    reversed.Push(new FST.Arc<object>().CopyFrom(cursor));
                }

                // Last arc read comes out first.
                return reversed.ToArray();
            }
            catch (Exception e) when (e.IsIOException())
            {
                throw RuntimeException.Create(e);
            }
        }
Пример #5
0
        /// <summary>
        /// Returns the first exact match by traversing root arcs, starting from the
        /// arc <paramref name="rootArcIndex"/>.
        /// </summary>
        /// <param name="rootArcIndex">
        ///          The first root arc index in <see cref="rootArcs"/> to consider when
        ///          matching.
        /// </param>
        /// <param name="utf8">
        ///          The sequence of utf8 bytes to follow.
        /// </param>
        /// <returns> Returns the bucket number of the match (the label of the matching
        ///         root arc) or <c>-1</c> if no match was found. </returns>
        /// <exception cref="Exception"> Wraps any <see cref="IOException"/> raised while
        ///         reading the automaton; the original exception is kept as the inner
        ///         exception. </exception>
        private int GetExactMatchStartingFromRootArc(int rootArcIndex, BytesRef utf8)
        {
            try
            {
                // One scratch arc is reused for every root arc that is tried.
                FST.Arc<object> scratch = new FST.Arc<object>();
                FST.BytesReader fstReader = automaton.BytesReader;
                for (; rootArcIndex < rootArcs.Length; rootArcIndex++)
                {
                    FST.Arc<object> rootArc = rootArcs[rootArcIndex];
                    FST.Arc<object> arc = scratch.CopyFrom(rootArc);

                    // Descend into the automaton using the key as prefix.
                    if (descendWithPrefix(arc, utf8))
                    {
                        // The key is an exact match only when the first arc leaving the
                        // reached node is the end-of-input arc.
                        automaton.ReadFirstTargetArc(arc, arc, fstReader);
                        if (arc.Label == FST.END_LABEL)
                        {
                            // The root arc's label is reported as the bucket number.
                            return rootArc.Label;
                        }
                    }
                }
            }
            catch (IOException e)
            {
                // Should never happen, but anyway. Exception has no (Exception) constructor,
                // so the cause is passed as the inner exception alongside its message.
                throw new Exception(e.Message, e);
            }

            // No match.
            return -1;
        }
Пример #6
0
        /// <summary>
        /// Walks <paramref name="fst"/> from its first arc, picking one outgoing arc at
        /// random at every node, until an arc labelled <c>END_LABEL</c> is picked.
        /// The labels of the picked arcs are written into <paramref name="in"/> (which is
        /// reset first) and the outputs along the way are accumulated.
        /// </summary>
        /// <param name="fst"> The FST to walk. </param>
        /// <param name="in"> Receives the labels of the chosen path; its length and offset are reset to 0. </param>
        /// <returns> The accumulated output of the chosen path. </returns>
        private T RandomAcceptedWord(FST<T> fst, IntsRef @in)
        {
            FST.Arc<T> current = fst.GetFirstArc(new FST.Arc<T>());
            IList<FST.Arc<T>> candidates = new List<FST.Arc<T>>();

            // Start from an empty word.
            @in.Length = 0;
            @in.Offset = 0;

            T result = fst.Outputs.NoOutput;
            FST.BytesReader reader = fst.BytesReader;

            for (;;)
            {
                // Collect a copy of every arc leaving the current node.
                fst.ReadFirstTargetArc(current, current, reader);
                for (;;)
                {
                    candidates.Add(new FST.Arc<T>().CopyFrom(current));
                    if (current.Last)
                    {
                        break;
                    }
                    fst.ReadNextArc(current, reader);
                }

                // Choose one of them and continue the walk from it.
                current = candidates[Random.Next(candidates.Count)];
                candidates.Clear();

                result = fst.Outputs.Add(result, current.Output);

                // The end-of-input arc terminates the word and contributes no label.
                if (current.Label == FST<T>.END_LABEL)
                {
                    return result;
                }

                // Make room for one more label when the buffer is full.
                if (@in.Ints.Length == @in.Length)
                {
                    @in.Grow(1 + @in.Length);
                }
                @in.Ints[@in.Length++] = current.Label;
            }
        }
Пример #7
0
        // Uncomment for debugging:

        /*
         * public static <T> void dotToFile(FST<T> fst, String filePath) throws IOException {
         * Writer w = new OutputStreamWriter(new FileOutputStream(filePath));
         * toDot(fst, w, true, true);
         * w.Dispose();
         * }
         */

        /// <summary>
        /// Reads the first arc whose label is greater than or equal to the given label
        /// into the provided arc in place and returns it if found; otherwise returns
        /// <c>null</c>.
        /// </summary>
        /// <remarks>
        /// Three paths are taken depending on the input:
        /// <list type="bullet">
        /// <item><description><paramref name="label"/> is <c>FST.END_LABEL</c>: the arc is
        /// synthesized from <paramref name="follow"/> without reading, and only if
        /// <paramref name="follow"/> is final.</description></item>
        /// <item><description>The first target arc reports a non-zero <c>BytesPerArc</c>:
        /// the arcs are probed with a binary search.</description></item>
        /// <item><description>Otherwise the arcs are scanned linearly.</description></item>
        /// </list>
        /// Both <paramref name="arc"/> and the position of <paramref name="in"/> are
        /// modified, also when <c>null</c> is returned.
        /// </remarks>
        /// <typeparam name="T"> the output type of the FST </typeparam>
        /// <param name="label"> the label to ceil on </param>
        /// <param name="fst"> the fst to operate on </param>
        /// <param name="follow"> the arc to follow reading the label from </param>
        /// <param name="arc"> the arc to read into in place </param>
        /// <param name="in"> the fst's <see cref="FST.BytesReader"/> </param>
        /// <returns> the arc that was read (the END_LABEL and linear-scan paths return
        /// <paramref name="arc"/> itself; the binary-search path returns the result of
        /// <c>ReadNextRealArc</c>), or <c>null</c> if no suitable arc exists </returns>
        public static FST.Arc <T> ReadCeilArc <T>(int label, FST <T> fst, FST.Arc <T> follow, FST.Arc <T> arc, FST.BytesReader @in)
        {
            // TODO maybe this would be useful in the FST class - we could simplify some other code like FSTEnum?
            if (label == FST.END_LABEL)
            {
                // The end-of-input label can only be satisfied by a final arc; the
                // resulting arc is built from 'follow' instead of being read.
                if (follow.IsFinal)
                {
                    if (follow.Target <= 0)
                    {
                        // Non-positive target: flag the synthesized arc as the last arc.
                        // NOTE(review): presumably this means 'follow' has no target node - confirm.
                        arc.Flags = (sbyte)FST.BIT_LAST_ARC;
                    }
                    else
                    {
                        arc.Flags = 0;
                        // NOTE: nextArc is a node (not an address!) in this case:
                        arc.NextArc = follow.Target;
                        arc.Node    = follow.Target;
                    }
                    arc.Output = follow.NextFinalOutput;
                    arc.Label  = FST.END_LABEL;
                    return(arc);
                }
                else
                {
                    return(null);
                }
            }

            // Nothing to search when the target of 'follow' has no arcs.
            if (!FST <T> .TargetHasArcs(follow))
            {
                return(null);
            }
            fst.ReadFirstTargetArc(follow, arc, @in);
            if (arc.BytesPerArc != 0 && arc.Label != FST.END_LABEL)
            {
                // Arcs are fixed array -- use binary search to find
                // the target.

                int low  = arc.ArcIdx;
                int high = arc.NumArcs - 1;
                int mid  = 0;
                // System.out.println("do arc array low=" + low + " high=" + high +
                // " targetLabel=" + targetLabel);
                while (low <= high)
                {
                    // Unsigned shift yields the midpoint even if low + high overflows.
                    mid          = (int)((uint)(low + high) >> 1);
                    // Seek to entry 'mid' of the fixed-size arc array and read its label.
                    // NOTE(review): the extra +1 presumably skips the entry's flags byte -
                    // confirm against the FST arc encoding.
                    @in.Position = arc.PosArcsStart;
                    @in.SkipBytes(arc.BytesPerArc * mid + 1);
                    int midLabel = fst.ReadLabel(@in);
                    int cmp      = midLabel - label;
                    // System.out.println("  cycle low=" + low + " high=" + high + " mid=" +
                    // mid + " midLabel=" + midLabel + " cmp=" + cmp);
                    if (cmp < 0)
                    {
                        low = mid + 1;
                    }
                    else if (cmp > 0)
                    {
                        high = mid - 1;
                    }
                    else
                    {
                        // Exact hit: step back one slot before calling ReadNextRealArc.
                        // NOTE(review): presumably ReadNextRealArc advances ArcIdx by one
                        // and therefore loads entry 'mid' - confirm.
                        arc.ArcIdx = mid - 1;
                        return(fst.ReadNextRealArc(arc, @in));
                    }
                }
                if (low == arc.NumArcs)
                {
                    // DEAD END! The search ran past the final entry, i.e. every probed
                    // label was smaller than the requested one.
                    return(null);
                }

                // The loop above only falls through once low > high, so this selects 'high'.
                // NOTE(review): presumably ReadNextRealArc then loads the entry after
                // 'high', which is the first one with a larger label - confirm.
                arc.ArcIdx = (low > high ? high : low);
                return(fst.ReadNextRealArc(arc, @in));
            }

            // Linear scan: restart from the first real arc of the target node.
            fst.ReadFirstRealTargetArc(follow.Target, arc, @in);

            while (true)
            {
                // System.out.println("  non-bs cycle");
                // TODO: we should fix this code to not have to create
                // object for the output of every arc we scan... only
                // for the matching arc, if found
                if (arc.Label >= label)
                {
                    // System.out.println("    found!");
                    return(arc);
                }
                else if (arc.IsLast)
                {
                    // Ran out of arcs without reaching the requested label.
                    return(null);
                }
                else
                {
                    fst.ReadNextRealArc(arc, @in);
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Repositions the enum on the prefix that the current term shares with the
        /// target term, so that a following seek can continue from there.
        /// </summary>
        protected void RewindPrefix()
        {
            // Nothing has been read yet: load the first arc below the root and stop.
            if (m_upto == 0)
            {
                m_upto = 1;
                m_fst.ReadFirstTargetArc(GetArc(0), GetArc(1), m_fstReader);
                return;
            }

            int previousDepth = m_upto;

            // Advance while the current and target labels agree.
            for (m_upto = 1; m_upto < previousDepth && m_upto <= m_targetLength + 1; m_upto++)
            {
                int diff = CurrentLabel - TargetLabel;
                if (diff == 0)
                {
                    continue;
                }

                if (diff > 0)
                {
                    // Current label is past the target: seek backwards by resetting
                    // this level to the first arc of its parent.
                    FST.Arc<T> resetArc = GetArc(m_upto);
                    m_fst.ReadFirstTargetArc(GetArc(m_upto - 1), resetArc, m_fstReader);
                }

                // diff < 0 means the target is still ahead; the caller seeks forward from here.
                break;
            }
        }