Ejemplo n.º 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static<T> void walk(util.fst.FST<T> fst) throws java.io.IOException
        /// <summary>
        /// Visits the arcs of <paramref name="fst"/> breadth-first, starting from the
        /// arc returned by <c>getFirstArc</c>. Each target node is expanded only once.
        /// </summary>
        /// <remarks>
        /// The method has no result; it only drives the FST reading calls over the
        /// arcs it reaches.
        /// </remarks>
        /// <param name="fst">The FST whose arcs are traversed.</param>
        internal static void walk <T>(FST <T> fst)
        {
            // FIFO work list. The Java original used ArrayList.remove(0) (remove by
            // index); List<T>.Remove(0) in C# removes by value and returns bool, so a
            // real queue is used instead.
            Queue <FST.Arc <T> > pending = new Queue <FST.Arc <T> >();

            // Node addresses already expanded. Java's BitSet grows on demand while
            // BitArray has a fixed length (and no parameterless constructor); a set
            // of long also avoids truncating the node address to int.
            HashSet <long> seen = new HashSet <long>();

            FST.BytesReader reader = fst.BytesReader;
            FST.Arc <T> startArc = fst.getFirstArc(new FST.Arc <T>());
            pending.Enqueue(startArc);

            while (pending.Count > 0)
            {
                FST.Arc <T> arc = pending.Dequeue();
                long node = arc.target;

                // HashSet.Add returns false when the node was already recorded, so the
                // "not seen yet" test and the "mark as seen" step happen in one call.
                if (FST.targetHasArcs(arc) && seen.Add(node))
                {
                    fst.readFirstRealTargetArc(node, arc, reader);
                    while (true)
                    {
                        // 'arc' is reused as the read buffer, so enqueue a copy.
                        pending.Enqueue((new FST.Arc <T>()).copyFrom(arc));
                        if (arc.Last)
                        {
                            break;
                        }
                        fst.readNextRealArc(arc, reader);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Stores <paramref name="map"/> and, when it is not null, copies every arc
        /// leaving the FST's first arc target into <c>cachedRootArcs</c>, keyed by the
        /// arc label converted to a char.
        /// Private: use the builder to create instances.
        /// </summary>
        /// <param name="map">The FST to wrap; may be null, in which case nothing is cached.</param>
        /// <exception cref="Exception">
        /// Wraps any <see cref="IOException"/> raised while reading the FST.
        /// </exception>
        private NormalizeCharMap(FST <CharsRef> map)
        {
            this.map = map;
            if (map == null)
            {
                return;
            }

            try
            {
                // Pre-cache root arcs:
                FST.Arc <CharsRef> scratchArc = new FST.Arc <CharsRef>();
                FST.BytesReader fstReader = map.BytesReader;
                map.getFirstArc(scratchArc);
                if (FST.targetHasArcs(scratchArc))
                {
                    map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader);
                    while (true)
                    {
                        Debug.Assert(scratchArc.label != FST.END_LABEL);
                        // scratchArc is overwritten by the next read, so store a copy.
                        cachedRootArcs[(char)scratchArc.label] = (new FST.Arc <CharsRef>()).copyFrom(scratchArc);
                        if (scratchArc.Last)
                        {
                            break;
                        }
                        map.readNextRealArc(scratchArc, fstReader);
                    }
                }
                //System.out.println("cached " + cachedRootArcs.size() + " root arcs");
            }
            catch (IOException ioe)
            {
                // Bogus FST IOExceptions!!  (will never happen)
                // System.Exception has no (Exception) constructor; pass the cause as
                // the inner exception and reuse its text as the message, mirroring
                // Java's RuntimeException(Throwable).
                throw new Exception(ioe.ToString(), ioe);
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Tells whether <paramref name="frame"/> can walk forward on both the FSA and
 /// the FST: its <c>fsaState</c> must not be -1 and its <c>fstArc</c> must have
 /// a target with arcs.
 /// </summary>
 /// <param name="frame">The frame to inspect.</param>
 /// <returns>True when both conditions hold; otherwise false.</returns>
 internal bool canGrow(Frame frame)
 {
     // An fsaState of -1 rules the frame out before the FST is consulted.
     if (frame.fsaState == -1)
     {
         return false;
     }

     return FST.targetHasArcs(frame.fstArc);
 }
Ejemplo n.º 4
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public int read() throws java.io.IOException
        /// <summary>
        /// Returns the next character of the mapped stream.
        /// </summary>
        /// <remarks>
        /// Each pass of the loop does one of three things:
        /// 1. If a replacement is pending and not fully emitted, return its next char.
        /// 2. Otherwise try to match the input starting at <c>inputOff</c> against the
        ///    FST, keeping the longest match found. On a match, advance
        ///    <c>inputOff</c>, record offset corrections when the replacement length
        ///    differs from the matched length, install the replacement and loop again.
        /// 3. With no match, return <c>buffer.get(inputOff)</c> unchanged, advancing
        ///    past it unless it is -1.
        /// </remarks>
        /// <returns>
        /// The next output character, or -1 when <c>buffer.get</c> returns -1
        /// (presumably end of input; confirm against the buffer implementation).
        /// </returns>
        public override int read()
        {
            //System.out.println("\nread");
            while (true)
            {
                // Drain any replacement text left over from a previous match first.
                if (replacement != null && replacementPointer < replacement.length)
                {
                    //System.out.println("  return repl[" + replacementPointer + "]=" + replacement.chars[replacement.offset + replacementPointer]);
                    return(replacement.chars[replacement.offset + replacementPointer++]);
                }

                // TODO: a more efficient approach would be Aho/Corasick's
                // algorithm
                // (http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm)
                // or this generalization: www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
                //
                // I think this would be (almost?) equivalent to 1) adding
                // epsilon arcs from all final nodes back to the init
                // node in the FST, 2) adding a .* (skip any char)
                // loop on the initial node, and 3) determinizing
                // that.  Then we would not have to restart matching
                // at each position.

                // Longest match found so far at inputOff; lastMatch stays null when
                // nothing matched.
                int      lastMatchLen = -1;
                CharsRef lastMatch    = null;

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int firstCH = buffer.get(inputOff);
                int firstCH = buffer.get(inputOff);
                if (firstCH != -1)
                {
                    // NOTE(review): the Java original relied on Map.get returning null
                    // for a missing key. If cachedRootArcs is a .NET Dictionary, this
                    // indexer throws KeyNotFoundException instead, so the null check
                    // below would never see a miss. Confirm the container type.
                    FST.Arc <CharsRef> arc = cachedRootArcs[Convert.ToChar((char)firstCH)];
                    if (arc != null)
                    {
                        if (!FST.targetHasArcs(arc))
                        {
                            // Fast pass for single character match:
                            Debug.Assert(arc.Final);
                            lastMatchLen = 1;
                            lastMatch    = arc.output;
                        }
                        else
                        {
                            // Number of input chars consumed along the current FST path.
                            int      lookahead = 0;
                            // Output accumulated along the current FST path.
                            CharsRef output    = arc.output;
                            while (true)
                            {
                                lookahead++;

                                if (arc.Final)
                                {
                                    // Match! (to node is final)
                                    lastMatchLen = lookahead;
                                    lastMatch    = outputs.add(output, arc.nextFinalOutput);
                                    // Greedy: keep searching to see if there's a
                                    // longer match...
                                }

                                if (!FST.targetHasArcs(arc))
                                {
                                    break;
                                }

                                int ch = buffer.get(inputOff + lookahead);
                                if (ch == -1)
                                {
                                    // buffer.get reported -1: no further char to extend the match.
                                    break;
                                }
                                if ((arc = map.findTargetArc(ch, arc, scratchArc, fstReader)) == null)
                                {
                                    // Dead end
                                    break;
                                }
                                output = outputs.add(output, arc.output);
                            }
                        }
                    }
                }

                if (lastMatch != null)
                {
                    // Consume the matched input.
                    inputOff += lastMatchLen;
                    //System.out.println("  match!  len=" + lastMatchLen + " repl=" + lastMatch);

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int diff = lastMatchLen - lastMatch.length;
                    // Positive when the replacement is shorter than the matched input,
                    // negative when it is longer.
                    int diff = lastMatchLen - lastMatch.length;

                    if (diff != 0)
                    {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int prevCumulativeDiff = getLastCumulativeDiff();
                        int prevCumulativeDiff = LastCumulativeDiff;
                        if (diff > 0)
                        {
                            // Replacement is shorter than matched input:
                            addOffCorrectMap(inputOff - diff - prevCumulativeDiff, prevCumulativeDiff + diff);
                        }
                        else
                        {
                            // Replacement is longer than matched input: remap
                            // the "extra" chars all back to the same input
                            // offset:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int outputStart = inputOff - prevCumulativeDiff;
                            int outputStart = inputOff - prevCumulativeDiff;
                            // One correction entry per extra output char (-diff of them).
                            for (int extraIDX = 0; extraIDX < -diff; extraIDX++)
                            {
                                addOffCorrectMap(outputStart + extraIDX, prevCumulativeDiff - extraIDX - 1);
                            }
                        }
                    }

                    // Install the replacement; the next loop pass starts emitting it.
                    replacement        = lastMatch;
                    replacementPointer = 0;
                }
                else
                {
                    // No mapping applies here: pass the input char through unchanged.
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int ret = buffer.get(inputOff);
                    int ret = buffer.get(inputOff);
                    if (ret != -1)
                    {
                        inputOff++;
                        // Chars before the new inputOff are handed back to the buffer via freeBefore.
                        buffer.freeBefore(inputOff);
                    }
                    return(ret);
                }
            }
        }