C# (CSharp) ISequenceEncoder.Decode Examples

Programming Language: C# (CSharp)

Class/Type: ISequenceEncoder

Method/Function: Decode

Examples at hotexamples.com: 3

C# (CSharp) ISequenceEncoder.Decode - 3 examples found. These are the top-rated real-world C# (CSharp) examples of ISequenceEncoder.Decode extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Decode(3)

GetType(2)

Encode(1)

Frequently Used Methods

Decode (3)

GetType (2)

Encode (1)

Example #1

Show file

File: SequenceEncodersTest.cs Project: NightOwl888/Morfologik.Stemming

        /// <summary>
        /// Runs <paramref name="coder"/>'s Encode and then Decode over the UTF-8 bytes of the
        /// two strings and fails the test when the decoded buffer does not equal the bytes of
        /// <paramref name="dstString"/>.
        /// </summary>
        /// <param name="coder">The encoder under test.</param>
        /// <param name="srcString">Text whose UTF-8 bytes are passed as the source buffer.</param>
        /// <param name="dstString">Text whose UTF-8 bytes are passed as the target buffer.</param>
        private void assertRoundtripEncode(ISequenceEncoder coder, String srcString, String dstString)
        {
            Encoding utf8 = Encoding.UTF8;
            ByteBuffer srcBytes = ByteBuffer.Wrap(utf8.GetBytes(srcString));
            ByteBuffer dstBytes = ByteBuffer.Wrap(utf8.GetBytes(dstString));

            // Each call receives a freshly allocated reuse buffer sized by Random.Next(30).
            ByteBuffer encodeScratch = ByteBuffer.Allocate(Random.Next(30));
            ByteBuffer encodedBytes = coder.Encode(encodeScratch, srcBytes, dstBytes);

            ByteBuffer decodeScratch = ByteBuffer.Allocate(Random.Next(30));
            ByteBuffer roundTripped = coder.Decode(decodeScratch, srcBytes, encodedBytes);

            if (roundTripped.Equals(dstBytes))
            {
                return;
            }

            // Dump all four buffers so a failing round trip can be diagnosed from the output.
            Console.Out.WriteLine("src: " + BufferUtils.ToString(srcBytes, utf8));
            Console.Out.WriteLine("dst: " + BufferUtils.ToString(dstBytes, utf8));
            Console.Out.WriteLine("enc: " + BufferUtils.ToString(encodedBytes, utf8));
            Console.Out.WriteLine("dec: " + BufferUtils.ToString(roundTripped, utf8));
            fail("Mismatch.");
        }

Example #2

Show file

File: DictionaryEnumerator.cs Project: NightOwl888/Morfologik.Stemming

        /// <summary>
        /// Splits the current element of <c>entriesIter</c> into its inflected form, stem and
        /// tag parts, stores them in the reused <c>entry</c> object and returns that object.
        /// </summary>
        /// <returns>The <c>entry</c> field, updated for the current element.</returns>
        /// <exception cref="Exception">The entry bytes contain no separator byte.</exception>
        private WordData Next()
        {
            ByteBuffer current = entriesIter.Current;

            // Expected layout: inflected<SEP>codedBase<SEP>tag. Locate the first separator.
            byte[] bytes  = current.Array;
            int    length = current.Remaining;

            int cut = 0;
            while (cut < length && bytes[cut] != separator)
            {
                cut++;
            }

            if (cut == length)
            {
                throw new Exception("Invalid dictionary " + "entry format (missing separator).");
            }

            // Everything before the separator is the inflected form; copy it and decode to chars.
            inflectedBuffer = BufferUtils.ClearAndEnsureCapacity(inflectedBuffer, cut);
            inflectedBuffer.Put(bytes, 0, cut);
            inflectedBuffer.Flip();

            inflectedCharBuffer = BufferUtils.BytesToChars(decoder, inflectedBuffer, inflectedCharBuffer);
            entry.Update(inflectedBuffer, inflectedCharBuffer);

            // Copy the bytes that follow the separator into the scratch buffer. The capacity
            // request is computed before the separator is skipped, matching the copy length + 1.
            temp = BufferUtils.ClearAndEnsureCapacity(temp, length - cut);
            int restStart = cut + 1;
            temp.Put(bytes, restStart, length - restStart);
            temp.Flip();

            // From here on work on the remainder only (codedBase<SEP>tag).
            bytes  = temp.Array;
            length = temp.Remaining;

            // Search for the second separator, starting after the encoder's prefix bytes.
#pragma warning disable 612, 618
            Debug.Assert(sequenceEncoder.PrefixBytes <= length, sequenceEncoder.GetType() + " >? " + length);
            cut = sequenceEncoder.PrefixBytes;
#pragma warning restore 612, 618
            while (cut < length && bytes[cut] != separator)
            {
                cut++;
            }

            // Fill the stem buffer: either the raw coded bytes or their decoded form.
            if (!decodeStems)
            {
                entry.stemBuffer = BufferUtils.ClearAndEnsureCapacity(entry.stemBuffer, cut);
                entry.stemBuffer.Put(bytes, 0, cut);
                entry.stemBuffer.Flip();
            }
            else
            {
                entry.stemBuffer = sequenceEncoder.Decode(entry.stemBuffer,
                                                          inflectedBuffer,
                                                          ByteBuffer.Wrap(bytes, 0, cut));
            }

            // The tag starts right after the separator when one was found, otherwise it is empty.
            int tagStart  = cut < length ? cut + 1 : cut;
            int tagLength = length - tagStart;

            entry.tagBuffer = BufferUtils.ClearAndEnsureCapacity(entry.tagBuffer, tagLength);
            entry.tagBuffer.Put(bytes, tagStart, tagLength);
            entry.tagBuffer.Flip();

            return entry;
        }

Example #3

Show file

File: DictionaryLookup.cs Project: NightOwl888/Morfologik.Stemming

        /// <summary>
        /// Searches the automaton for a symbol sequence equal to <paramref name="word"/>,
        /// followed by a separator. The result is a stem (decompressed according
        /// to the dictionary's specification) and optional tag data.
        /// </summary>
        /// <param name="word">
        /// The word to look up. If the dictionary metadata declares input conversion pairs,
        /// the replacements are applied to it before the search.
        /// </param>
        /// <returns>
        /// The <c>formsList</c> field, re-wrapped over the entries of <c>forms</c> filled by this
        /// call. It is wrapped over zero elements when <paramref name="word"/> contains the
        /// separator character, cannot be converted to bytes, or has no matching entry.
        /// NOTE(review): the list and its <c>WordData</c> items are reused between calls, so
        /// callers presumably must consume the result before the next lookup - confirm.
        /// </returns>
        public IList <WordData> Lookup(ICharSequence word)
        {
            byte separator = dictionaryMetadata.Separator;

            // PrefixBytes is read inside a suppression of warnings 612/618 (use of obsolete members).
#pragma warning disable 612, 618
            int prefixBytes = sequenceEncoder.PrefixBytes;
#pragma warning restore 612, 618

            if (dictionaryMetadata.InputConversionPairs.Any())
            {
                word = ApplyReplacements(word, dictionaryMetadata.InputConversionPairs);
            }

            // Reset the output list to zero length.
            formsList.Wrap(forms, 0, 0);

            // Encode word characters into bytes in the same encoding as the FSA's.
            charBuffer = BufferUtils.ClearAndEnsureCapacity(charBuffer, word.Length);
            for (int i = 0; i < word.Length; i++)
            {
                char chr = word[i];
                if (chr == separatorChar)
                {
                    // No valid input can contain the separator.
                    return(formsList);
                }
                charBuffer.Put(chr);
            }
            charBuffer.Flip();
            try
            {
                byteBuffer = BufferUtils.CharsToBytes(encoder, charBuffer, byteBuffer);
            }
            catch (UnmappableInputException)
            {
                // This should be a rare occurrence, but if it happens it means there is no way
                // the dictionary can contain the input word.
                return(formsList);
            }

            // Try to find a partial match in the dictionary.
            MatchResult match = matcher.Match(matchResult, byteBuffer
                                              .Array, 0, byteBuffer.Remaining, rootNode);

            // Only the "sequence is a prefix" outcome can yield forms; every other match kind
            // falls through to the empty else branch and returns the zero-length list.
            if (match.Kind == MatchResult.SequenceIsAPrefix)
            {
                /*
                 * The entire sequence exists in the dictionary. A separator should
                 * be the next symbol.
                 */
                int arc = fsa.GetArc(match.Node, separator);

                /*
                 * The situation when the arc points to a final node should NEVER
                 * happen. After all, we want the word to have SOME base form.
                 */
                if (arc != 0 && !fsa.IsArcFinal(arc))
                {
                    // There is such a word in the dictionary. Return its base forms.
                    int formsCount = 0;

                    // Walk every sequence reachable from the node behind the separator arc;
                    // each one is handled as encoded stem, then separator, then tag.
                    finalStatesIterator.RestartFrom(fsa.GetEndNode(arc));
                    while (finalStatesIterator.MoveNext())
                    {
                        ByteBuffer bb     = finalStatesIterator.Current;
                        byte[]     ba     = bb.Array;
                        int        bbSize = bb.Remaining;

                        // Grow the reusable forms array by ExpandSize when it is full and
                        // create a WordData for every slot that is still null.
                        if (formsCount >= forms.Length)
                        {
                            //forms = Arrays.CopyOf(forms, forms.Length + EXPAND_SIZE);
                            Array.Resize(ref forms, forms.Length + ExpandSize);
                            for (int k = 0; k < forms.Length; k++)
                            {
                                if (forms[k] == null)
                                {
                                    forms[k] = new WordData(decoder);
                                }
                            }
                        }

                        /*
                         * Now, expand the prefix/ suffix 'compression' and store
                         * the base form.
                         */
                        WordData wordData = forms[formsCount++];
                        // The word stored in the result gets the output conversion pairs
                        // applied when the metadata declares any.
                        if (!dictionaryMetadata.OutputConversionPairs.Any())
                        {
                            wordData.Update(byteBuffer, word);
                        }
                        else
                        {
                            wordData.Update(byteBuffer, ApplyReplacements(word, dictionaryMetadata.OutputConversionPairs));
                        }

                        /*
                         * Find the separator byte's position splitting the inflection instructions
                         * from the tag. The scan starts at prefixBytes, so the first prefixBytes
                         * bytes are never tested against the separator.
                         */
                        Debug.Assert(prefixBytes <= bbSize, sequenceEncoder.GetType() + " >? " + bbSize);
                        int sepPos;
                        for (sepPos = prefixBytes; sepPos < bbSize; sepPos++)
                        {
                            if (ba[sepPos] == separator)
                            {
                                break;
                            }
                        }

                        /*
                         * Decode the stem into stem buffer. The encoded stem is ba[0..sepPos) and
                         * is decoded against the bytes of the looked-up word.
                         */
                        wordData.stemBuffer = sequenceEncoder.Decode(wordData.stemBuffer,
                                                                     byteBuffer,
                                                                     ByteBuffer.Wrap(ba, 0, sepPos));

                        // Skip separator character. This is unconditional: when no separator was
                        // found sepPos becomes bbSize + 1 and tagSize below is -1, which the
                        // tagSize > 0 guard handles.
                        sepPos++;

                        /*
                         * Decode the tag data.
                         */
                        int tagSize = bbSize - sepPos;
                        // NOTE(review): when there is no tag, tagBuffer is not touched here;
                        // presumably wordData.Update above resets it - confirm.
                        if (tagSize > 0)
                        {
                            wordData.tagBuffer = BufferUtils.ClearAndEnsureCapacity(wordData.tagBuffer, tagSize);
                            wordData.tagBuffer.Put(ba, sepPos, tagSize);
                            wordData.tagBuffer.Flip();
                        }
                    }

                    // Expose exactly the entries filled during this call.
                    formsList.Wrap(forms, 0, formsCount);
                }
            }
            else
            {
                /*
                 * this case is somewhat confusing: we should have hit the separator
                 * first... I don't really know how to deal with it at the time
                 * being.
                 */
            }
            return(formsList);
        }