UnicodeUtil.UTF8toUTF16 C# (CSharp)のコード例

コード例 #1

0

ファイルを表示

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void readField(util.BytesRef type, index.FieldInfo fieldInfo, index.StoredFieldVisitor visitor) throws java.io.IOException
        /// <summary>
        /// Reads the next line into <c>scratch</c> (asserted to start with <c>VALUE</c>) and hands the
        /// text that follows the <c>VALUE</c> prefix to <paramref name="visitor"/>, decoded according
        /// to <paramref name="type"/>.
        /// </summary>
        /// <param name="type">Type marker; matched with <c>==</c> against the <c>TYPE_*</c> constants.
        /// A marker matching none of them is silently ignored.</param>
        /// <param name="fieldInfo">Field description forwarded unchanged to the visitor.</param>
        /// <param name="visitor">Receiver of the decoded value.</param>
        private void readField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
        {
            readLine();
            Debug.Assert(StringHelper.StartsWith(scratch, VALUE));
            if (type == TYPE_STRING)
            {
                visitor.stringField(fieldInfo, new string(scratch.bytes, scratch.offset + VALUE.length, scratch.length - VALUE.length, StandardCharsets.UTF_8));
            }
            else if (type == TYPE_BINARY)
            {
                sbyte[] copy = new sbyte[scratch.length - VALUE.length];
                Array.Copy(scratch.bytes, scratch.offset + VALUE.length, copy, 0, copy.Length);
                visitor.binaryField(fieldInfo, copy);
            }
            else if (type == TYPE_INT)
            {
                // Numeric values are machine-readable text: parse them with the invariant culture so the
                // result does not change with the current thread culture (digit grouping, decimal separator).
                UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset + VALUE.length, scratch.length - VALUE.length, scratchUTF16);
                visitor.intField(fieldInfo, Convert.ToInt32(scratchUTF16.ToString(), System.Globalization.CultureInfo.InvariantCulture));
            }
            else if (type == TYPE_LONG)
            {
                UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset + VALUE.length, scratch.length - VALUE.length, scratchUTF16);
                visitor.longField(fieldInfo, Convert.ToInt64(scratchUTF16.ToString(), System.Globalization.CultureInfo.InvariantCulture));
            }
            else if (type == TYPE_FLOAT)
            {
                UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset + VALUE.length, scratch.length - VALUE.length, scratchUTF16);
                visitor.floatField(fieldInfo, Convert.ToSingle(scratchUTF16.ToString(), System.Globalization.CultureInfo.InvariantCulture));
            }
            else if (type == TYPE_DOUBLE)
            {
                UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset + VALUE.length, scratch.length - VALUE.length, scratchUTF16);
                visitor.doubleField(fieldInfo, Convert.ToDouble(scratchUTF16.ToString(), System.Globalization.CultureInfo.InvariantCulture));
            }
        }

コード例 #2

0

ファイルを表示

ファイル: FSTCompletionLookup.cs プロジェクト: sycct/lucenenet

        /// <summary>
        /// Looks up completions for <paramref name="key"/> and returns them as <see cref="LookupResult"/>s.
        /// </summary>
        /// <param name="key">Text to complete.</param>
        /// <param name="contexts">Must be <c>null</c>; any other value is rejected.</param>
        /// <param name="higherWeightsFirst">When true the lookup goes through <c>higherWeightsCompletion</c>,
        /// otherwise through <c>normalCompletion</c>.</param>
        /// <param name="num">Forwarded unchanged to the underlying completion lookup.</param>
        /// <returns>One result per completion, carrying the completion text and its bucket.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="contexts"/> is not <c>null</c>.</exception>
        public override IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, bool higherWeightsFirst, int num)
        {
            if (contexts != null)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            IList<FSTCompletion.Completion> matches = higherWeightsFirst
                ? higherWeightsCompletion.DoLookup(key, num)
                : normalCompletion.DoLookup(key, num);

            var lookupResults = new List<LookupResult>(matches.Count);
            var utf16         = new CharsRef(); // reused for every completion

            foreach (FSTCompletion.Completion match in matches)
            {
                utf16.Grow(match.Utf8.Length);
                UnicodeUtil.UTF8toUTF16(match.Utf8, utf16);
                lookupResults.Add(new LookupResult(utf16.ToString(), match.Bucket));
            }

            return lookupResults;
        }

コード例 #3

0

ファイルを表示

ファイル: CompressionTools.cs プロジェクト: mindis/Transformalize

 /// <summary>
 /// Decompresses <paramref name="value_Renamed"/> with <c>Decompress</c>, decodes the
 /// resulting bytes as UTF-8 and returns them as a string.
 /// </summary>
 /// <param name="value_Renamed">Compressed bytes.</param>
 /// <returns>The decoded string.</returns>
 public static System.String DecompressString(byte[] value_Renamed)
 {
     byte[] utf8 = Decompress(value_Renamed);

     UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
     UnicodeUtil.UTF8toUTF16(utf8, 0, utf8.Length, utf16);

     return new System.String(utf16.result, 0, utf16.length);
 }

コード例 #4

0

ファイルを表示

ファイル: TermBuffer.cs プロジェクト: mindis/Transformalize

        /// <summary>
        /// Reads the next term from <paramref name="input"/> into this buffer: a start position and a
        /// length (both VInts), then <c>length</c> chars or bytes written at <c>start</c>, then a VInt
        /// that is resolved to the field name through <paramref name="fieldInfos"/>.
        /// </summary>
        /// <param name="input">Source the term data is read from.</param>
        /// <param name="fieldInfos">Used to map the trailing VInt to a field name.</param>
        public void  Read(IndexInput input, FieldInfos fieldInfos)
        {
            this.term = null; // invalidate cache
            int start       = input.ReadVInt();
            int length      = input.ReadVInt();
            int totalLength = start + length;

            if (preUTF8Strings)
            {
                // Data is read directly as chars into the UTF-16 buffer; no UTF-8 decoding here.
                text.SetLength(totalLength);
                input.ReadChars(text.result, start, length);
            }
            else
            {
                if (dirty)
                {
                    // Fully convert all bytes since bytes is dirty
                    // Order matters: the byte buffer is rebuilt from the current text first, then the
                    // new suffix is read over it at 'start', and only then is the text re-decoded.
                    UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text);
                    dirty = false;
                }
                else
                {
                    // Incrementally convert only the UTF8 bytes that are new:
                    bytes.SetLength(totalLength);
                    input.ReadBytes(bytes.result, start, length);
                    UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text);
                }
            }
            this.field = fieldInfos.FieldName(input.ReadVInt());
        }

コード例 #5

0

ファイルを表示

ファイル: AnalyzingSuggester.cs プロジェクト: proazr/lucenenet

        /// <summary>
        /// Builds a <see cref="LookupResult"/> from one lookup output pair.
        /// </summary>
        /// <remarks>
        /// When <c>hasPayloads</c> is set, <paramref name="output2"/> is split at the first
        /// <c>PAYLOAD_SEP</c> byte: the bytes before it are decoded as the UTF-8 key, the bytes after
        /// it are copied into a new payload. Otherwise the whole of <paramref name="output2"/> is the key.
        /// </remarks>
        /// <param name="output1">Encoded weight; <c>null</c> is treated as the default value before decoding.</param>
        /// <param name="output2">UTF-8 key bytes, optionally followed by the separator and the payload.</param>
        /// <param name="spare">Scratch buffer that receives the decoded key; overwritten by this call.</param>
        /// <returns>The result holding the key, the decoded weight and, if present, the payload.</returns>
        private LookupResult GetLookupResult(long?output1, BytesRef output2, CharsRef spare)
        {
            LookupResult result;

            if (hasPayloads)
            {
                // Position of the separator relative to output2.Offset.
                int sepIndex = -1;
                for (int i = 0; i < output2.Length; i++)
                {
                    if (output2.Bytes[output2.Offset + i] == PAYLOAD_SEP)
                    {
                        sepIndex = i;
                        break;
                    }
                }
                Debug.Assert(sepIndex != -1);
                spare.Grow(sepIndex);

                int payloadLen = output2.Length - sepIndex - 1;
                UnicodeUtil.UTF8toUTF16(output2.Bytes, output2.Offset, sepIndex, spare);
                BytesRef payload = new BytesRef(payloadLen);
                // sepIndex is relative to the slice, so the copy has to start from output2.Offset as well;
                // without it a slice with a non-zero offset would yield the wrong payload bytes.
                Array.Copy(output2.Bytes, output2.Offset + sepIndex + 1, payload.Bytes, 0, payloadLen);
                payload.Length = payloadLen;
                result         = new LookupResult(spare.ToString(), DecodeWeight(output1.GetValueOrDefault()), payload);
            }
            else
            {
                spare.Grow(output2.Length);
                UnicodeUtil.UTF8toUTF16(output2, spare);
                result = new LookupResult(spare.ToString(), DecodeWeight(output1.GetValueOrDefault()));
            }

            return(result);
        }

コード例 #6

0

ファイルを表示

        /// <summary>
        /// Rebuilds the ternary tree from the entries of <paramref name="tfit"/>.
        /// </summary>
        /// <param name="tfit">Source of keys and weights; wrapped in a <c>SortedInputIterator</c> unless
        /// its comparer already is <c>BytesRef.UTF8SortedAsUTF16Comparer</c>.</param>
        /// <exception cref="System.ArgumentException">Thrown when the input reports payloads or contexts.</exception>
        public override void Build(IInputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // The whole input is buffered below, so make sure it arrives in UTF-16 sort order.
#pragma warning disable 612, 618
            if (tfit.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
            {
                tfit = new SortedInputIterator(tfit, BytesRef.UTF8SortedAsUTF16Comparer);
            }
#pragma warning restore 612, 618

            var keys    = new List<string>();
            var weights = new List<object>();
            var utf16   = new CharsRef();

            for (BytesRef utf8 = tfit.Next(); utf8 != null; utf8 = tfit.Next())
            {
                utf16.Grow(utf8.Length);
                UnicodeUtil.UTF8toUTF16(utf8.Bytes, utf8.Offset, utf8.Length, utf16);
                keys.Add(utf16.ToString());
                weights.Add(Convert.ToInt64(tfit.Weight));
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }

コード例 #7

0

ファイルを表示

        /// <summary>
        /// Rebuilds the trie from the entries of <paramref name="tfit"/>, skipping empty keys.
        /// </summary>
        /// <param name="tfit">Source of keys and weights; wrapped in an <c>UnsortedInputIterator</c>
        /// when it reports a comparator.</param>
        /// <exception cref="ArgumentException">Thrown when the input reports payloads or contexts.</exception>
        public override void Build(InputIterator tfit)
        {
            if (tfit.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (tfit.Comparator != null)
            {
                // A comparator means sorted input; wrap it so the entries are consumed unsorted.
                // (Original author's caveat: this may cost yet another sorted iteration.)
                tfit = new UnsortedInputIterator(tfit);
            }
            if (tfit.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            count = 0;
            trie  = new JaspellTernarySearchTrie();
            trie.MatchAlmostDiff = editDistance;

            CharsRef utf16 = new CharsRef();

            for (BytesRef utf8 = tfit.Next(); utf8 != null; utf8 = tfit.Next())
            {
                long weight = tfit.Weight;
                if (utf8.Length != 0)
                {
                    utf16.Grow(utf8.Length);
                    UnicodeUtil.UTF8toUTF16(utf8.Bytes, utf8.Offset, utf8.Length, utf16);
                    trie.Put(utf16.ToString(), Convert.ToInt64(weight));
                }
            }
        }

コード例 #8

0

ファイルを表示

ファイル: MoreLikeThis.cs プロジェクト: sycct/lucenenet

        /// <summary>
        /// Walks every term of <paramref name="vector"/> and accumulates its total term frequency
        /// into <paramref name="termFreqMap"/>; terms for which <c>IsNoiseWord</c> returns true are skipped.
        /// </summary>
        /// <param name="termFreqMap"> Map from term text to its running frequency counter; updated in place </param>
        /// <param name="vector"> Terms and their frequencies for a doc/field </param>
        private void AddTermFrequencies(IDictionary <string, Int32> termFreqMap, Terms vector)
        {
            var iterator = vector.GetIterator(null);
            var utf16    = new CharsRef();

            for (BytesRef utf8 = iterator.Next(); utf8 != null; utf8 = iterator.Next())
            {
                UnicodeUtil.UTF8toUTF16(utf8, utf16);
                var word = utf16.ToString();
                if (!IsNoiseWord(word))
                {
                    var occurrences = (int)iterator.TotalTermFreq;

                    Int32 counter;
                    if (termFreqMap.TryGetValue(word, out counter))
                    {
                        // Seen before: add to the existing counter.
                        counter.x += occurrences;
                    }
                    else
                    {
                        // First sighting: register a new counter for this term.
                        counter = new Int32();
                        termFreqMap[word] = counter;
                        counter.x         = occurrences;
                    }
                }
            }
        }

コード例 #9

0

ファイルを表示

ファイル: StemmerOverrideFilter.cs プロジェクト: ywscr/lucenenet

 /// <summary>
 /// Advances the wrapped stream and, when <c>stemmerOverrideMap</c> has an entry for the current
 /// term, replaces the term text with that entry and marks the token as a keyword.
 /// </summary>
 /// <returns><c>false</c> when the wrapped stream has no more tokens, otherwise <c>true</c>.</returns>
 public override bool IncrementToken()
 {
     if (!m_input.IncrementToken())
     {
         return false;
     }

     // Nothing to do without overrides, and already-keyworded terms are left alone.
     if (fstReader == null || keywordAtt.IsKeyword)
     {
         return true;
     }

     BytesRef overrideStem = stemmerOverrideMap.Get(termAtt.Buffer, termAtt.Length, scratchArc, fstReader);
     if (overrideStem == null)
     {
         return true;
     }

     // Decode straight into the term's own buffer; copy back only if the decoder swapped the array.
     char[] termBuffer = termAtt.Buffer;
     spare.Chars = termBuffer;
     UnicodeUtil.UTF8toUTF16(overrideStem.Bytes, overrideStem.Offset, overrideStem.Length, spare);
     if (spare.Chars != termBuffer)
     {
         termAtt.CopyBuffer(spare.Chars, spare.Offset, spare.Length);
     }
     termAtt.Length       = spare.Length;
     keywordAtt.IsKeyword = true;
     return true;
 }

コード例 #10

0

ファイルを表示

ファイル: StemmerOverrideFilter.cs プロジェクト: leotohill/lucene.net

//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the wrapped stream and, when <c>stemmerOverrideMap</c> has an entry for the current
        /// term, replaces the term text with that entry and marks the token as a keyword.
        /// </summary>
        /// <returns><c>false</c> when the wrapped stream has no more tokens, otherwise <c>true</c>.</returns>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            // Nothing to do without overrides, and already-keyworded terms are left alone.
            if (fstReader == null || keywordAtt.Keyword)
            {
                return true;
            }

            BytesRef overrideStem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
            if (overrideStem == null)
            {
                return true;
            }

            // Decode straight into the term's own buffer; copy back only if the decoder swapped the array.
            char[] termBuffer = termAtt.buffer();
            spare.chars = termBuffer;
            UnicodeUtil.UTF8toUTF16(overrideStem.bytes, overrideStem.offset, overrideStem.length, spare);
            if (spare.chars != termBuffer)
            {
                termAtt.copyBuffer(spare.chars, spare.offset, spare.length);
            }
            termAtt.Length     = spare.length;
            keywordAtt.Keyword = true;
            return true;
        }

コード例 #11

0

ファイルを表示

        /// <summary>
        /// Fills a fixed-size char buffer via <c>FillUnicode</c>, converts it with
        /// <c>UnicodeUtil.UTF16toUTF8</c> and back with <c>UnicodeUtil.UTF8toUTF16</c>, and checks the
        /// round-tripped chars against the expected buffer. When <c>FillUnicode</c> reports no illegal
        /// input, the UTF-8 bytes are also compared with those produced by <c>GetBytes</c>.
        /// </summary>
        public virtual void TestRandomUnicodeStrings()
        {
            // Number of UTF-16 code units per generated string.
            const int charCount = 20;

            char[] buffer   = new char[charCount];
            char[] expected = new char[charCount];

            BytesRef utf8  = new BytesRef(charCount);
            CharsRef utf16 = new CharsRef(charCount);

            int num = AtLeast(100000);

            for (int iter = 0; iter < num; iter++)
            {
                bool hasIllegal = FillUnicode(buffer, expected, 0, charCount);

                UnicodeUtil.UTF16toUTF8(buffer, 0, charCount, utf8);
                if (!hasIllegal)
                {
#pragma warning disable 612, 618
                    var b = (new string(buffer, 0, charCount)).GetBytes(IOUtils.CHARSET_UTF_8);
#pragma warning restore 612, 618
                    Assert.AreEqual(b.Length, utf8.Length);
                    for (int i = 0; i < b.Length; i++)
                    {
                        Assert.AreEqual(b[i], utf8.Bytes[i]);
                    }
                }

                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
                // Expected value goes first so a failure message reports the two values the right way round.
                Assert.AreEqual(charCount, utf16.Length);
                for (int i = 0; i < charCount; i++)
                {
                    Assert.AreEqual(expected[i], utf16.Chars[i]);
                }
            }
        }

コード例 #12

0

ファイルを表示

        /// <summary>
        /// Creates a new <see cref="QueryAutoStopWordAnalyzer"/> whose stopwords are, for each of the
        /// given fields, the terms with a document frequency greater than <paramref name="maxDocFreq"/>.
        /// </summary>
        /// <param name="matchVersion"> Version to be used in <see cref="StopFilter"/> </param>
        /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
        /// <param name="indexReader"> <see cref="IndexReader"/> the stopwords are collected from </param>
        /// <param name="fields"> Fields to collect stopwords for; a field without terms gets an empty set </param>
        /// <param name="maxDocFreq"> Document frequency a term must exceed to become a stopword </param>
        /// <exception cref="IOException"> Can be thrown while reading from the <see cref="IndexReader"/> </exception>
        public QueryAutoStopWordAnalyzer(LuceneVersion matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection <string> fields, int maxDocFreq)
            : base(@delegate.Strategy)
        {
            this.matchVersion = matchVersion;
            this.@delegate    = @delegate;

            foreach (string fieldName in fields)
            {
                var      frequentTerms = new JCG.HashSet <string>();
                Terms    fieldTerms    = MultiFields.GetTerms(indexReader, fieldName);
                CharsRef utf16         = new CharsRef();

                if (fieldTerms != null)
                {
                    TermsEnum cursor = fieldTerms.GetEnumerator();
                    while (cursor.MoveNext())
                    {
                        if (cursor.DocFreq <= maxDocFreq)
                        {
                            continue; // not frequent enough to count as a stopword
                        }
                        UnicodeUtil.UTF8toUTF16(cursor.Term, utf16);
                        frequentTerms.Add(utf16.ToString());
                    }
                }

                stopWordsPerField[fieldName] = frequentTerms;
            }
        }

コード例 #13

0

ファイルを表示

        /// <summary>
        /// Decompresses the given slice of <paramref name="value"/> with <c>Decompress</c> and decodes
        /// the resulting bytes from UTF-8 into a <see cref="string"/>.
        /// </summary>
        /// <param name="value">Array holding the compressed data.</param>
        /// <param name="offset">Forwarded to <c>Decompress</c> as the slice offset.</param>
        /// <param name="length">Forwarded to <c>Decompress</c> as the slice length.</param>
        /// <returns>The decoded string.</returns>
        public static string DecompressString(byte[] value, int offset, int length)
        {
            byte[] utf8  = Decompress(value, offset, length);
            var    utf16 = new CharsRef(utf8.Length);

            UnicodeUtil.UTF8toUTF16(utf8, 0, utf8.Length, utf16);

            return new string(utf16.Chars, 0, utf16.Length);
        }

コード例 #14

0

ファイルを表示

        // Currently used only by assert statement
        /// <summary>
        /// Compares the previously seen (field, term) pair with the given one: first by field name
        /// (ordinal comparison), then by the UTF-16 code units of the terms.
        /// </summary>
        /// <param name="fieldNumber">Field number of the new term.</param>
        /// <param name="term">UTF-8 bytes of the new term.</param>
        /// <returns>Negative, zero or positive as the last term sorts before, equal to or after the new one.</returns>
        private int CompareToLastTerm(int fieldNumber, BytesRef term)
        {
            if (lastFieldNumber != fieldNumber)
            {
                int fieldOrder = FieldName(fieldInfos, lastFieldNumber).CompareToOrdinal(FieldName(fieldInfos, fieldNumber));
                // A tie is only acceptable while no field has been seen yet (lastFieldNumber == -1),
                // which happens when a field is named "" (empty string). Two different field numbers
                // mapping to the same name is not OK, so that tie is returned as-is.
                bool tieBeforeFirstField = fieldOrder == 0 && lastFieldNumber == -1;
                if (!tieBeforeFirstField)
                {
                    return fieldOrder;
                }
            }

            scratchBytes.CopyBytes(term);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(lastTerm.Offset == 0);
            }
            UnicodeUtil.UTF8toUTF16(lastTerm.Bytes, 0, lastTerm.Length, utf16Result1);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(scratchBytes.Offset == 0);
            }
            UnicodeUtil.UTF8toUTF16(scratchBytes.Bytes, 0, scratchBytes.Length, utf16Result2);

            int previousLength = utf16Result1.Length;
            int currentLength  = utf16Result2.Length;
            int sharedLength   = previousLength < currentLength ? previousLength : currentLength;

            for (int i = 0; i < sharedLength; i++)
            {
                int delta = utf16Result1.Chars[i] - utf16Result2.Chars[i];
                if (delta != 0)
                {
                    return delta;
                }
            }

            if (previousLength == 0 && lastFieldNumber == -1)
            {
                // A field named "" with a term text of "" would compare as 0 here, yet that is "OK".
                return -1;
            }
            return previousLength - currentLength;
        }

コード例 #15

0

ファイルを表示

ファイル: DocTermsIndexDocValues.cs プロジェクト: wow64bb/YAFNET

 /// <summary>
 /// Returns the term of document <paramref name="doc"/> as a string.
 /// </summary>
 /// <param name="doc">Document to look up in <c>m_termsIndex</c>.</param>
 /// <returns>The decoded term, or <c>null</c> when the term read for the document is empty.</returns>
 public override string StrVal(int doc)
 {
     m_termsIndex.Get(doc, m_spare);
     if (m_spare.Length != 0)
     {
         UnicodeUtil.UTF8toUTF16(m_spare, m_spareChars);
         return m_spareChars.ToString();
     }
     return null;
 }

コード例 #16

0

ファイルを表示

ファイル: SimpleTextFieldsReader.cs プロジェクト: segovia/lucenenet

            /// <summary>
            /// Reads the data of the next position from the input: the position itself when
            /// <c>_readPositions</c> is set, the start and end offsets when <c>_readOffsets</c> is set,
            /// and an optional payload line.
            /// </summary>
            /// <returns>The parsed position, or <c>-1</c> when positions are not being read.</returns>
            public override int NextPosition()
            {
                int pos;

                if (_readPositions)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.POS.Length, _scratch.Length - SimpleTextFieldsWriter.POS.Length,
                                            _scratchUtf162);
                    pos = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }
                else
                {
                    pos = -1;
                }

                if (_readOffsets)
                {
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.START_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.START_OFFSET.Length, _scratchUtf162);
                    _startOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                    SimpleTextUtil.ReadLine(_in, _scratch);
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET), "got line=" + _scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.END_OFFSET.Length,
                                            _scratch.Length - SimpleTextFieldsWriter.END_OFFSET.Length, _scratchUtf162);
                    _endOffset = ArrayUtil.ParseInt32(_scratchUtf162.Chars, 0, _scratchUtf162.Length);
                }

                // Remember where the next line starts so it can be "un-read" if it is not a payload.
                long fp = _in.GetFilePointer();

                SimpleTextUtil.ReadLine(_in, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                {
                    int len = _scratch.Length - SimpleTextFieldsWriter.PAYLOAD.Length;
                    if (_scratch2.Bytes.Length < len)
                    {
                        _scratch2.Grow(len);
                    }
                    // Start from _scratch.Offset like every other read of _scratch in this method, so the
                    // payload stays correct even if the line buffer does not begin at index 0.
                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.PAYLOAD.Length, _scratch2.Bytes, 0, len);
                    _scratch2.Length = len;
                    _payload         = _scratch2;
                }
                else
                {
                    _payload = null;
                    _in.Seek(fp);
                }
                return(pos);
            }

コード例 #17

0

ファイルを表示

            /// <summary>
            /// Reads the data of the next position from the input: the position itself when
            /// <c>readPositions</c> is set, the start and end offsets when <c>readOffsets</c> is set,
            /// and an optional payload line.
            /// </summary>
            /// <returns>The parsed position, or <c>-1</c> when positions are not being read.</returns>
            public override int NextPosition()
            {
                int pos;

                if (readPositions)
                {
                    SimpleTextUtil.ReadLine(@in, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, POS), "got line=" + scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.Offset + POS.length, scratch.Length - POS.length,
                                            scratchUTF16_2);
                    pos = ArrayUtil.ParseInt(scratchUTF16_2.Chars, 0, scratchUTF16_2.length);
                }
                else
                {
                    pos = -1;
                }

                if (readOffsets)
                {
                    SimpleTextUtil.ReadLine(@in, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, START_OFFSET), "got line=" + scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.Offset + START_OFFSET.length,
                                            scratch.Length - START_OFFSET.length, scratchUTF16_2);
                    startOffset_Renamed = ArrayUtil.ParseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
                    SimpleTextUtil.ReadLine(@in, scratch);
                    Debug.Assert(StringHelper.StartsWith(scratch, END_OFFSET), "got line=" + scratch.Utf8ToString());
                    UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + END_OFFSET.length,
                                            scratch.Length - END_OFFSET.length, scratchUTF16_2);
                    endOffset_Renamed = ArrayUtil.ParseInt(scratchUTF16_2.Chars, 0, scratchUTF16_2.length);
                }

                // Remember where the next line starts so it can be "un-read" if it is not a payload.
                long fp = @in.FilePointer;

                SimpleTextUtil.ReadLine(@in, scratch);
                if (StringHelper.StartsWith(scratch, PAYLOAD))
                {
                    int len = scratch.Length - PAYLOAD.length;
                    if (scratch2.Bytes.Length < len)
                    {
                        scratch2.Grow(len);
                    }
                    // Start from scratch.Offset like every other read of scratch in this method, so the
                    // payload stays correct even if the line buffer does not begin at index 0.
                    Array.Copy(scratch.Bytes, scratch.Offset + PAYLOAD.length, scratch2.Bytes, 0, len);
                    scratch2.Length = len;
                    payload         = scratch2;
                }
                else
                {
                    payload = null;
                    @in.Seek(fp);
                }
                return(pos);
            }

コード例 #18

0

ファイルを表示

        /// <summary>
        /// Builds a minimal, deterministic automaton from a sorted collection of <see cref="BytesRef"/>s
        /// holding UTF-8 strings. The strings must be binary-sorted.
        /// </summary>
        /// <param name="input">The sorted UTF-8 strings to add.</param>
        /// <returns>An automaton flagged as deterministic whose initial state is converted from the builder's result.</returns>
        public static Automaton Build(ICollection <BytesRef> input)
        {
            var automatonBuilder = new DaciukMihovAutomatonBuilder();
            var utf16            = new CharsRef(); // reused for every input string

            foreach (BytesRef utf8 in input)
            {
                UnicodeUtil.UTF8toUTF16(utf8, utf16);
                automatonBuilder.Add(utf16);
            }

            Automaton automaton = new Automaton();
            automaton.initial       = Convert(automatonBuilder.Complete(), new JCG.Dictionary <State, Lucene.Net.Util.Automaton.State>(IdentityEqualityComparer <State> .Default));
            automaton.deterministic = true;
            return automaton;
        }

コード例 #19

0

ファイルを表示

        /// <summary>
        /// Builds a minimal, deterministic automaton from a sorted collection of <see cref="BytesRef"/>s
        /// holding UTF-8 strings. The strings must be binary-sorted.
        /// </summary>
        /// <param name="input">The sorted UTF-8 strings to add.</param>
        /// <returns>An automaton flagged as deterministic whose initial state is converted from the builder's result.</returns>
        public static Automaton Build(ICollection <BytesRef> input)
        {
            var automatonBuilder = new DaciukMihovAutomatonBuilder();
            var utf16            = new CharsRef(); // reused for every input string

            foreach (BytesRef utf8 in input)
            {
                UnicodeUtil.UTF8toUTF16(utf8, utf16);
                automatonBuilder.Add(utf16);
            }

            return new Automaton
            {
                initial       = Convert(automatonBuilder.Complete(), new IdentityHashMap <State, Lucene.Net.Util.Automaton.State>()),
                deterministic = true
            };
        }

コード例 #20

0

ファイルを表示

 /// <summary>
 /// Reads the next line into <c>_scratch</c> and passes the text after the <c>VALUE</c> prefix to
 /// <paramref name="visitor"/>, decoded according to <paramref name="type"/>.
 /// </summary>
 /// <param name="type">Type marker, matched with <c>Equals</c> against the <c>TYPE_*</c> constants of
 /// <c>SimpleTextStoredFieldsWriter</c>; a marker matching none of them is ignored.</param>
 /// <param name="fieldInfo">Field description forwarded unchanged to the visitor.</param>
 /// <param name="visitor">Receiver of the decoded value.</param>
 private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
 {
     ReadLine();
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
     }

     // The value is everything that follows the VALUE prefix on the line just read.
     int valueStart  = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
     int valueLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
     {
         visitor.StringField(fieldInfo, Encoding.UTF8.GetString(_scratch.Bytes, valueStart, valueLength));
         return;
     }

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
     {
         var copy = new byte[valueLength];
         Array.Copy(_scratch.Bytes, valueStart, copy, 0, copy.Length);
         visitor.BinaryField(fieldInfo, copy);
         return;
     }

     // Numeric types: decode the text to UTF-16, then parse it culture-invariantly.
     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
         visitor.Int32Field(fieldInfo, J2N.Numerics.Int32.Parse(_scratchUtf16.ToString(), NumberFormatInfo.InvariantInfo));
         return;
     }

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
         visitor.Int64Field(fieldInfo, J2N.Numerics.Int64.Parse(_scratchUtf16.ToString(), NumberFormatInfo.InvariantInfo));
         return;
     }

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
         visitor.SingleField(fieldInfo, J2N.Numerics.Single.Parse(_scratchUtf16.ToString(), NumberStyle.Float, NumberFormatInfo.InvariantInfo));
         return;
     }

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, valueStart, valueLength, _scratchUtf16);
         visitor.DoubleField(fieldInfo, J2N.Numerics.Double.Parse(_scratchUtf16.ToString(), NumberStyle.Float, NumberFormatInfo.InvariantInfo));
     }
 }

コード例 #21

0

ファイルを表示

        /// <summary>
        /// Rebuilds the trie from the entries of <paramref name="enumerator"/>, skipping empty keys.
        /// </summary>
        /// <param name="enumerator">Source of keys and weights; wrapped in an
        /// <c>UnsortedInputEnumerator</c> when it reports a comparer.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="enumerator"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">Thrown when the input reports payloads or contexts.</exception>
        public override void Build(IInputEnumerator enumerator)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }

            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.Comparer != null)
            {
                // A comparer means sorted input; wrap it so the entries are consumed unsorted.
                // (Original author's caveat: this may cost yet another sorted iteration.)
                enumerator = new UnsortedInputEnumerator(enumerator);
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            count = 0;

            var freshTrie = new JaspellTernarySearchTrie();
            freshTrie.MatchAlmostDiff = editDistance;
            trie = freshTrie;

            var utf16 = new CharsRef();

            while (enumerator.MoveNext())
            {
                BytesRef utf8   = enumerator.Current;
                long     weight = enumerator.Weight;
                if (utf8.Length != 0)
                {
                    utf16.Grow(utf8.Length);
                    UnicodeUtil.UTF8toUTF16(utf8.Bytes, utf8.Offset, utf8.Length, utf16);
                    trie.Put(utf16.ToString(), weight);
                }
            }
        }

コード例 #22

0

ファイルを表示

        /// <summary>
        /// For each code point below 0x10FFFF (the surrogate range 0xD800-0xDFFF is skipped), encodes it
        /// with <c>UnicodeUtil.UTF16toUTF8</c>, decodes it again with <c>UnicodeUtil.UTF8toUTF16</c>, and
        /// checks both directions against the framework's UTF-8 encoding.
        /// </summary>
        public virtual void TestAllUnicodeChars()
        {
            BytesRef utf8  = new BytesRef(10);
            CharsRef utf16 = new CharsRef(10);

            char[] units = new char[2];
            for (int codePoint = 0; codePoint < 0x0010FFFF; codePoint++)
            {
                // Skip invalid code points
                if (codePoint == 0xd800)
                {
                    codePoint = 0xe000;
                }

                int unitCount;
                if (codePoint > 0xffff)
                {
                    // Supplementary code point: split into a high/low surrogate pair.
                    units[0]  = (char)(((codePoint - 0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START);
                    units[1]  = (char)(((codePoint - 0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START);
                    unitCount = 2;
                }
                else
                {
                    units[0]  = (char)codePoint;
                    unitCount = 1;
                }

                UnicodeUtil.UTF16toUTF8(units, 0, unitCount, utf8);

                string original   = new string(units, 0, unitCount);
                string viaEncoder = Encoding.UTF8.GetString(utf8.Bytes, utf8.Offset, utf8.Length);
                Assert.AreEqual(original, viaEncoder, "codepoint " + codePoint);

                UnicodeUtil.UTF8toUTF16(utf8.Bytes, 0, utf8.Length, utf16);
                Assert.AreEqual(original, new string(utf16.Chars, 0, utf16.Length), "codepoint " + codePoint);

                var referenceBytes = original.GetBytes(Encoding.UTF8);
                Assert.AreEqual(utf8.Length, referenceBytes.Length);
                for (int j = 0; j < utf8.Length; j++)
                {
                    Assert.AreEqual(utf8.Bytes[j], referenceBytes[j]);
                }
            }
        }

コード例 #23

0

ファイルを表示

 /// <summary>
 /// Reads the next line, which must start with the VALUE marker, and hands the
 /// payload after the marker to <paramref name="visitor"/> using the callback
 /// that matches <paramref name="type"/>. A type matching none of the known
 /// constants is ignored.
 /// </summary>
 /// <param name="type"> type marker compared against the TYPE_* constants </param>
 /// <param name="fieldInfo"> field description passed through to the visitor </param>
 /// <param name="visitor"> receiver of the decoded value </param>
 private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
 {
     ReadLine();
     Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));

     // The payload is everything on the line after the VALUE marker.
     int payloadStart  = _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length;
     int payloadLength = _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length;

     if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
     {
         visitor.StringField(fieldInfo, Encoding.UTF8.GetString(_scratch.Bytes, payloadStart, payloadLength));
     }
     else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
     {
         // Hand the visitor its own copy so it never aliases the scratch buffer.
         var payload = new byte[payloadLength];
         Array.Copy(_scratch.Bytes, payloadStart, payload, 0, payload.Length);
         visitor.BinaryField(fieldInfo, payload);
     }
     else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, payloadStart, payloadLength, _scratchUtf16);
         string digits = _scratchUtf16.ToString();
         visitor.Int32Field(fieldInfo, Convert.ToInt32(digits, CultureInfo.InvariantCulture));
     }
     else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, payloadStart, payloadLength, _scratchUtf16);
         string digits = _scratchUtf16.ToString();
         visitor.Int64Field(fieldInfo, Convert.ToInt64(digits, CultureInfo.InvariantCulture));
     }
     else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, payloadStart, payloadLength, _scratchUtf16);
         string digits = _scratchUtf16.ToString();
         visitor.SingleField(fieldInfo, Convert.ToSingle(digits, CultureInfo.InvariantCulture));
     }
     else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
     {
         UnicodeUtil.UTF8toUTF16(_scratch.Bytes, payloadStart, payloadLength, _scratchUtf16);
         string digits = _scratchUtf16.ToString();
         visitor.DoubleField(fieldInfo, Convert.ToDouble(digits, CultureInfo.InvariantCulture));
     }
 }

コード例 #24

0

ファイルを表示

ファイル: TSTLookup.cs プロジェクト: ywscr/lucenenet

        /// <summary>
        /// Rebuilds the ternary tree from the supplied entries. Input that is not
        /// already ordered by <c>BytesRef.UTF8SortedAsUTF16Comparer</c> is wrapped in a
        /// <c>SortedInputEnumerator</c> first; every entry is then decoded to a
        /// string and the collected keys and weights are passed to
        /// <c>autocomplete.BalancedTree</c>.
        /// </summary>
        /// <param name="enumerator"> source of keys and weights </param>
        /// <exception cref="ArgumentNullException"> <paramref name="enumerator"/> is <c>null</c> </exception>
        /// <exception cref="ArgumentException"> the input carries payloads or contexts </exception>
        public override void Build(IInputEnumerator enumerator)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }

            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            root = new TernaryTreeNode();

            // Buffer first: the input must be sorted, and sorted in UTF-16 order.
#pragma warning disable 612, 618
            if (enumerator.Comparer != BytesRef.UTF8SortedAsUTF16Comparer)
            {
                enumerator = new SortedInputEnumerator(enumerator, BytesRef.UTF8SortedAsUTF16Comparer);
            }
#pragma warning restore 612, 618

            var keys      = new JCG.List <string>();
            var weights   = new JCG.List <object>();
            var utf16Term = new CharsRef();

            while (enumerator.MoveNext())
            {
                BytesRef utf8Term = enumerator.Current;

                utf16Term.Grow(utf8Term.Length);
                UnicodeUtil.UTF8toUTF16(utf8Term.Bytes, utf8Term.Offset, utf8Term.Length, utf16Term);

                keys.Add(utf16Term.ToString());
                weights.Add(enumerator.Weight);
            }

            autocomplete.BalancedTree(keys.ToArray(), weights.ToArray(), 0, keys.Count - 1, root);
        }

コード例 #25

0

ファイルを表示

ファイル: TermInfosWriter.cs プロジェクト: mindis/Transformalize

        // Currently used only by assert statement.
        // Orders (fieldNumber, term) against the previously recorded term: fields are
        // compared by name (ordinal), terms by their decoded UTF-16 code units.
        // Returns a negative, zero or positive value in the usual comparison sense.
        private int CompareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
        {
            if (lastFieldNumber != fieldNumber)
            {
                int fieldOrder = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber));
                // If there is a field named "" (empty string) then we
                // will get 0 on this comparison, yet, it's "OK".  But
                // it's not OK if two different field numbers map to
                // the same name.
                if (fieldOrder != 0 || lastFieldNumber != -1)
                {
                    return fieldOrder;
                }
            }

            UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1);
            UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2);

            // Compare over the common prefix; the first differing code unit decides.
            int shared = Math.Min(utf16Result1.length, utf16Result2.length);
            for (int pos = 0; pos < shared; pos++)
            {
                int delta = utf16Result1.result[pos] - utf16Result2.result[pos];
                if (delta != 0)
                {
                    return delta;
                }
            }

            // One term is a prefix of the other: the shorter one sorts first.
            return utf16Result1.length - utf16Result2.length;
        }

コード例 #26

0

ファイルを表示

ファイル: MoreLikeThis.cs プロジェクト: kushanp/lucene.net

        /// <summary>
        /// Adds terms and frequencies found in vector into the Map termFreqMap.
        /// Terms for which <c>IsNoiseWord</c> returns true are skipped; for every
        /// other term its total term frequency is added to the entry already in the
        /// map, or a new entry is created when the term has not been seen before.
        /// </summary>
        /// <param name="termFreqMap"> a Map of terms and their frequencies </param>
        /// <param name="vector"> List of terms and their frequencies for a doc/field </param>
        private void AddTermFrequencies(IDictionary <string, Int> termFreqMap, Terms vector)
        {
            TermsEnum termsEnum = vector.Iterator(null);
            CharsRef  spare     = new CharsRef();
            BytesRef  text;

            while ((text = termsEnum.Next()) != null)
            {
                UnicodeUtil.UTF8toUTF16(text, spare);
                string term = spare.ToString();
                if (IsNoiseWord(term))
                {
                    continue;
                }

                int freq = (int)termsEnum.TotalTermFreq();

                // increment frequency
                // Unlike Java's Map.get, the IDictionary indexer throws KeyNotFoundException
                // for a missing key, so the lookup has to go through TryGetValue.
                Int cnt;
                if (!termFreqMap.TryGetValue(term, out cnt) || cnt == null)
                {
                    cnt = new Int();
                    termFreqMap[term] = cnt;
                    cnt.x             = freq;
                }
                else
                {
                    cnt.x += freq;
                }
            }
        }

コード例 #27

0

ファイルを表示

        /// <summary>
        /// Collects candidate corrections for <paramref name="term"/> at one maximum
        /// edit distance, keeping at most <paramref name="numSug"/> candidates in a
        /// priority queue.
        /// </summary>
        /// <param name="term"> The term to suggest spelling corrections for </param>
        /// <param name="numSug"> The maximum number of spelling corrections </param>
        /// <param name="ir"> The index reader to fetch the candidate spelling corrections from </param>
        /// <param name="docfreq"> The document frequency a candidate has to exceed in order to be included </param>
        /// <param name="editDistance"> The maximum edit distance candidates are allowed to have </param>
        /// <param name="accuracy"> The minimum accuracy a suggested spelling correction needs to have in order to be included </param>
        /// <param name="spare"> a chars scratch </param>
        /// <returns> a collection of spelling corrections sorted by <see cref="ScoreTerm"/>'s natural order. </returns>
        /// <exception cref="System.IO.IOException"> If I/O related errors occur </exception>
        protected internal virtual IEnumerable <ScoreTerm> SuggestSimilar(Term term, int numSug, IndexReader ir,
                                                                          int docfreq, int editDistance, float accuracy, CharsRef spare)
        {
            AttributeSource attributes = new AttributeSource();
            IMaxNonCompetitiveBoostAttribute maxBoost = attributes.AddAttribute <IMaxNonCompetitiveBoostAttribute>();

            Terms fieldTerms = MultiFields.GetTerms(ir, term.Field);
            if (fieldTerms == null)
            {
                // The field has no terms, hence nothing to suggest.
                return new List <ScoreTerm>();
            }

            FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(fieldTerms, attributes, term, editDistance, Math.Max(minPrefix, editDistance - 1), true);
            Support.PriorityQueue <ScoreTerm> queue = new Support.PriorityQueue <ScoreTerm>();
            BytesRef        queryBytes     = new BytesRef(term.Text());
            ScoreTerm       slot           = new ScoreTerm();
            IBoostAttribute boostAttribute = fuzzy.Attributes.AddAttribute <IBoostAttribute>();

            for (BytesRef candidate = fuzzy.Next(); candidate != null; candidate = fuzzy.Next())
            {
                float boost = boostAttribute.Boost;

                // Skip hits that cannot beat the weakest queued entry once the queue
                // is full, and skip the query term itself.
                bool uncompetitive = queue.Count >= numSug && boost <= queue.Peek().Boost;
                if (uncompetitive || queryBytes.BytesEquals(candidate))
                {
                    continue;
                }

                // check docFreq if required
                int candidateDocFreq = fuzzy.DocFreq;
                if (candidateDocFreq <= docfreq)
                {
                    continue;
                }

                // With the internal Levenshtein measure the string form is not needed
                // yet, so it stays null and is created lazily by the caller.
                string candidateText = null;
                float  score;
                if (distance == INTERNAL_LEVENSHTEIN)
                {
                    // undo FuzzyTermsEnum's scale factor for a real scaled lev score
                    score = boost / fuzzy.ScaleFactor + fuzzy.MinSimilarity;
                }
                else
                {
                    UnicodeUtil.UTF8toUTF16(candidate, spare);
                    candidateText = spare.ToString();
                    score         = distance.GetDistance(term.Text(), candidateText);
                }

                if (score < accuracy)
                {
                    continue;
                }

                // Fill the spare entry and push it onto the queue.
                slot.Term         = BytesRef.DeepCopyOf(candidate);
                slot.Boost        = boost;
                slot.Docfreq      = candidateDocFreq;
                slot.TermAsString = candidateText;
                slot.Score        = score;
                queue.Offer(slot);

                // On overflow, recycle the evicted entry as the next spare; otherwise allocate one.
                if (queue.Count > numSug)
                {
                    slot = queue.Poll();
                }
                else
                {
                    slot = new ScoreTerm();
                }

                // Tell the enumerator which boost is no longer competitive.
                if (queue.Count >= numSug)
                {
                    maxBoost.MaxNonCompetitiveBoost = queue.Peek().Boost;
                }
                else
                {
                    maxBoost.MaxNonCompetitiveBoost = float.NegativeInfinity;
                }
            }

            return queue;
        }

コード例 #28

0

ファイルを表示

        /// <summary>
        /// Suggest similar words.
        ///
        /// <para>
        /// Unlike <see cref="SpellChecker"/>, the similarity used to fetch the most
        /// relevant terms is an edit distance, therefore typically a low value
        /// for numSug will work very well.
        /// </para>
        /// <para>
        /// An empty array is returned when the query is shorter than
        /// <c>minQueryLength</c> code points, when the term is present in the index and
        /// <paramref name="suggestMode"/> is <c>SUGGEST_WHEN_NOT_IN_INDEX</c>, or when
        /// the term's document frequency exceeds the <c>maxQueryFrequency</c> limit.
        /// </para>
        /// </summary>
        /// <param name="term"> Term you want to spell check on </param>
        /// <param name="numSug"> the maximum number of suggested words </param>
        /// <param name="ir"> IndexReader to find terms from </param>
        /// <param name="suggestMode"> specifies when to return suggested words </param>
        /// <param name="accuracy"> return only suggested words that match with this similarity </param>
        /// <returns> sorted list of the suggested words according to the comparer </returns>
        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir,
                                                    SuggestMode suggestMode, float accuracy)
        {
            CharsRef spare = new CharsRef();
            string   text  = term.Text();

            if (minQueryLength > 0 && text.CodePointCount(0, text.Length) < minQueryLength)
            {
                return(new SuggestWord[0]);
            }

            if (lowerCaseTerms)
            {
                // Lower-case with invariant rules so the result does not depend on the
                // thread's current culture (e.g. Turkish dotted/dotless i).
                term = new Term(term.Field, text.ToLowerInvariant());
            }

            int docfreq = ir.DocFreq(term);

            if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0)
            {
                return(new SuggestWord[0]);
            }

            int maxDoc = ir.MaxDoc;

            // maxQueryFrequency >= 1 is an absolute document count; the second test
            // scales it by maxDoc, i.e. treats it as a fraction of the index.
            if (maxQueryFrequency >= 1f && docfreq > maxQueryFrequency)
            {
                return(new SuggestWord[0]);
            }
            else if (docfreq > (int)Math.Ceiling(maxQueryFrequency * maxDoc))
            {
                return(new SuggestWord[0]);
            }

            // Only SUGGEST_MORE_POPULAR uses the query term's own frequency as the floor.
            if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR)
            {
                docfreq = 0;
            }

            // thresholdFrequency follows the same absolute-vs-fraction convention.
            if (thresholdFrequency >= 1f)
            {
                docfreq = Math.Max(docfreq, (int)thresholdFrequency);
            }
            else if (thresholdFrequency > 0f)
            {
                docfreq = Math.Max(docfreq, (int)(thresholdFrequency * maxDoc) - 1);
            }

            int inspections = numSug * maxInspections;

            // try ed=1 first, in case we get lucky
            IEnumerable <ScoreTerm> terms = SuggestSimilar(term, inspections, ir, docfreq, 1, accuracy, spare);
            if (maxEdits > 1 && terms.Count() < inspections)
            {
                // Not enough candidates: merge in the results for the larger edit distance.
                var moreTerms = new HashSet <ScoreTerm>();
                moreTerms.AddAll(terms);
                moreTerms.AddAll(SuggestSimilar(term, inspections, ir, docfreq, maxEdits, accuracy, spare));
                terms = moreTerms;
            }

            // create the suggestword response, sort it, and trim it to size.

            var suggestions = new SuggestWord[terms.Count()];
            int index       = suggestions.Length - 1;

            // The array is filled from the last slot backwards.
            foreach (ScoreTerm s in terms)
            {
                SuggestWord suggestion = new SuggestWord();
                if (s.TermAsString == null)
                {
                    // The string form was deferred by the candidate search; create it now.
                    UnicodeUtil.UTF8toUTF16(s.Term, spare);
                    s.TermAsString = spare.ToString();
                }
                suggestion.String    = s.TermAsString;
                suggestion.Score     = s.Score;
                suggestion.Freq      = s.Docfreq;
                suggestions[index--] = suggestion;
            }

            ArrayUtil.TimSort(suggestions, Collections.ReverseOrder(comparer));
            if (numSug < suggestions.Length)
            {
                SuggestWord[] trimmed = new SuggestWord[numSug];
                Array.Copy(suggestions, 0, trimmed, 0, numSug);
                suggestions = trimmed;
            }
            return(suggestions);
        }

コード例 #29

0

ファイルを表示

        /// <summary>
        /// Looks up completions of <paramref name="key"/> in the FST: the key is
        /// matched exactly as a prefix and the best <paramref name="num"/> paths
        /// leaving that prefix are decoded into results.
        /// </summary>
        /// <param name="key"> prefix to complete </param>
        /// <param name="contexts"> must be <c>null</c>; contexts are not supported </param>
        /// <param name="onlyMorePopular"> must be <c>false</c> </param>
        /// <param name="num"> maximum number of results, asserted to be positive </param>
        /// <returns> the matching results; empty when no FST is built or the prefix is absent </returns>
        /// <exception cref="ArgumentException"> contexts were supplied, or <paramref name="onlyMorePopular"/> is true </exception>
        public override IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, bool onlyMorePopular, int num)
        {
            if (contexts != null)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(num > 0);
            }
            if (onlyMorePopular)
            {
                throw new ArgumentException("this suggester only works with onlyMorePopular=false");
            }
            if (fst == null)
            {
                return Collections.EmptyList <LookupResult>();
            }

            BytesRef utf8Key      = new BytesRef(key);
            int      keyByteCount = utf8Key.Length;
            FST.Arc <long?> prefixArc = new FST.Arc <long?>();

            // match the prefix portion exactly
            long? prefixWeight;
            try
            {
                prefixWeight = LookupPrefix(utf8Key, prefixArc);
            }
            catch (IOException bogus)
            {
                throw new Exception(bogus.ToString(), bogus);
            }

            if (!prefixWeight.HasValue)
            {
                return Collections.EmptyList <LookupResult>();
            }

            List <LookupResult> found  = new List <LookupResult>(num);
            CharsRef            utf16  = new CharsRef();

            if (exactFirst && prefixArc.IsFinal)
            {
                // The key itself is a complete entry: report it ahead of the completions.
                utf16.Grow(utf8Key.Length);
                UnicodeUtil.UTF8toUTF16(utf8Key, utf16);
                long exactWeight = prefixWeight.GetValueOrDefault() + prefixArc.NextFinalOutput.GetValueOrDefault();
                found.Add(new LookupResult(utf16.ToString(), DecodeWeight(exactWeight)));

                num--;
                if (num == 0)
                {
                    return found; // that was quick
                }
            }

            // complete top-N
            Util.Fst.Util.TopResults <long?> topPaths;
            try
            {
                topPaths = Lucene.Net.Util.Fst.Util.ShortestPaths(fst, prefixArc, prefixWeight, weightComparer, num, !exactFirst);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(topPaths.IsComplete);
                }
            }
            catch (IOException bogus)
            {
                throw new Exception(bogus.ToString(), bogus);
            }

            BytesRef tail = new BytesRef(8);
            foreach (Util.Fst.Util.Result <long?> path in topPaths)
            {
                // Rewind to the bare key, then append this path's suffix.
                utf8Key.Length = keyByteCount;
                Lucene.Net.Util.Fst.Util.ToBytesRef(path.Input, tail);
                utf8Key.Append(tail);

                utf16.Grow(utf8Key.Length);
                UnicodeUtil.UTF8toUTF16(utf8Key, utf16);
                found.Add(new LookupResult(utf16.ToString(), DecodeWeight(path.Output.GetValueOrDefault())));
            }

            return found;
        }

コード例 #30

0

ファイルを表示

ファイル: SynonymFilter.cs プロジェクト: eladmarg/lucene.net

        // Interleaves all output tokens onto the futureOutputs:
        //
        // Decodes the serialized output held in `bytes`, splits every output entry into
        // words on SynonymMap.WORD_SEPARATOR, and adds the words to successive
        // futureOutputs slots starting at nextRead. Afterwards the matchInputLength
        // futureInputs slots starting at nextRead are flagged as matched.
        //
        //   bytes            - encoded output: a VInt32 header followed by one VInt32 word id per entry
        //   matchInputLength - number of futureInputs slots to flag as matched
        //   matchEndOffset   - end offset given to an entry that consists of a single word
        private void AddOutput(BytesRef bytes, int matchInputLength, int matchEndOffset)
        {
            bytesReader.Reset(bytes.Bytes, bytes.Offset, bytes.Length);

            // Header layout: the low bit is CLEAR when the original token is kept;
            // the remaining bits (unsigned shift) hold the number of output entries.
            int  code     = bytesReader.ReadVInt32();
            bool keepOrig = (code & 0x1) == 0;
            int  count    = (int)((uint)code >> 1);

            //System.out.println("  addOutput count=" + count + " keepOrig=" + keepOrig);
            for (int outputIDX = 0; outputIDX < count; outputIDX++)
            {
                // Fetch the entry's bytes by word id and decode them to UTF-16.
                synonyms.Words.Get(bytesReader.ReadVInt32(), scratchBytes);
                //System.out.println("    outIDX=" + outputIDX + " bytes=" + scratchBytes.length);
                UnicodeUtil.UTF8toUTF16(scratchBytes, scratchChars);
                int lastStart  = scratchChars.Offset;
                int chEnd      = lastStart + scratchChars.Length;
                // Every entry starts writing at nextRead again, so entries share slots.
                int outputUpto = nextRead;
                // The loop deliberately runs one index past the last char (<=) so that
                // the final word is emitted when chIDX == chEnd.
                for (int chIDX = lastStart; chIDX <= chEnd; chIDX++)
                {
                    if (chIDX == chEnd || scratchChars.Chars[chIDX] == SynonymMap.WORD_SEPARATOR)
                    {
                        int outputLen = chIDX - lastStart;
                        // Caller is not allowed to have empty string in
                        // the output:
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(outputLen > 0, "output contains empty string: {0}", scratchChars);
                        }
                        int endOffset;
                        int posLen;
                        if (chIDX == chEnd && lastStart == scratchChars.Offset)
                        {
                            // This rule had a single output token, so, we set
                            // this output's endOffset to the current
                            // endOffset (ie, endOffset of the last input
                            // token it matched):
                            endOffset = matchEndOffset;
                            posLen    = keepOrig ? matchInputLength : 1;
                        }
                        else
                        {
                            // This rule has more than one output token; we
                            // can't pick any particular endOffset for this
                            // case, so, we inherit the endOffset for the
                            // input token which this output overlaps:
                            endOffset = -1;
                            posLen    = 1;
                        }
                        futureOutputs[outputUpto].Add(scratchChars.Chars, lastStart, outputLen, endOffset, posLen);
                        //System.out.println("      " + new String(scratchChars.chars, lastStart, outputLen) + " outputUpto=" + outputUpto);
                        // The next word begins right after the separator.
                        lastStart = 1 + chIDX;
                        //System.out.println("  slot=" + outputUpto + " keepOrig=" + keepOrig);
                        // NOTE(review): RollIncr presumably advances the index with wrap-around
                        // in a circular buffer; its definition is not visible here - confirm.
                        outputUpto = RollIncr(outputUpto);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(futureOutputs[outputUpto].posIncr == 1, "outputUpto={0} vs nextWrite={1}", outputUpto, nextWrite);
                        }
                    }
                }
            }

            // Flag every input slot covered by the match; keepOrig is OR-ed in so a
            // slot that is already marked keepOrig stays that way.
            int upto = nextRead;

            for (int idx = 0; idx < matchInputLength; idx++)
            {
                futureInputs[upto].keepOrig |= keepOrig;
                futureInputs[upto].matched   = true;
                upto = RollIncr(upto);
            }
        }

C# (CSharp) UnicodeUtil.UTF8toUTF16の例