/// <summary>
/// Decodes the raw tag bytes of this entry and returns them as a character sequence.
/// </summary>
/// <returns>
/// The decoded tag, or <see langword="null"/> when decoding yields no characters
/// (i.e. the entry carries no tag data).
/// </returns>
public ICharSequence GetTag()
{
    // Decode into the reusable character buffer; the helper may hand back a
    // different buffer instance, so the field is reassigned.
    tagCharSequence = BufferUtils.BytesToChars(decoder, tagBuffer, tagCharSequence);

    if (tagCharSequence.Length == 0)
    {
        return null;
    }

    // The decoded characters are converted to a string and wrapped as a sequence.
    return tagCharSequence.ToString().AsCharSequence();
}
/// <summary>
/// Decodes the raw stem bytes of this entry and returns them as a character sequence.
/// </summary>
/// <returns>
/// The decoded stem, or <see langword="null"/> when decoding yields no characters
/// (i.e. the entry carries no stem data).
/// </returns>
public ICharSequence GetStem()
{
    // Decode into the reusable character buffer; the helper may hand back a
    // different buffer instance, so the field is reassigned.
    stemCharSequence = BufferUtils.BytesToChars(decoder, stemBuffer, stemCharSequence);

    if (stemCharSequence.Length == 0)
    {
        return null;
    }

    // The decoded characters are converted to a string and wrapped as a sequence.
    return stemCharSequence.ToString().AsCharSequence();
}
/// <summary>
/// Parses the dictionary entry the underlying entries iterator is currently positioned on
/// and returns it through the <c>entry</c> object, which is updated in place.
/// </summary>
/// <remarks>
/// Entries are typically laid out as <c>inflected&lt;SEP&gt;codedBase&lt;SEP&gt;tag</c>.
/// The inflected form is everything before the first separator; the remainder is split
/// into the (possibly encoded) stem and the tag at the next separator found at or after
/// the sequence encoder's prefix length. The tag may be empty when no second separator
/// is present.
/// </remarks>
/// <returns>The <c>entry</c> object populated with the inflected form, stem and tag buffers.</returns>
/// <exception cref="FormatException">
/// The entry does not contain any separator byte.
/// </exception>
private WordData Next()
{
    ByteBuffer entryBuffer = entriesIter.Current;

    // NOTE(review): the backing array is scanned from index 0 and Remaining is used as
    // the entry length, so this assumes the buffer's array offset and position are both
    // zero -- confirm against the entries iterator.
    byte[] ba = entryBuffer.Array;
    int bbSize = entryBuffer.Remaining;

    // Locate the first separator: it terminates the inflected form.
    int sepPos;
    for (sepPos = 0; sepPos < bbSize; sepPos++)
    {
        if (ba[sepPos] == separator)
        {
            break;
        }
    }

    if (sepPos == bbSize)
    {
        // A more specific type than the reserved base Exception; still caught by
        // existing catch (Exception) handlers.
        throw new FormatException("Invalid dictionary entry format (missing separator).");
    }

    // Copy the inflected form and decode it to characters.
    inflectedBuffer = BufferUtils.ClearAndEnsureCapacity(inflectedBuffer, sepPos);
    inflectedBuffer.Put(ba, 0, sepPos);
    inflectedBuffer.Flip();

    inflectedCharBuffer = BufferUtils.BytesToChars(decoder, inflectedBuffer, inflectedCharBuffer);
    entry.Update(inflectedBuffer, inflectedCharBuffer);

    // Copy everything after the first separator into the scratch buffer. Capacity is
    // requested before the separator is skipped, so it is one byte larger than needed.
    temp = BufferUtils.ClearAndEnsureCapacity(temp, bbSize - sepPos);
    sepPos++;
    temp.Put(ba, sepPos, bbSize - sepPos);
    temp.Flip();

    // From here on, work on the remainder only (codedBase<SEP>tag).
    ba = temp.Array;
    bbSize = temp.Remaining;

    // Find the next separator byte's position splitting word form and tag. The search
    // starts after the encoder's prefix bytes, which are skipped rather than compared
    // against the separator.
#pragma warning disable 612, 618
    Debug.Assert(sequenceEncoder.PrefixBytes <= bbSize, sequenceEncoder.GetType() + " >? " + bbSize);
    sepPos = sequenceEncoder.PrefixBytes;
#pragma warning restore 612, 618
    for (; sepPos < bbSize; sepPos++)
    {
        if (ba[sepPos] == separator)
        {
            break;
        }
    }

    // Decode the stem into the stem buffer, or copy it verbatim when decoding is disabled.
    if (decodeStems)
    {
        entry.stemBuffer = sequenceEncoder.Decode(entry.stemBuffer, inflectedBuffer, ByteBuffer.Wrap(ba, 0, sepPos));
    }
    else
    {
        entry.stemBuffer = BufferUtils.ClearAndEnsureCapacity(entry.stemBuffer, sepPos);
        entry.stemBuffer.Put(ba, 0, sepPos);
        entry.stemBuffer.Flip();
    }

    // Skip separator character, if present.
    if (sepPos + 1 <= bbSize)
    {
        sepPos++;
    }

    // Copy the tag data: whatever remains after the second separator (possibly nothing).
    entry.tagBuffer = BufferUtils.ClearAndEnsureCapacity(entry.tagBuffer, bbSize - sepPos);
    entry.tagBuffer.Put(ba, sepPos, bbSize - sepPos);
    entry.tagBuffer.Flip();

    return entry;
}