/// <summary>
/// Creates a <see cref="DictionaryEnumerator"/> that walks the entries of the
/// given dictionary's FSA.
/// </summary>
/// <param name="dictionary">
/// Dictionary supplying the FSA enumerator, the separator and the sequence encoder.
/// </param>
/// <param name="decoder">
/// Encoding kept by this enumerator and handed to the <see cref="WordData"/>
/// instance created here.
/// </param>
/// <param name="decodeStems">Flag stored unchanged on this enumerator.</param>
public DictionaryEnumerator(Dictionary dictionary, Encoding decoder, bool decodeStems)
{
    // Plain parameter captures first; these cannot fail.
    this.decodeStems = decodeStems;
    this.decoder = decoder;

    // Everything below reads from the dictionary, in the same order as before.
    entriesIter = dictionary.FSA.GetEnumerator();
    separator = dictionary.Metadata.Separator;

    var encoderType = dictionary.Metadata.SequenceEncoderType;
    sequenceEncoder = encoderType.Get();

    entry = new WordData(decoder);
}
/// <summary>
/// Round-trips 10000 pairs of random ASCII-letter strings (each requested with
/// length bounds 0 and 500) through the supplied encoder.
/// </summary>
/// <param name="coder">Encoder under test, supplied by <c>TestFactory.Values</c>.</param>
public void TestEncodeSuffixOnRandomSequences([ValueSource(typeof(TestFactory), "Values")] ISequenceEncoder coder)
{
    int remaining = 10000;
    while (remaining-- > 0)
    {
        // Source is generated before target, matching left-to-right argument evaluation.
        var randomSource = randomAsciiLettersOfLengthBetween(0, 500);
        var randomTarget = randomAsciiLettersOfLengthBetween(0, 500);

        assertRoundtripEncode(coder, randomSource, randomTarget);
    }
}
/// <summary>
/// Round-trips a fixed list of hand-picked source/target pairs through the
/// supplied encoder.
/// </summary>
/// <param name="coder">Encoder under test, supplied by <c>TestFactory.Values</c>.</param>
public void TestEncodeSamples([ValueSource(typeof(TestFactory), "Values")] ISequenceEncoder coder)
{
    // Each row is { source, target }. The "axybc"/"abc" row is listed twice;
    // both are kept so the number of round-trips performed stays the same.
    string[][] samples =
    {
        new[] { "", "" },
        new[] { "abc", "ab" },
        new[] { "abc", "abx" },
        new[] { "ab", "abc" },
        new[] { "xabc", "abc" },
        new[] { "axbc", "abc" },
        new[] { "axybc", "abc" },
        new[] { "axybc", "abc" },
        new[] { "azbc", "abcxy" },
        new[] { "Niemcami", "Niemiec" },
        new[] { "Niemiec", "Niemcami" },
    };

    foreach (string[] sample in samples)
    {
        assertRoundtripEncode(coder, sample[0], sample[1]);
    }
}
/// <summary>
/// Encodes <paramref name="dstString"/> relative to <paramref name="srcString"/>,
/// decodes the result against the same source, and fails the test when the decoded
/// bytes differ from the UTF-8 bytes of <paramref name="dstString"/>.
/// </summary>
/// <param name="coder">Encoder whose Encode/Decode pair is being checked.</param>
/// <param name="srcString">Source text; wrapped as UTF-8 bytes.</param>
/// <param name="dstString">Target text; wrapped as UTF-8 bytes.</param>
private void assertRoundtripEncode(ISequenceEncoder coder, String srcString, String dstString)
{
    Encoding utf8 = Encoding.UTF8;
    ByteBuffer source = ByteBuffer.Wrap(utf8.GetBytes(srcString));
    ByteBuffer target = ByteBuffer.Wrap(utf8.GetBytes(dstString));

    // Scratch buffers are allocated with a random size argument (Random.Next(30)),
    // one draw immediately before each coder call.
    ByteBuffer encodeScratch = ByteBuffer.Allocate(Random.Next(30));
    ByteBuffer encoded = coder.Encode(encodeScratch, source, target);

    ByteBuffer decodeScratch = ByteBuffer.Allocate(Random.Next(30));
    ByteBuffer decoded = coder.Decode(decodeScratch, source, encoded);

    if (decoded.Equals(target))
    {
        return;
    }

    // Dump all four buffers before failing so the mismatch can be inspected.
    var console = Console.Out;
    console.WriteLine("src: " + BufferUtils.ToString(source, Encoding.UTF8));
    console.WriteLine("dst: " + BufferUtils.ToString(target, Encoding.UTF8));
    console.WriteLine("enc: " + BufferUtils.ToString(encoded, Encoding.UTF8));
    console.WriteLine("dec: " + BufferUtils.ToString(decoded, Encoding.UTF8));
    fail("Mismatch.");
}
/// <summary>
/// Creates a new lookup over the given dictionary, taking the FSA, sequence
/// encoder, character encoder/decoder and separator from the dictionary and
/// its metadata.
/// </summary>
/// <param name="dictionary">The dictionary to use for lookups.</param>
/// <exception cref="ArgumentNullException">
/// If <paramref name="dictionary"/> is <c>null</c>, or if its metadata is <c>null</c>.
/// </exception>
/// <remarks>
/// NOTE(review): this constructor does not itself check for an empty dictionary;
/// whether <c>FSA.GetRootNode()</c> throws in that case should be confirmed.
/// </remarks>
public DictionaryLookup(Dictionary dictionary)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    // Validate the metadata BEFORE anything dereferences it; previously the check
    // ran after dictionary.Metadata.SequenceEncoderType had already been read, so
    // a missing metadata object surfaced as a NullReferenceException instead.
    this.dictionaryMetadata = dictionary.Metadata;
    if (dictionaryMetadata == null)
    {
        throw new ArgumentNullException(nameof(dictionaryMetadata), "Dictionary metadata must not be null.");
    }

    this.formsList = new ArrayViewList<WordData>(forms, 0, forms.Length);
    this.dictionary = dictionary;
    this.sequenceEncoder = dictionaryMetadata.SequenceEncoderType.Get();
    this.rootNode = dictionary.FSA.GetRootNode();
    this.fsa = dictionary.FSA;
    this.matcher = new FSATraversal(fsa);
    this.finalStatesIterator = new ByteSequenceEnumerator(fsa, fsa.GetRootNode());

    decoder = dictionaryMetadata.Decoder;
    encoder = dictionaryMetadata.Encoder;
    separatorChar = dictionaryMetadata.SeparatorAsChar;
}