/// <summary>
/// Creates a dictionary-based detokenizer from the Latin detokenizer dictionary test resource.
/// </summary>
/// <returns>A <see cref="DictionaryDetokenizer"/> wrapping the dictionary read from the resource.</returns>
internal static IDetokenizer CreateLatinDetokenizer()
{
    using (var resource = Tests.OpenFile("/opennlp/tools/tokenize/latin-detokenizer.xml"))
    {
        var latinDictionary = new DetokenizationDictionary(resource);
        var latinDetokenizer = new DictionaryDetokenizer(latinDictionary);

        // The resource stream is disposed when this block exits, before the caller uses the result.
        return latinDetokenizer;
    }
}
/// <summary>
/// Populates <c>samplesFromConvert</c> and <c>samplesFromStream</c> by reading <c>sampleFile</c> twice:
/// once through a <see cref="NameToTokenSampleStream"/> wrapping an <see cref="AdNameSampleStream"/>,
/// and once through an <see cref="AdTokenSampleStream"/> over a <see cref="PlainTextByLineStream"/>.
/// Both readers share the same detokenization dictionary.
/// </summary>
public void Setup()
{
    // Every stream opened here is disposed once it is no longer read from; previously
    // all three file streams were left open.
    // NOTE(review): assumes DetokenizationDictionary consumes its stream inside the constructor - confirm.
    DetokenizationDictionary dict;
    using (var dictIn = Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml"))
    {
        dict = new DetokenizationDictionary(dictIn);
    }

    TokenSample sample;

    using (var nameIn = Tests.OpenFile(sampleFile))
    {
        var stream = new NameToTokenSampleStream(
            new DictionaryDetokenizer(dict),
            new AdNameSampleStream(nameIn, Encoding.UTF8, true, false));

        samplesFromConvert = new List<TokenSample>();

        // Drain the converting stream; Read() returns null when it is exhausted.
        while ((sample = stream.Read()) != null)
        {
            samplesFromConvert.Add(sample);
        }
    }

    samplesFromStream = new List<TokenSample>();

    using (var tokenIn = Tests.OpenFile(sampleFile))
    {
        var sampleStream = new AdTokenSampleStream(
            new PlainTextByLineStream(tokenIn),
            new DictionaryDetokenizer(dict),
            true,
            false);

        // Drain the direct token stream the same way.
        while ((sample = sampleStream.Read()) != null)
        {
            samplesFromStream.Add(sample);
        }
    }
}
/// <summary>
/// Reads <c>sampleFile</c> through two different pipelines and stores the results:
/// <c>samplesFromConvert</c> receives the samples produced by a <see cref="NameToTokenSampleStream"/>
/// over an <see cref="AdNameSampleStream"/>, and <c>samplesFromStream</c> receives the samples
/// produced by an <see cref="AdTokenSampleStream"/> over a <see cref="PlainTextByLineStream"/>.
/// </summary>
public void Setup()
{
    // The dictionary stream is closed as soon as the dictionary has been built.
    // NOTE(review): assumes the DetokenizationDictionary constructor reads the stream eagerly - confirm.
    DetokenizationDictionary dict;
    using (var dictionaryInput = Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml"))
    {
        dict = new DetokenizationDictionary(dictionaryInput);
    }

    TokenSample sample;

    // First pass: name samples converted to token samples. The file stream is disposed
    // after the last sample has been read instead of being leaked.
    using (var convertInput = Tests.OpenFile(sampleFile))
    {
        var stream = new NameToTokenSampleStream(
            new DictionaryDetokenizer(dict),
            new AdNameSampleStream(convertInput, Encoding.UTF8, true, false));

        samplesFromConvert = new List<TokenSample>();
        while ((sample = stream.Read()) != null)
        {
            samplesFromConvert.Add(sample);
        }
    }

    samplesFromStream = new List<TokenSample>();

    // Second pass: token samples read directly from the same file, again with the
    // underlying file stream disposed once reading has finished.
    using (var directInput = Tests.OpenFile(sampleFile))
    {
        var sampleStream = new AdTokenSampleStream(
            new PlainTextByLineStream(directInput),
            new DictionaryDetokenizer(dict),
            true,
            false);

        while ((sample = sampleStream.Read()) != null)
        {
            samplesFromStream.Add(sample);
        }
    }
}
/// <summary>
/// Asserts that the given dictionary maps the four reference tokens
/// (<c>"</c>, <c>(</c>, <c>)</c> and <c>-</c>) to their expected operations.
/// </summary>
/// <param name="dict">The dictionary to verify.</param>
private static void TestEntries(DetokenizationDictionary dict)
{
    // Each lookup is performed immediately before its assertion, one token at a time.
    var quoteOperation = dict["\""];
    Assert.AreEqual(DetokenizationDictionary.Operation.RightLeftMatching, quoteOperation);

    var openParenOperation = dict["("];
    Assert.AreEqual(DetokenizationDictionary.Operation.MoveRight, openParenOperation);

    var closeParenOperation = dict[")"];
    Assert.AreEqual(DetokenizationDictionary.Operation.MoveLeft, closeParenOperation);

    var hyphenOperation = dict["-"];
    Assert.AreEqual(DetokenizationDictionary.Operation.MoveBoth, hyphenOperation);
}
/// <summary>
/// Builds the dictionary fixture with four token-to-operation entries
/// and stores it in <c>dict</c>.
/// </summary>
public void Setup()
{
    // Fill a local instance first so the field is only assigned once all entries are present.
    var entries = new DetokenizationDictionary();
    entries.Add("\"", Operation.RightLeftMatching);
    entries.Add("(", Operation.MoveRight);
    entries.Add(")", Operation.MoveLeft);
    entries.Add("-", Operation.MoveBoth);

    dict = entries;
}
/// <summary>
/// Initializes a new instance of the <see cref="DictionaryDetokenizer"/> class
/// using the detokenization dictionary stored in the specified file.
/// </summary>
/// <param name="dictionaryFile">The dictionary file.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="dictionaryFile"/></exception>
/// <exception cref="System.IO.FileNotFoundException">The dictionary file does not exist.</exception>
public DictionaryDetokenizer(FileInfo dictionaryFile)
{
    if (dictionaryFile == null)
    {
        throw new ArgumentNullException("dictionaryFile");
    }

    if (!dictionaryFile.Exists)
    {
        throw new FileNotFoundException("The dictionary file does not exist.", dictionaryFile.FullName);
    }

    // Dispose the file stream once the dictionary has been built; previously the
    // stream was never closed, which kept the file handle open.
    // NOTE(review): assumes DetokenizationDictionary reads the stream eagerly in its constructor - confirm.
    using (var input = dictionaryFile.OpenRead())
    {
        dictionary = new DetokenizationDictionary(input);
    }
}
/// <summary>
/// Prepares the <c>dict</c> fixture with the four token-to-operation entries used by the tests.
/// </summary>
public void Setup()
{
    // Populate a temporary instance, then publish it to the field in a single assignment.
    var fixture = new DetokenizationDictionary();

    fixture.Add("\"", DetokenizationDictionary.Operation.RightLeftMatching);
    fixture.Add("(", DetokenizationDictionary.Operation.MoveRight);
    fixture.Add(")", DetokenizationDictionary.Operation.MoveLeft);
    fixture.Add("-", DetokenizationDictionary.Operation.MoveBoth);

    dict = fixture;
}
/// <summary>
/// Serializes <c>dict</c> into an in-memory stream, builds a new dictionary from
/// that stream and checks its entries with <c>TestEntries</c>.
/// </summary>
public void TestSerialization()
{
    using (var buffer = new MemoryStream())
    {
        dict.Serialize(buffer);

        // Rewind to the start so the new dictionary reads what was just written.
        buffer.Position = 0;

        var roundTripped = new DetokenizationDictionary(buffer);
        TestEntries(roundTripped);
    }
}
/// <summary>
/// Detokenizes the given tokens with a dictionary built from those same tokens,
/// where every token is paired with the same <paramref name="operation"/>.
/// </summary>
/// <param name="tokens">The tokens to detokenize; also used as the dictionary entries.</param>
/// <param name="operation">The operation supplied for every token.</param>
/// <returns>The result of detokenizing <paramref name="tokens"/> with <c>" "</c> passed as the second argument.</returns>
public static String DeTokenize(String[] tokens, DetokenizationDictionary.Operation operation)
{
    var count = tokens.Length;

    // One operation slot per token, all set to the same value.
    var repeated = new DetokenizationDictionary.Operation[count];
    var index = 0;
    while (index < count)
    {
        repeated[index] = operation;
        index++;
    }

    var lookup = new DetokenizationDictionary(tokens, repeated);
    var worker = new DictionaryDetokenizer(lookup);

    return worker.detokenize(tokens, " ");
}
/// <summary>
/// Checks the operations a <see cref="DictionaryDetokenizer"/> returns for the token
/// sequence <c>Simple test . co - worker</c> against the expected per-token operations.
/// </summary>
public void TestDetokenizer()
{
    var rules = new DetokenizationDictionary
    {
        {".", DetokenizationDictionary.Operation.MoveLeft},
        {"!", DetokenizationDictionary.Operation.MoveLeft},
        {"(", DetokenizationDictionary.Operation.MoveRight},
        {")", DetokenizationDictionary.Operation.MoveLeft},
        {"\"", DetokenizationDictionary.Operation.RightLeftMatching},
        {"-", DetokenizationDictionary.Operation.MoveBoth}
    };

    var subject = new DictionaryDetokenizer(rules);
    var actual = subject.Detokenize(new[] {"Simple", "test", ".", "co", "-", "worker"});

    // Expected operation for each token, in token order.
    var expected = new[]
    {
        DetokenizationOperation.NoOperation,
        DetokenizationOperation.NoOperation,
        DetokenizationOperation.MergeToLeft,
        DetokenizationOperation.NoOperation,
        DetokenizationOperation.MergeBoth,
        DetokenizationOperation.NoOperation
    };

    for (var position = 0; position < expected.Length; position++)
    {
        Assert.AreEqual(expected[position], actual[position]);
    }
}
/// <summary>
/// Runs a <see cref="DictionaryDetokenizer"/> over six tokens and asserts the
/// operation returned for each position.
/// </summary>
public void TestDetokenizer()
{
    // Dictionary entries are added one by one, in the same order as before.
    var entries = new DetokenizationDictionary();
    entries.Add(".", DetokenizationDictionary.Operation.MoveLeft);
    entries.Add("!", DetokenizationDictionary.Operation.MoveLeft);
    entries.Add("(", DetokenizationDictionary.Operation.MoveRight);
    entries.Add(")", DetokenizationDictionary.Operation.MoveLeft);
    entries.Add("\"", DetokenizationDictionary.Operation.RightLeftMatching);
    entries.Add("-", DetokenizationDictionary.Operation.MoveBoth);

    var tokens = new[] { "Simple", "test", ".", "co", "-", "worker" };
    var detokenizer = new DictionaryDetokenizer(entries);
    var operations = detokenizer.Detokenize(tokens);

    // "Simple" and "test"
    Assert.AreEqual(DetokenizationOperation.NoOperation, operations[0]);
    Assert.AreEqual(DetokenizationOperation.NoOperation, operations[1]);

    // "."
    Assert.AreEqual(DetokenizationOperation.MergeToLeft, operations[2]);

    // "co", "-", "worker"
    Assert.AreEqual(DetokenizationOperation.NoOperation, operations[3]);
    Assert.AreEqual(DetokenizationOperation.MergeBoth, operations[4]);
    Assert.AreEqual(DetokenizationOperation.NoOperation, operations[5]);
}
/// <summary>
/// Initializes a new instance of the <see cref="DictionaryDetokenizer"/> class
/// using the specified detokenization dictionary.
/// </summary>
/// <param name="dictionary">The detokenization dictionary.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/></exception>
public DictionaryDetokenizer(DetokenizationDictionary dictionary)
{
    // Reject null here so the failure surfaces at construction time rather than
    // as a null dereference the first time the dictionary is used.
    if (dictionary == null)
    {
        throw new System.ArgumentNullException("dictionary");
    }

    this.dictionary = dictionary;
}
/// <summary>
/// Loads the Latin detokenizer dictionary test resource and wraps it in a
/// <see cref="DictionaryDetokenizer"/>.
/// </summary>
/// <returns>The detokenizer built from the loaded dictionary.</returns>
internal static IDetokenizer CreateLatinDetokenizer()
{
    using (var source = Tests.OpenFile("/opennlp/tools/tokenize/latin-detokenizer.xml"))
    {
        // Build the dictionary and the detokenizer while the resource stream is still open.
        DetokenizationDictionary loaded = new DetokenizationDictionary(source);
        IDetokenizer result = new DictionaryDetokenizer(loaded);
        return result;
    }
}
/// <summary>
/// Verifies that the dictionary returns the expected operation for each of the
/// four reference tokens: <c>"</c>, <c>(</c>, <c>)</c> and <c>-</c>.
/// </summary>
/// <param name="dict">The dictionary under test.</param>
private static void TestEntries(DetokenizationDictionary dict)
{
    // Look up and check one token at a time, in the original order.
    var forQuote = dict["\""];
    Assert.AreEqual(Operation.RightLeftMatching, forQuote);

    var forOpenParen = dict["("];
    Assert.AreEqual(Operation.MoveRight, forOpenParen);

    var forCloseParen = dict[")"];
    Assert.AreEqual(Operation.MoveLeft, forCloseParen);

    var forHyphen = dict["-"];
    Assert.AreEqual(Operation.MoveBoth, forHyphen);
}