/// <summary>
/// Creates a detokenizer backed by the latin detokenizer dictionary resource.
/// </summary>
/// <returns>A <see cref="DictionaryDetokenizer"/> built from the parsed dictionary.</returns>
internal static IDetokenizer CreateLatinDetokenizer()
 {
     // The resource stream is only needed while the dictionary is constructed.
     using (var dictionaryStream = Tests.OpenFile("/opennlp/tools/tokenize/latin-detokenizer.xml"))
     {
         return new DictionaryDetokenizer(new DetokenizationDictionary(dictionaryStream));
     }
 }
        /// <summary>
        /// Reads the sample file twice and stores the resulting token samples:
        /// once through <c>NameToTokenSampleStream</c> (into <c>samplesFromConvert</c>)
        /// and once through <c>AdTokenSampleStream</c> (into <c>samplesFromStream</c>).
        /// </summary>
        public void Setup() {
            DetokenizationDictionary dict;

            // The dictionary stream is released as soon as the dictionary has been constructed.
            using (var dictIn = Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml")) {
                dict = new DetokenizationDictionary(dictIn);
            }

            TokenSample sample;

            // Each sample stream is drained completely inside its using block, so the
            // underlying file can be closed before the method returns.
            using (var nameIn = Tests.OpenFile(sampleFile)) {
                var stream = new NameToTokenSampleStream(
                    new DictionaryDetokenizer(dict),
                    new AdNameSampleStream(nameIn, Encoding.UTF8, true, false));

                samplesFromConvert = new List<TokenSample>();

                while ((sample = stream.Read()) != null) {
                    samplesFromConvert.Add(sample);
                }
            }

            samplesFromStream = new List<TokenSample>();

            using (var tokenIn = Tests.OpenFile(sampleFile)) {
                var sampleStream = new AdTokenSampleStream(
                    new PlainTextByLineStream(tokenIn),
                    new DictionaryDetokenizer(dict),
                    true, false);

                while ((sample = sampleStream.Read()) != null) {
                    samplesFromStream.Add(sample);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Reads the sample file twice and stores the resulting token samples:
        /// once through <c>NameToTokenSampleStream</c> (into <c>samplesFromConvert</c>)
        /// and once through <c>AdTokenSampleStream</c> (into <c>samplesFromStream</c>).
        /// </summary>
        public void Setup()
        {
            DetokenizationDictionary dict;

            // The dictionary stream is released as soon as the dictionary has been constructed.
            using (var dictIn = Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml"))
            {
                dict = new DetokenizationDictionary(dictIn);
            }

            TokenSample sample;

            // Each sample stream is drained completely inside its using block, so the
            // underlying file can be closed before the method returns.
            using (var nameIn = Tests.OpenFile(sampleFile))
            {
                var stream = new NameToTokenSampleStream(
                    new DictionaryDetokenizer(dict),
                    new AdNameSampleStream(nameIn, Encoding.UTF8, true, false));

                samplesFromConvert = new List<TokenSample>();

                while ((sample = stream.Read()) != null)
                {
                    samplesFromConvert.Add(sample);
                }
            }

            samplesFromStream = new List<TokenSample>();

            using (var tokenIn = Tests.OpenFile(sampleFile))
            {
                var sampleStream = new AdTokenSampleStream(
                    new PlainTextByLineStream(tokenIn),
                    new DictionaryDetokenizer(dict),
                    true, false);

                while ((sample = sampleStream.Read()) != null)
                {
                    samplesFromStream.Add(sample);
                }
            }
        }
Example #4
0
 /// <summary>
 /// Asserts that the dictionary maps the quote, parentheses and hyphen tokens
 /// to the operations the tests expect.
 /// </summary>
 /// <param name="dict">The dictionary to check.</param>
 private static void TestEntries(DetokenizationDictionary dict)
 {
     // Each lookup is performed immediately before its assertion.
     var quoteOperation = dict["\""];
     Assert.AreEqual(DetokenizationDictionary.Operation.RightLeftMatching, quoteOperation);

     var openParenOperation = dict["("];
     Assert.AreEqual(DetokenizationDictionary.Operation.MoveRight, openParenOperation);

     var closeParenOperation = dict[")"];
     Assert.AreEqual(DetokenizationDictionary.Operation.MoveLeft, closeParenOperation);

     var hyphenOperation = dict["-"];
     Assert.AreEqual(DetokenizationDictionary.Operation.MoveBoth, hyphenOperation);
 }
 /// <summary>
 /// Fills the <c>dict</c> field with four punctuation entries.
 /// </summary>
 public void Setup() {
     // Populate a local first so the field is only assigned once every entry was added.
     var entries = new DetokenizationDictionary();

     entries.Add("\"", Operation.RightLeftMatching);
     entries.Add("(", Operation.MoveRight);
     entries.Add(")", Operation.MoveLeft);
     entries.Add("-", Operation.MoveBoth);

     dict = entries;
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="DictionaryDetokenizer"/> class
        /// using the dictionary read from the specified file.
        /// </summary>
        /// <param name="dictionaryFile">The dictionary file.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="dictionaryFile"/> is <c>null</c>.</exception>
        /// <exception cref="System.IO.FileNotFoundException">The dictionary file does not exist.</exception>
        public DictionaryDetokenizer(FileInfo dictionaryFile) {
            if (dictionaryFile == null)
                throw new ArgumentNullException("dictionaryFile");

            if (!dictionaryFile.Exists)
                throw new FileNotFoundException("The dictionary file does not exist.", dictionaryFile.FullName);

            // Dispose the file stream once the dictionary is built; leaving it open
            // would keep the file handle alive until finalization.
            using (var input = dictionaryFile.OpenRead()) {
                dictionary = new DetokenizationDictionary(input);
            }
        }
Example #7
0
 /// <summary>
 /// Fills the <c>dict</c> field with four punctuation entries.
 /// </summary>
 public void Setup()
 {
     // Build the dictionary in a local and publish it to the field at the end.
     var punctuation = new DetokenizationDictionary();

     punctuation.Add("\"", DetokenizationDictionary.Operation.RightLeftMatching);
     punctuation.Add("(", DetokenizationDictionary.Operation.MoveRight);
     punctuation.Add(")", DetokenizationDictionary.Operation.MoveLeft);
     punctuation.Add("-", DetokenizationDictionary.Operation.MoveBoth);

     dict = punctuation;
 }
Example #8
0
        /// <summary>
        /// Serializes <c>dict</c> into memory, parses it back and checks the parsed entries.
        /// </summary>
        public void TestSerialization()
        {
            using (var buffer = new MemoryStream())
            {
                dict.Serialize(buffer);

                // Rewind so the dictionary is parsed from the start of the serialized data.
                buffer.Position = 0;

                TestEntries(new DetokenizationDictionary(buffer));
            }
        }
        /// <summary>
        /// Serializes <c>dict</c> into memory, parses it back and checks the parsed entries.
        /// </summary>
        public void TestSerialization() {
            var buffer = new MemoryStream();

            try {
                dict.Serialize(buffer);

                // Rewind so the dictionary is parsed from the start of the serialized data.
                buffer.Seek(0, SeekOrigin.Begin);

                var roundTripped = new DetokenizationDictionary(buffer);
                TestEntries(roundTripped);
            } finally {
                buffer.Dispose();
            }
        }
Example #10
0
        /// <summary>
        /// Detokenizes <paramref name="tokens"/> using a dictionary in which every
        /// token is associated with the same <paramref name="operation"/>.
        /// </summary>
        /// <param name="tokens">The tokens to detokenize; also used as the dictionary entries.</param>
        /// <param name="operation">The operation assigned to every token.</param>
        /// <returns>The string returned by the detokenizer when called with the tokens and <c>" "</c>.</returns>
        public static String DeTokenize(String[] tokens, DetokenizationDictionary.Operation operation)
        {
            var tokenCount = tokens.Length;
            var operations = new DetokenizationDictionary.Operation[tokenCount];

            // Assign the same operation to every token position.
            var index = 0;
            while (index < tokenCount)
            {
                operations[index] = operation;
                index++;
            }

            var detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(tokens, operations));

            return detokenizer.detokenize(tokens, " ");
        }
        /// <summary>
        /// Checks the operations a <see cref="DictionaryDetokenizer"/> reports for a
        /// six-token sequence containing a period and a hyphen.
        /// </summary>
        public void TestDetokenizer() {
            var rules = new DetokenizationDictionary {
                {".", DetokenizationDictionary.Operation.MoveLeft},
                {"!", DetokenizationDictionary.Operation.MoveLeft},
                {"(", DetokenizationDictionary.Operation.MoveRight},
                {")", DetokenizationDictionary.Operation.MoveLeft},
                {"\"", DetokenizationDictionary.Operation.RightLeftMatching},
                {"-", DetokenizationDictionary.Operation.MoveBoth}
            };

            var detokenizer = new DictionaryDetokenizer(rules);
            var actual = detokenizer.Detokenize(new[] {"Simple", "test", ".", "co", "-", "worker"});

            // Expected operation for each token, in token order.
            var expected = new[] {
                DetokenizationOperation.NoOperation,
                DetokenizationOperation.NoOperation,
                DetokenizationOperation.MergeToLeft,
                DetokenizationOperation.NoOperation,
                DetokenizationOperation.MergeBoth,
                DetokenizationOperation.NoOperation
            };

            for (var i = 0; i < expected.Length; i++) {
                Assert.AreEqual(expected[i], actual[i]);
            }
        }
        /// <summary>
        /// Checks the operations a <see cref="DictionaryDetokenizer"/> reports for a
        /// six-token sequence containing a period and a hyphen.
        /// </summary>
        public void TestDetokenizer()
        {
            var rules = new DetokenizationDictionary();
            rules.Add(".", DetokenizationDictionary.Operation.MoveLeft);
            rules.Add("!", DetokenizationDictionary.Operation.MoveLeft);
            rules.Add("(", DetokenizationDictionary.Operation.MoveRight);
            rules.Add(")", DetokenizationDictionary.Operation.MoveLeft);
            rules.Add("\"", DetokenizationDictionary.Operation.RightLeftMatching);
            rules.Add("-", DetokenizationDictionary.Operation.MoveBoth);

            var detokenizer = new DictionaryDetokenizer(rules);

            var results = detokenizer.Detokenize(new[] { "Simple", "test", ".", "co", "-", "worker" });

            // "Simple", "test"
            Assert.AreEqual(DetokenizationOperation.NoOperation, results[0]);
            Assert.AreEqual(DetokenizationOperation.NoOperation, results[1]);

            // "."
            Assert.AreEqual(DetokenizationOperation.MergeToLeft, results[2]);

            // "co", "-", "worker"
            Assert.AreEqual(DetokenizationOperation.NoOperation, results[3]);
            Assert.AreEqual(DetokenizationOperation.MergeBoth, results[4]);
            Assert.AreEqual(DetokenizationOperation.NoOperation, results[5]);
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="DictionaryDetokenizer"/> class
 /// with the given dictionary.
 /// </summary>
 /// <param name="dictionary">The detokenization dictionary stored by this instance.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/> is <c>null</c>.</exception>
 public DictionaryDetokenizer(DetokenizationDictionary dictionary) {
     // Fail fast on null, matching the argument validation of the FileInfo overload.
     if (dictionary == null)
         throw new ArgumentNullException("dictionary");

     this.dictionary = dictionary;
 }
 /// <summary>
 /// Creates a detokenizer backed by the latin detokenizer dictionary resource.
 /// </summary>
 /// <returns>A <see cref="DictionaryDetokenizer"/> built from the parsed dictionary.</returns>
 internal static IDetokenizer CreateLatinDetokenizer() {
     const string resource = "/opennlp/tools/tokenize/latin-detokenizer.xml";

     // The resource stream is only needed while the dictionary is constructed.
     using (var source = Tests.OpenFile(resource)) {
         IDetokenizer detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(source));
         return detokenizer;
     }
 }
 /// <summary>
 /// Asserts that the dictionary maps the quote, parentheses and hyphen tokens
 /// to the operations the tests expect.
 /// </summary>
 /// <param name="dict">The dictionary to check.</param>
 private static void TestEntries(DetokenizationDictionary dict) {
     // Each lookup is performed immediately before its assertion.
     var forQuote = dict["\""];
     Assert.AreEqual(Operation.RightLeftMatching, forQuote);

     var forOpenParen = dict["("];
     Assert.AreEqual(Operation.MoveRight, forOpenParen);

     var forCloseParen = dict[")"];
     Assert.AreEqual(Operation.MoveLeft, forCloseParen);

     var forHyphen = dict["-"];
     Assert.AreEqual(Operation.MoveBoth, forHyphen);
 }