/// <summary>
/// Populates the two sample lists compared by this fixture.
/// <c>samplesFromConvert</c> receives every sample read from a
/// <c>NameToTokenSampleStream</c> wrapped around an <c>AdNameSampleStream</c>;
/// <c>samplesFromStream</c> receives every sample read from an
/// <c>AdTokenSampleStream</c>. Both read the same <c>sampleFile</c> and use a
/// detokenizer built from the same dictionary.
/// </summary>
public void Setup()
{
    var detokenizerRules = new DetokenizationDictionary(
        Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml"));

    // Conversion path: name samples turned into token samples.
    // NOTE(review): the meaning of the two boolean flags is not visible here - confirm against AdNameSampleStream.
    var convertDetokenizer = new DictionaryDetokenizer(detokenizerRules);
    var nameSamples = new AdNameSampleStream(Tests.OpenFile(sampleFile), Encoding.UTF8, true, false);
    var convertingStream = new NameToTokenSampleStream(convertDetokenizer, nameSamples);

    samplesFromConvert = new List<TokenSample>();
    for (TokenSample current = convertingStream.Read(); current != null; current = convertingStream.Read())
    {
        samplesFromConvert.Add(current);
    }

    // Direct path: token samples read straight from the same file.
    var lineStream = new PlainTextByLineStream(Tests.OpenFile(sampleFile));
    var directDetokenizer = new DictionaryDetokenizer(detokenizerRules);
    var directStream = new AdTokenSampleStream(lineStream, directDetokenizer, true, false);

    samplesFromStream = new List<TokenSample>();
    for (TokenSample current = directStream.Read(); current != null; current = directStream.Read())
    {
        samplesFromStream.Add(current);
    }
}
/// <summary>
/// Builds the fixture data: reads <c>sampleFile</c> twice, once through a
/// <c>NameToTokenSampleStream</c> over an <c>AdNameSampleStream</c> (stored in
/// <c>samplesFromConvert</c>) and once through an <c>AdTokenSampleStream</c>
/// (stored in <c>samplesFromStream</c>). Each stream is read until
/// <c>Read()</c> returns <c>null</c>.
/// </summary>
public void Setup()
{
    var latinDictionary = new DetokenizationDictionary(
        Tests.OpenFile("opennlp/tools/tokenize/latin-detokenizer.xml"));

    // First list: token samples derived from name samples.
    // NOTE(review): purpose of the (true, false) flags cannot be determined from this method - verify.
    var fromNames = new NameToTokenSampleStream(
        new DictionaryDetokenizer(latinDictionary),
        new AdNameSampleStream(Tests.OpenFile(sampleFile), Encoding.UTF8, true, false));

    var converted = new List<TokenSample>();
    samplesFromConvert = converted;
    while (true)
    {
        TokenSample next = fromNames.Read();
        if (next == null)
        {
            break;
        }

        converted.Add(next);
    }

    // Second list: token samples produced directly by the token sample stream.
    var fromTokens = new AdTokenSampleStream(
        new PlainTextByLineStream(Tests.OpenFile(sampleFile)),
        new DictionaryDetokenizer(latinDictionary),
        true,
        false);

    var streamed = new List<TokenSample>();
    samplesFromStream = streamed;
    while (true)
    {
        TokenSample next = fromTokens.Read();
        if (next == null)
        {
            break;
        }

        streamed.Add(next);
    }
}