/// <summary>
/// Creates a new Dictionary containing the information read from the provided streams to hunspell affix
/// and dictionary files.
/// You have to close the provided streams yourself.
/// </summary>
/// <param name="affix"> Stream for reading the hunspell affix file (won't be closed). </param>
/// <param name="dictionaries"> Streams for reading the hunspell dictionary files (won't be closed). </param>
/// <param name="ignoreCase"> Stored as this dictionary's ignore-case setting; it is also the initial value of the input-cleaning flag. </param>
/// <exception cref="IOException"> Can be thrown while reading from the streams </exception>
/// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
public Dictionary(Stream affix, IList<Stream> dictionaries, bool ignoreCase)
{
    this.ignoreCase = ignoreCase;
    this.needsInputCleaning = ignoreCase;
    this.needsOutputCleaning = false; // set if we have an OCONV
    flagLookup.Add(new BytesRef()); // no flags -> ord 0

    // The affix data is read twice (encoding detection, then parsing),
    // so it is spooled to a temp file that can be reopened for each pass.
    FileInfo aff = FileSupport.CreateTempFile("affix", "aff", tempDir);
    try
    {
        using (Stream @out = aff.Open(FileMode.Open, FileAccess.ReadWrite))
        {
            // copy contents of affix stream to temp file
            affix.CopyTo(@out);
        }

        // pass 1: get encoding
        string encoding;
        using (Stream aff1 = aff.Open(FileMode.Open, FileAccess.Read))
        {
            encoding = GetDictionaryEncoding(aff1);
        }

        // pass 2: parse affixes
        Encoding decoder = GetSystemEncoding(encoding);
        using (Stream aff2 = aff.Open(FileMode.Open, FileAccess.Read))
        {
            ReadAffixFile(aff2, decoder);
        }

        // read dictionary entries
        IntSequenceOutputs o = IntSequenceOutputs.Singleton;
        Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
        ReadDictionaryFiles(dictionaries, decoder, b);
        words = b.Finish();
        aliases = null; // no longer needed
    }
    finally
    {
        // Always remove the temp file, including when reading or parsing throws;
        // previously it was only deleted on the success path and leaked on failure.
        try
        {
            aff.Delete();
        }
        catch
        {
            // Best-effort cleanup: a failed delete must not mask the original
            // exception (or fail an otherwise successful construction).
        }
    }
}
/// <summary>
/// Builds an FST from the given affix table. Each key is converted to UTF-32
/// code points and used as the FST input; the associated list of chars is
/// written as the int-sequence output, with <c>null</c> entries encoded as 0.
/// </summary>
/// <param name="affixes"> Affix strings mapped to their char entries, iterated in the dictionary's key order. </param>
/// <returns> The finished FST produced by the builder. </returns>
private FST<IntsRef> AffixFST(SortedDictionary<string, IList<char?>> affixes)
{
    Builder<IntsRef> fstBuilder = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.Singleton);

    // Reused for every key; ToUTF32 overwrites it on each iteration.
    IntsRef keyScratch = new IntsRef();

    // NOTE(review): presumably the builder expects inputs in sorted order, which
    // would be why a SortedDictionary is required here — confirm the comparer
    // used by callers matches the order the builder expects.
    foreach (KeyValuePair<string, IList<char?>> pair in affixes)
    {
        Lucene.Net.Util.Fst.Util.ToUTF32(pair.Key, keyScratch);

        IList<char?> values = pair.Value;
        IntsRef encoded = new IntsRef(values.Count);
        for (int i = 0; i < values.Count; i++)
        {
            // A missing (null) char is stored as 0.
            encoded.Ints[encoded.Length++] = values[i].GetValueOrDefault();
        }

        fstBuilder.Add(keyScratch, encoded);
    }

    return fstBuilder.Finish();
}