Ejemplo n.º 1
0
        /// <summary>
        /// Creates a new Dictionary containing the information read from the provided streams to hunspell affix
        /// and dictionary files.
        /// You have to close the provided streams yourself.
        /// </summary>
        /// <remarks>
        /// The affix stream is first copied to a temporary file because it is read twice: once to determine
        /// the encoding and once to parse the affix rules. The temporary file is deleted (best effort) before
        /// the constructor returns, whether it completes normally or throws.
        /// </remarks>
        /// <param name="affix"> Stream for reading the hunspell affix file (won't be closed). </param>
        /// <param name="dictionaries"> Streams for reading the hunspell dictionary files (won't be closed). </param>
        /// <param name="ignoreCase"> Stored as this dictionary's ignore-case flag; when <c>true</c>, input cleaning is enabled as well. </param>
        /// <exception cref="IOException"> Can be thrown while reading from the streams </exception>
        /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
        public Dictionary(Stream affix, IList <Stream> dictionaries, bool ignoreCase)
        {
            this.ignoreCase          = ignoreCase;
            this.needsInputCleaning  = ignoreCase;
            this.needsOutputCleaning = false; // set if we have an OCONV
            flagLookup.Add(new BytesRef());   // no flags -> ord 0

            FileInfo aff = FileSupport.CreateTempFile("affix", "aff", tempDir);

            try
            {
                using (Stream @out = aff.Open(FileMode.Open, FileAccess.ReadWrite))
                {
                    // copy contents of affix stream to temp file
                    affix.CopyTo(@out);
                }

                // pass 1: get encoding
                string encoding;

                using (Stream aff1 = aff.Open(FileMode.Open, FileAccess.Read))
                {
                    encoding = GetDictionaryEncoding(aff1);
                }

                // pass 2: parse affixes
                Encoding decoder = GetSystemEncoding(encoding);

                using (Stream aff2 = aff.Open(FileMode.Open, FileAccess.Read))
                {
                    ReadAffixFile(aff2, decoder);
                }

                // read dictionary entries
                IntSequenceOutputs o = IntSequenceOutputs.Singleton;
                Builder <IntsRef>  b = new Builder <IntsRef>(FST.INPUT_TYPE.BYTE4, o);

                ReadDictionaryFiles(dictionaries, decoder, b);
                words   = b.Finish();
                aliases = null; // no longer needed
            }
            finally
            {
                // Always remove the temp file, including when reading/parsing failed above;
                // otherwise every failed construction would leave a stray file in tempDir.
                try
                {
                    aff.Delete();
                }
                catch
                {
                    // Deliberately ignored: cleanup is best effort, and throwing from this
                    // finally block would hide the exception that caused the failure.
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds an FST from the given affix map: each key is converted to UTF-32 and used as the
        /// input, and the key's list of values becomes the output int sequence.
        /// </summary>
        /// <param name="affixes"> Sorted map from a string key to its list of nullable chars; a null entry is written as 0. </param>
        /// <returns> The FST produced by the builder after all entries were added. </returns>
        private FST <IntsRef> AffixFST(SortedDictionary <string, IList <char?> > affixes)
        {
            var fstBuilder = new Builder <IntsRef>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.Singleton);

            // Reused for every key; ToUTF32 overwrites it on each iteration.
            var keyInts = new IntsRef();

            foreach (KeyValuePair <string, IList <char?> > pair in affixes)
            {
                Lucene.Net.Util.Fst.Util.ToUTF32(pair.Key, keyInts);

                IList <char?> values = pair.Value;

                // A fresh output per key, sized to hold one int per list element.
                var valueInts = new IntsRef(values.Count);
                foreach (char? value in values)
                {
                    // Missing (null) entries are stored as 0.
                    int code = value ?? '\0';
                    valueInts.Ints[valueInts.Length++] = code;
                }

                fstBuilder.Add(keyInts, valueInts);
            }

            return fstBuilder.Finish();
        }