Beispiel #1
0
        /// <summary>
        /// Writes a <see cref="DictionaryMetadata"/> instance built with a randomly
        /// picked encoder type and encoding, reads it back, and checks that the
        /// separator, encoding and encoder type read back match what was written.
        /// </summary>
        public void TestWriteMetadata()
        {
            EncoderType encoder = randomFrom((EncoderType[])Enum.GetValues(typeof(EncoderType)));
            var encoding = randomFrom(new Encoding[] {
                Encoding.UTF8,
                Encoding.GetEncoding("iso-8859-1"),
                Encoding.ASCII
            });

            DictionaryMetadata other;

            // Both the writer and the in-memory stream are disposable; scope them
            // so they are released even if writing or reading throws.
            using (var sw = new StringWriter())
            {
                DictionaryMetadata.Builder()
                    .Encoding(encoding)
                    .Encoder(encoder)
                    .Separator('|')
                    .Build()
                    .Write(sw);

                // The written text is re-encoded as UTF-8 bytes before being read back.
                using (var serialized = new MemoryStream(Encoding.UTF8.GetBytes(sw.ToString())))
                {
                    other = DictionaryMetadata.Read(serialized);
                }
            }

            Assert.AreEqual((byte)'|', other.Separator);
            Assert.AreEqual(encoding, other.Decoder);
            Assert.AreEqual(encoding, other.Encoder);
            Assert.AreEqual(encoder, other.SequenceEncoderType);
        }
Beispiel #2
0
        /// <summary>
        /// Reads the <c>unicode-separator.info</c> resource and checks that the
        /// separator it yields is the tab character.
        /// </summary>
        public void TestUnicodeSeparator()
        {
            DictionaryMetadata m;

            // The resource stream is disposable; release it as soon as the
            // metadata has been read instead of leaving it open.
            using (var metadataStream = GetType().getResourceAsStream("unicode-separator.info"))
            {
                m = DictionaryMetadata.Read(metadataStream);
            }

            Assert.AreEqual((byte)'\t', m.Separator);
        }
Beispiel #3
0
        /// <summary>
        /// Loads a dictionary from the FSA file at the given path together with the
        /// metadata file whose location is derived from that path.
        /// </summary>
        /// <param name="location">Path to the dictionary file (<c>*.dict</c>).</param>
        /// <returns>The dictionary read from both files.</returns>
        /// <exception cref="IOException">If an I/O error occurs.</exception>
        public static Dictionary Read(string location)
        {
            // The metadata path is resolved before either file is opened.
            string metadataLocation = DictionaryMetadata.GetExpectedMetadataLocation(location);

            using (var fsaInput = File.OpenRead(location))
            {
                using (var metadataInput = File.OpenRead(metadataLocation))
                {
                    return Read(fsaInput, metadataInput);
                }
            }
        }
        /// <summary>
        /// Creates a new object of this class using the given FSA for word lookups
        /// and encoding for converting characters to bytes.
        /// </summary>
        /// <param name="dictionary">The dictionary to use for lookups.</param>
        /// <exception cref="ArgumentNullException">
        /// If <paramref name="dictionary"/> or its metadata is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">If FSA's root node cannot be acquired (dictionary is empty).</exception>
        public DictionaryLookup(Dictionary dictionary)
        {
            if (dictionary == null)
            {
                throw new ArgumentNullException(nameof(dictionary));
            }

            // Validate the metadata before anything dereferences it; otherwise a
            // NullReferenceException would be raised before this check is reached.
            this.dictionaryMetadata = dictionary.Metadata;
            if (dictionaryMetadata == null)
            {
                throw new ArgumentNullException(nameof(dictionaryMetadata),
                                                "Dictionary metadata must not be null.");
            }

            this.formsList = new ArrayViewList <WordData>(forms, 0, forms.Length);

            this.dictionary      = dictionary;
            this.sequenceEncoder = dictionaryMetadata.SequenceEncoderType.Get();

            this.fsa                 = dictionary.FSA;
            this.rootNode            = fsa.GetRootNode();
            this.matcher             = new FSATraversal(fsa);
            this.finalStatesIterator = new ByteSequenceEnumerator(fsa, fsa.GetRootNode());

            decoder       = dictionaryMetadata.Decoder;
            encoder       = dictionaryMetadata.Encoder;
            separatorChar = dictionaryMetadata.SeparatorAsChar;
        }
Beispiel #5
0
        /// <summary>
        /// Attempts to load a dictionary using the URL to the FSA file and the
        /// expected metadata extension.
        /// </summary>
        /// <param name="dictURL">The URL pointing to the dictionary file (<c>*.dict</c>).</param>
        /// <returns>An instantiated dictionary.</returns>
        /// <exception cref="IOException">
        /// If an I/O error occurs, or if the metadata URL derived from
        /// <paramref name="dictURL"/> is not a valid URI.
        /// </exception>
        public static Dictionary Read(Uri dictURL)
        {
            Uri expectedMetadataURL;

            try
            {
                string external = dictURL.AbsoluteUri;
                expectedMetadataURL = new Uri(DictionaryMetadata.GetExpectedMetadataFileName(external));
            }
            catch (UriFormatException e)
            {
                throw new IOException("Couldn't construct relative feature map URL for: " + dictURL, e);
            }

            // Keep the requests typed as WebRequest: WebRequest.Create returns a
            // scheme-specific subclass (for example FileWebRequest for file: URIs),
            // so casting to HttpWebRequest would fail for anything but HTTP(S).
            // Only GetResponse() is needed, and that is declared on WebRequest.
            WebRequest fsaRequest = WebRequest.Create(dictURL);
            WebRequest expectedMetadataRequest = WebRequest.Create(expectedMetadataURL);

            using (var fsaResponse = fsaRequest.GetResponse())
            {
                using (var expectedMetadataResponse = expectedMetadataRequest.GetResponse())
                {
                    using (var fsaStream = fsaResponse.GetResponseStream())
                    {
                        using (var metadataStream = expectedMetadataResponse.GetResponseStream())
                        {
                            return Read(fsaStream, metadataStream);
                        }
                    }
                }
            }
        }
 /// <summary>
 /// Opens the named dictionary resource and the metadata resource whose name
 /// is derived from it, and reads both into a <see cref="Dictionary"/>.
 /// </summary>
 /// <param name="dictionaryName">Resource name of the FSA dictionary.</param>
 /// <returns>The dictionary read from the two resource streams.</returns>
 private Dictionary ReadDictionary(string dictionaryName)
 {
     var owner = this.GetType();

     using (var fsaInput = owner.getResourceAsStream(dictionaryName))
     {
         // The metadata resource name is resolved only after the FSA stream is open.
         string metadataName = DictionaryMetadata.GetExpectedMetadataFileName(dictionaryName);

         using (var metadataInput = owner.getResourceAsStream(metadataName))
         {
             return Dictionary.Read(fsaInput, metadataInput);
         }
     }
 }
Beispiel #7
0
 /// <summary>
 /// Creates a dictionary from an already instantiated automaton and its
 /// metadata. Using the static methods of this class is strongly recommended
 /// for reading dictionaries.
 /// </summary>
 /// <param name="fsa">An instantiated <see cref="Morfologik.Fsa.FSA"/> instance.</param>
 /// <param name="metadata">
 /// Attributes describing the compression format and other settings that are
 /// not contained in the FSA automaton itself. See
 /// <see cref="DictionaryMetadata"/> for the available attributes and their
 /// possible values.
 /// </param>
 public Dictionary(FSA fsa, DictionaryMetadata metadata)
 {
     FSA = fsa;
     Metadata = metadata;
 }
Beispiel #8
0
 /// <summary>
 /// Builds a dictionary from two already opened streams: one with the FSA
 /// dictionary data and one with the associated metadata. This method does
 /// not dispose the streams itself.
 /// </summary>
 /// <param name="fsaStream">The stream with FSA data.</param>
 /// <param name="metadataStream">The stream with metadata.</param>
 /// <returns>Returns an instantiated <see cref="Dictionary"/>.</returns>
 /// <exception cref="IOException">If an I/O error occurs.</exception>
 public static Dictionary Read(Stream fsaStream, Stream metadataStream)
 {
     // The automaton is read first, then the metadata.
     var automaton = FSA.Read(fsaStream);
     var metadata = DictionaryMetadata.Read(metadataStream);

     return new Dictionary(automaton, metadata);
 }