/// <summary>
/// Builds a <see cref="DictionaryMetadata"/> with a randomly picked encoder type and
/// encoding plus a <c>'|'</c> separator, writes it to text, reads it back and checks
/// that all three settings are reported unchanged.
/// </summary>
public void TestWriteMetadata()
{
    StringWriter output = new StringWriter();

    // The encoder type is drawn before the encoding.
    EncoderType[] encoderTypes = (EncoderType[])Enum.GetValues(typeof(EncoderType));
    EncoderType expectedEncoderType = randomFrom(encoderTypes);
    Encoding[] candidateEncodings = { Encoding.UTF8, Encoding.GetEncoding("iso-8859-1"), Encoding.ASCII };
    var expectedEncoding = randomFrom(candidateEncodings);

    var written = DictionaryMetadata.Builder()
        .Encoding(expectedEncoding)
        .Encoder(expectedEncoderType)
        .Separator('|')
        .Build();
    written.Write(output);

    // The written text is always turned into UTF-8 bytes, whichever encoding was picked above.
    byte[] serialized = Encoding.UTF8.GetBytes(output.ToString());
    DictionaryMetadata reloaded = DictionaryMetadata.Read(new MemoryStream(serialized));

    Assert.AreEqual((byte)'|', reloaded.Separator);
    Assert.AreEqual(expectedEncoding, reloaded.Decoder);
    Assert.AreEqual(expectedEncoding, reloaded.Encoder);
    Assert.AreEqual(expectedEncoderType, reloaded.SequenceEncoderType);
}
/// <summary>
/// Reads the <c>unicode-separator.info</c> resource and checks that the separator
/// it declares is the tab byte.
/// </summary>
public void TestUnicodeSeparator()
{
    // The resource stream is owned by this test, so it is disposed here once the
    // metadata has been read.
    using (var resourceStream = GetType().getResourceAsStream("unicode-separator.info"))
    {
        DictionaryMetadata m = DictionaryMetadata.Read(resourceStream);

        Assert.AreEqual((byte)'\t', m.Separator);
    }
}
/// <summary>
/// Loads a dictionary from the FSA file at <paramref name="location"/> and from the
/// metadata file whose path is obtained by passing the same location to
/// <c>DictionaryMetadata.GetExpectedMetadataLocation</c>.
/// </summary>
/// <param name="location">The location of the dictionary file (<c>*.dict</c>).</param>
/// <returns>An instantiated dictionary.</returns>
/// <exception cref="IOException">If an I/O error occurs.</exception>
public static Dictionary Read(string location)
{
    string metadataLocation = DictionaryMetadata.GetExpectedMetadataLocation(location);

    // Both files are opened here and closed again as soon as the dictionary is built.
    using (var fsaInput = File.OpenRead(location))
    {
        using (var metadataInput = File.OpenRead(metadataLocation))
        {
            return Read(fsaInput, metadataInput);
        }
    }
}
/// <summary>
/// Creates a new object of this class using the given dictionary: its FSA is used
/// for word lookups and its metadata supplies the sequence encoder, the
/// decoder/encoder and the separator character.
/// </summary>
/// <param name="dictionary">The dictionary to use for lookups.</param>
/// <exception cref="ArgumentNullException">
/// If <paramref name="dictionary"/> or its metadata is <c>null</c>.
/// </exception>
/// <remarks>
/// NOTE(review): an <see cref="ArgumentException"/> for an empty dictionary (no root
/// node) is not raised by this constructor itself; if it occurs it would have to come
/// from the FSA calls made here - TODO confirm.
/// </remarks>
public DictionaryLookup(Dictionary dictionary)
{
    if (dictionary == null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    // Validate the metadata before any of its members is touched; otherwise a null
    // metadata surfaces as a NullReferenceException instead of the argument error below.
    var metadata = dictionary.Metadata;
    if (metadata == null)
    {
        throw new ArgumentNullException(nameof(dictionaryMetadata), "Dictionary metadata must not be null.");
    }

    this.formsList = new ArrayViewList<WordData>(forms, 0, forms.Length);
    this.dictionary = dictionary;
    this.dictionaryMetadata = metadata;
    this.sequenceEncoder = metadata.SequenceEncoderType.Get();
    this.rootNode = dictionary.FSA.GetRootNode();
    this.fsa = dictionary.FSA;
    this.matcher = new FSATraversal(fsa);
    this.finalStatesIterator = new ByteSequenceEnumerator(fsa, fsa.GetRootNode());

    decoder = metadata.Decoder;
    encoder = metadata.Encoder;
    separatorChar = metadata.SeparatorAsChar;
}
/// <summary>
/// Attempts to load a dictionary using the URL to the FSA file and the
/// expected metadata extension.
/// </summary>
/// <remarks>
/// The metadata URL is built by passing the absolute form of
/// <paramref name="dictURL"/> to <c>DictionaryMetadata.GetExpectedMetadataFileName</c>.
/// Requests are created through <see cref="WebRequest.Create(Uri)"/> and used via the
/// base <see cref="WebRequest"/> type, so the URL is not required to be an HTTP one.
/// </remarks>
/// <param name="dictURL">The URL pointing to the dictionary file (<c>*.dict</c>).</param>
/// <returns>An instantiated dictionary.</returns>
/// <exception cref="IOException">
/// If the metadata URL cannot be constructed or an I/O error occurs.
/// </exception>
public static Dictionary Read(Uri dictURL)
{
    Uri expectedMetadataURL;
    try
    {
        string external = dictURL.AbsoluteUri;
        expectedMetadataURL = new Uri(DictionaryMetadata.GetExpectedMetadataFileName(external));
    }
    catch (UriFormatException e)
    {
        throw new IOException("Couldn't construct relative feature map URL for: " + dictURL, e);
    }

    // Deliberately no cast to HttpWebRequest: WebRequest.Create returns a
    // scheme-specific subclass, and a hard cast fails for non-HTTP URIs such as file://.
    // Only members declared on WebRequest/WebResponse are needed below.
    WebRequest fsaRequest = WebRequest.Create(dictURL);
    WebRequest expectedMetadataRequest = WebRequest.Create(expectedMetadataURL);

    using (var fsaResponse = fsaRequest.GetResponse())
    using (var expectedMetadataResponse = expectedMetadataRequest.GetResponse())
    using (var fsaStream = fsaResponse.GetResponseStream())
    using (var metadataStream = expectedMetadataResponse.GetResponseStream())
    {
        return Read(fsaStream, metadataStream);
    }
}
/// <summary>
/// Opens the resource named <paramref name="dictionaryName"/> and the matching
/// metadata resource, and reads both into a <see cref="Dictionary"/>.
/// </summary>
/// <param name="dictionaryName">Resource name of the FSA dictionary.</param>
/// <returns>The dictionary produced by <c>Dictionary.Read</c>.</returns>
private Dictionary ReadDictionary(string dictionaryName)
{
    var owner = GetType();

    using (var fsaInput = owner.getResourceAsStream(dictionaryName))
    {
        // The metadata resource name is derived from the dictionary name.
        var metadataName = DictionaryMetadata.GetExpectedMetadataFileName(dictionaryName);

        using (var metadataInput = owner.getResourceAsStream(metadataName))
        {
            return Dictionary.Read(fsaInput, metadataInput);
        }
    }
}
/// <summary>
/// Pairs an automaton with its metadata. Prefer the static <c>Read</c> methods of
/// this class for loading dictionaries instead of calling this constructor directly.
/// </summary>
/// <param name="fsa">An instantiated <see cref="Morfologik.Fsa.FSA"/> instance.</param>
/// <param name="metadata">
/// The <see cref="DictionaryMetadata"/> describing the compression format and other
/// settings that are not contained in the FSA automaton itself.
/// </param>
public Dictionary(FSA fsa, DictionaryMetadata metadata)
{
    // Both arguments are stored as given.
    FSA = fsa;
    Metadata = metadata;
}
/// <summary>
/// Builds a <see cref="Dictionary"/> from two already opened streams: one holding
/// the FSA data and one holding the associated metadata. This method does not
/// dispose either stream itself; that is left to the caller.
/// </summary>
/// <param name="fsaStream">The stream with FSA data.</param>
/// <param name="metadataStream">The stream with metadata.</param>
/// <returns>Returns an instantiated <see cref="Dictionary"/>.</returns>
/// <exception cref="IOException">If an I/O error occurs.</exception>
public static Dictionary Read(Stream fsaStream, Stream metadataStream)
{
    // The automaton is read first, then the metadata.
    var automaton = FSA.Read(fsaStream);
    var metadata = DictionaryMetadata.Read(metadataStream);

    return new Dictionary(automaton, metadata);
}