/// <summary>
/// Declares the serializer for dictionary entries, creates or opens the TinyIndex
/// database at <paramref name="cachePath"/>, and stores the database and its first
/// array accessor in the <c>db</c> and <c>entries</c> fields.
/// </summary>
/// <param name="stream">Forwarded to <c>CreateEntries</c> by the array's source callback.</param>
/// <param name="cachePath">Passed to <c>TinyIndex.Database.CreateOrOpen</c> together with <c>Version</c>.</param>
private void Init(Stream stream, string cachePath)
{
    var keySerializer = Serializer.ForStringAsUTF8();

    // One meaning is stored as four slots: part of speech, word, and two string lists.
    // The third slot is fed by meaning.Definition and the fourth by meaning.Examples.
    var meaningSerializer = Serializer.ForComposite()
        .With(Serializer.ForEnum<PartOfSpeech>())
        .With(Serializer.ForStringAsUTF8())
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
        .Create()
        .Mapping(
            fields => new WordnetDictionaryMeaning(
                (PartOfSpeech)fields[0],
                (string)fields[1],
                (IReadOnlyList<string>)fields[2],
                (IReadOnlyList<string>)fields[3]),
            m => new object[]
            {
                m.PartOfSpeech,
                m.Word,
                m.Definition,
                m.Examples
            });

    // An entry pairs a string key with the list of its meanings.
    var entrySerializer = Serializer.ForKeyValuePair(
        keySerializer,
        Serializer.ForReadOnlyList(meaningSerializer));

    // The source callback ignores its argument and reads from the caller's stream.
    // NOTE(review): presumably only invoked when the cache has to be (re)built - confirm against TinyIndex.
    db = TinyIndex.Database.CreateOrOpen(cachePath, Version)
        .AddIndirectArray(
            entrySerializer,
            _ => CreateEntries(stream),
            pair => pair.Key,
            StringComparer.Ordinal)
        .Build();

    var entryCache = new LruCache<long, KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(64);
    entries = db.Get<KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(0, entryCache);
}
/// <summary>
/// Declares the serializers for every part of a JMDict entry, creates or opens the
/// TinyIndex database at <paramref name="cache"/>, and stores the database and its
/// three array accessors in the <c>db</c>, <c>entries</c>, <c>kvps</c> and
/// <c>friendlyNames</c> fields.
/// </summary>
/// <param name="stream">Handed to <c>JMDictParser.Create</c>; the parser is disposed before this method returns.</param>
/// <param name="cache">Passed to <c>TinyIndex.Database.CreateOrOpen</c> together with <c>Version</c>.</param>
/// <returns>This instance.</returns>
private JMDictLookup Init(Stream stream, string cache)
{
    // A priority tag is stored as its string form; an absent tag is written as "".
    var priorityTagSerializer = Serializer.ForStringAsUtf8().Mapping(
        text => PriorityTag.FromString(text),
        tag => tag.Map(t => t.ToString()).ValueOr(""));

    // Cross references round-trip through ToString / Parse.
    var crossReferenceSerializer = Serializer.ForStringAsUtf8().Mapping(
        text => EdictCrossReference.Parse(text),
        xref => xref.ToString());

    var loanSourceSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForEnum<EdictLoanSourceType>())
        .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
        .Create()
        .Mapping(
            fields => new EdictLoanSource(
                (string)fields[0],
                (bool)fields[1],
                (EdictLoanSourceType)fields[2],
                (Option<string>)fields[3]),
            loan => new object[]
            {
                loan.SourceLanguage,
                loan.Wasei,
                loan.SourceType,
                loan.LoanWord
            });

    // Serializer for JMDictReading elements.
    var readingSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
        .Create()
        .Mapping(
            fields => new JMDictReading(
                (string)fields[0],
                (bool)fields[1],
                (IReadOnlyCollection<string>)fields[2],
                (IReadOnlyCollection<EdictReadingInformation>)fields[3],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[4]).Values().ToList()),
            reading => new object[]
            {
                reading.Reading,
                reading.NotATrueReading,
                reading.ValidReadingFor,
                reading.ReadingInformation,
                reading.PriorityInfo.Select(p => p.Some()).ToList()
            });

    // Serializer for JMDictKanji elements.
    var kanjiSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
        .Create()
        .Mapping(
            fields => new JMDictKanji(
                (string)fields[0],
                (IReadOnlyCollection<EdictKanjiInformation>)fields[1],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[2]).Values().ToList()),
            kanji => new object[]
            {
                kanji.Kanji,
                kanji.Informational,
                kanji.PriorityInfo.Select(p => p.Some()).ToList()
            });

    // Twelve slots per sense; the slot order must match between the two mapping lambdas.
    var senseSerializer = Serializer.ForComposite()
        .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(loanSourceSerializer))
        .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
        .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
        .Create()
        .Mapping(
            fields => new JMDictSense(
                (Option<EdictPartOfSpeech>)fields[0],
                (IReadOnlyCollection<EdictPartOfSpeech>)fields[1],
                (IReadOnlyCollection<EdictDialect>)fields[2],
                (IReadOnlyCollection<string>)fields[3],
                (IReadOnlyCollection<string>)fields[4],
                (IReadOnlyCollection<EdictField>)fields[5],
                (IReadOnlyCollection<EdictMisc>)fields[6],
                (IReadOnlyCollection<string>)fields[7],
                (IReadOnlyCollection<string>)fields[8],
                (IReadOnlyCollection<EdictLoanSource>)fields[9],
                (IReadOnlyCollection<EdictCrossReference>)fields[10],
                (IReadOnlyCollection<EdictCrossReference>)fields[11]),
            sense => new object[]
            {
                sense.Type,
                sense.PartOfSpeechInfo,
                sense.DialectalInfo,
                sense.Glosses,
                sense.Informational,
                sense.FieldData,
                sense.Misc,
                sense.RestrictedToKanji,
                sense.RestrictedToReading,
                sense.LoanSources,
                sense.CrossReferences,
                sense.Antonyms
            });

    // Entry layout: sequence number, readings, kanji, senses (in that order).
    var entrySerializer = TinyIndex.Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForReadOnlyCollection(readingSerializer))
        .With(Serializer.ForReadOnlyCollection(kanjiSerializer))
        .With(Serializer.ForReadOnlyCollection(senseSerializer))
        .Create()
        .Mapping(
            fields => new JMDictEntry(
                (long)fields[0],
                (IReadOnlyCollection<JMDictReading>)fields[1],
                (IReadOnlyCollection<JMDictKanji>)fields[2],
                (IReadOnlyCollection<JMDictSense>)fields[3]),
            entry => new object[]
            {
                entry.SequenceNumber,
                entry.ReadingEntries,
                entry.KanjiEntries,
                entry.Senses
            });

    using (var parser = JMDictParser.Create(stream))
    {
        db = TinyIndex.Database.CreateOrOpen(cache, Version)
            // First array: the entries themselves, keyed by sequence number.
            .AddIndirectArray(
                entrySerializer,
                _ => parser.ReadRemainingToEnd(),
                entry => entry.SequenceNumber)
            // Second array: each kanji/reading string paired with the sequence numbers of
            // the entries containing it, derived by scanning the entries fetched with Get(0).
            .AddIndirectArray(
                TinyIndex.Serializer.ForKeyValuePair(
                    TinyIndex.Serializer.ForStringAsUtf8(),
                    TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong())),
                database => database.Get<JMDictEntry>(0)
                    .LinearScan()
                    // Per entry: all kanji strings first, then all reading strings.
                    .SelectMany(e => e.KanjiEntries
                        .Select(k => new KeyValuePair<long, string>(e.SequenceNumber, k.Kanji))
                        .Concat(e.ReadingEntries
                            .Select(r => new KeyValuePair<long, string>(e.SequenceNumber, r.Reading))))
                    .GroupBy(pair => pair.Value, pair => pair.Key)
                    .Select(group => new KeyValuePair<string, IReadOnlyList<long>>(group.Key, group.ToList())),
                pair => pair.Key,
                StringComparer.Ordinal)
            // Third array: the parser's FriendlyNames string pairs, keyed by the pair's key.
            .AddIndirectArray(
                Serializer.ForKeyValuePair(Serializer.ForStringAsUtf8(), Serializer.ForStringAsUtf8()),
                _ => parser.FriendlyNames,
                pair => pair.Key,
                StringComparer.Ordinal)
            .Build();

        // NOTE(review): indices 0/1/2 appear to follow the AddIndirectArray call order above - confirm against TinyIndex.
        entries = db.Get<JMDictEntry>(0, new LruCache<long, JMDictEntry>(128));
        kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
            1,
            new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128));
        friendlyNames = db.Get(2, new LruCache<long, KeyValuePair<string, string>>(256));
    }

    return this;
}