/// <summary>
/// Creates or opens the cache database at <paramref name="cachePath"/> and binds
/// <c>entries</c> to its only array (index 0), keyed by word with ordinal string ordering.
/// </summary>
/// <param name="stream">Source data handed to <c>CreateEntries</c> to produce the array contents.</param>
/// <param name="cachePath">Path of the cache database passed to <c>CreateOrOpen</c>.</param>
private void Init(Stream stream, string cachePath)
{
    // Layout of a single meaning: part of speech, word, then two string lists.
    // The field order must stay in sync with both mapping lambdas below.
    var meaningSerializer = Serializer.ForComposite()
        .With(Serializer.ForEnum<PartOfSpeech>())
        .With(Serializer.ForStringAsUTF8())
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
        .Create()
        .Mapping(
            fields => new WordnetDictionaryMeaning(
                (PartOfSpeech)fields[0],
                (string)fields[1],
                (IReadOnlyList<string>)fields[2],
                (IReadOnlyList<string>)fields[3]),
            m => new object[]
            {
                m.PartOfSpeech,
                m.Word,
                m.Definition,
                m.Examples
            });

    // Each stored entry is a (key, list of meanings) pair.
    var entrySerializer = Serializer.ForKeyValuePair(
        Serializer.ForStringAsUTF8(),
        Serializer.ForReadOnlyList(meaningSerializer));

    db = TinyIndex.Database.CreateOrOpen(cachePath, Version)
        .AddIndirectArray(
            entrySerializer,
            _ => CreateEntries(stream),
            pair => pair.Key,
            StringComparer.Ordinal)
        .Build();

    var entryCache = new LruCache<long, KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(64);
    entries = db.Get<KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(0, entryCache);
}
/// <summary>
/// Creates or opens the cache database at <paramref name="cachePath"/> with two arrays:
/// array 0 holds the <c>JGram.Entry</c> records keyed by id, array 1 holds
/// (string key, entry id) pairs keyed by the string.
/// </summary>
/// <param name="jgramPath">Path passed to <c>JGram.Parse</c> (read as UTF-8) to produce the entries.</param>
/// <param name="jgramLookupPath">Path passed to <c>LoadIndexEntries</c> to produce the index pairs.</param>
/// <param name="cachePath">Path of the cache database passed to <c>CreateOrOpen</c>.</param>
public JGramLookup(string jgramPath, string jgramLookupPath, string cachePath)
{
    // Text fields are written as "" when null and read back as null when "",
    // so a null field round-trips through the string serializer.
    string NullToEmpty(string s) => s == null ? "" : s;
    string EmptyToNull(string s) => s == "" ? null : s;

    // Entry layout: id followed by five text fields. The order must match both mappings.
    var entrySerializer = Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .Create()
        .Mapping(
            fields => new JGram.Entry(
                (long)fields[0],
                EmptyToNull((string)fields[1]),
                EmptyToNull((string)fields[2]),
                EmptyToNull((string)fields[3]),
                EmptyToNull((string)fields[4]),
                EmptyToNull((string)fields[5])),
            entry => new object[]
            {
                entry.Id,
                NullToEmpty(entry.Key),
                NullToEmpty(entry.Reading),
                NullToEmpty(entry.Romaji),
                NullToEmpty(entry.Translation),
                NullToEmpty(entry.Example)
            });

    var indexSerializer = Serializer.ForKeyValuePair(
        Serializer.ForStringAsUtf8(),
        Serializer.ForLong());

    // NOTE(review): no explicit comparer is passed for the string-keyed index below;
    // confirm the library's default ordering is what the lookup code expects.
    db = Database.CreateOrOpen(cachePath, Version)
        .AddIndirectArray(
            entrySerializer,
            _ => JGram.Parse(jgramPath, Encoding.UTF8),
            entry => entry.Id)
        .AddIndirectArray(
            indexSerializer,
            _ => LoadIndexEntries(jgramLookupPath),
            pair => pair.Key)
        .Build();

    entries = db.Get<JGram.Entry>(0);
    index = db.Get<KeyValuePair<string, long>>(1);
}
/// <summary>
/// Creates or opens the cache database at <paramref name="cache"/> with three arrays and binds
/// the corresponding fields: array 0 holds the <c>JMDictEntry</c> records keyed by sequence number,
/// array 1 maps each kanji/reading string to the sequence numbers of the entries containing it,
/// and array 2 holds the parser's <c>FriendlyNames</c> pairs.
/// </summary>
/// <param name="stream">Source data handed to <c>JMDictParser.Create</c>.</param>
/// <param name="cache">Path of the cache database passed to <c>CreateOrOpen</c>.</param>
/// <returns>This instance.</returns>
private JMDictLookup Init(Stream stream, string cache)
{
    // Emits (sequence number, text) for every kanji form and then every reading of each entry.
    IEnumerable<KeyValuePair<long, string>> KeysWithSequenceNumbers(IEnumerable<JMDictEntry> source)
    {
        foreach (var entry in source)
        {
            foreach (var kanji in entry.KanjiEntries)
            {
                yield return new KeyValuePair<long, string>(entry.SequenceNumber, kanji.Kanji);
            }

            foreach (var reading in entry.ReadingEntries)
            {
                yield return new KeyValuePair<long, string>(entry.SequenceNumber, reading.Reading);
            }
        }
    }

    // A priority tag is stored as text; an absent tag is written as "".
    var priorityTagSerializer = Serializer.ForStringAsUtf8().Mapping(
        text => PriorityTag.FromString(text),
        tag => tag.Map(value => value.ToString()).ValueOr(""));

    // Cross references round-trip through their string form.
    var crossReferenceSerializer = Serializer.ForStringAsUtf8().Mapping(
        text => EdictCrossReference.Parse(text),
        reference => reference.ToString());

    var loanSourceSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForEnum<EdictLoanSourceType>())
        .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
        .Create()
        .Mapping(
            fields => new EdictLoanSource(
                (string)fields[0],
                (bool)fields[1],
                (EdictLoanSourceType)fields[2],
                (Option<string>)fields[3]),
            loan => new object[]
            {
                loan.SourceLanguage,
                loan.Wasei,
                loan.SourceType,
                loan.LoanWord
            });

    // Kanji element layout: text, informational flags, priority tags.
    var kanjiEntrySerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
        .Create()
        .Mapping(
            fields => new JMDictKanji(
                (string)fields[0],
                (IReadOnlyCollection<EdictKanjiInformation>)fields[1],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[2]).Values().ToList()),
            kanji => new object[]
            {
                kanji.Kanji,
                kanji.Informational,
                kanji.PriorityInfo.Select(tag => tag.Some()).ToList()
            });

    // Reading element layout: text, "not a true reading" flag, the strings it is valid for,
    // reading information flags, priority tags.
    var readingEntrySerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
        .Create()
        .Mapping(
            fields => new JMDictReading(
                (string)fields[0],
                (bool)fields[1],
                (IReadOnlyCollection<string>)fields[2],
                (IReadOnlyCollection<EdictReadingInformation>)fields[3],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[4]).Values().ToList()),
            reading => new object[]
            {
                reading.Reading,
                reading.NotATrueReading,
                reading.ValidReadingFor,
                reading.ReadingInformation,
                reading.PriorityInfo.Select(tag => tag.Some()).ToList()
            });

    // Sense layout: twelve fields. The order below must match both mapping lambdas exactly.
    var senseSerializer = Serializer.ForComposite()
        .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(loanSourceSerializer))
        .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
        .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
        .Create()
        .Mapping(
            fields => new JMDictSense(
                (Option<EdictPartOfSpeech>)fields[0],
                (IReadOnlyCollection<EdictPartOfSpeech>)fields[1],
                (IReadOnlyCollection<EdictDialect>)fields[2],
                (IReadOnlyCollection<string>)fields[3],
                (IReadOnlyCollection<string>)fields[4],
                (IReadOnlyCollection<EdictField>)fields[5],
                (IReadOnlyCollection<EdictMisc>)fields[6],
                (IReadOnlyCollection<string>)fields[7],
                (IReadOnlyCollection<string>)fields[8],
                (IReadOnlyCollection<EdictLoanSource>)fields[9],
                (IReadOnlyCollection<EdictCrossReference>)fields[10],
                (IReadOnlyCollection<EdictCrossReference>)fields[11]),
            sense => new object[]
            {
                sense.Type,
                sense.PartOfSpeechInfo,
                sense.DialectalInfo,
                sense.Glosses,
                sense.Informational,
                sense.FieldData,
                sense.Misc,
                sense.RestrictedToKanji,
                sense.RestrictedToReading,
                sense.LoanSources,
                sense.CrossReferences,
                sense.Antonyms
            });

    // Entry layout: sequence number, readings, kanji forms, senses (in that order).
    var entrySerializer = TinyIndex.Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForReadOnlyCollection(readingEntrySerializer))
        .With(Serializer.ForReadOnlyCollection(kanjiEntrySerializer))
        .With(Serializer.ForReadOnlyCollection(senseSerializer))
        .Create()
        .Mapping(
            fields => new JMDictEntry(
                (long)fields[0],
                (IReadOnlyCollection<JMDictReading>)fields[1],
                (IReadOnlyCollection<JMDictKanji>)fields[2],
                (IReadOnlyCollection<JMDictSense>)fields[3]),
            item => new object[]
            {
                item.SequenceNumber,
                item.ReadingEntries,
                item.KanjiEntries,
                item.Senses
            });

    var keyIndexSerializer = TinyIndex.Serializer.ForKeyValuePair(
        TinyIndex.Serializer.ForStringAsUtf8(),
        TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong()));

    var friendlyNameSerializer = Serializer.ForKeyValuePair(
        Serializer.ForStringAsUtf8(),
        Serializer.ForStringAsUtf8());

    // The parser stays alive for the whole build because two of the array
    // factories below read from it.
    using (var jmdictParser = JMDictParser.Create(stream))
    {
        db = TinyIndex.Database.CreateOrOpen(cache, Version)
            .AddIndirectArray(
                entrySerializer,
                _ => jmdictParser.ReadRemainingToEnd(),
                item => item.SequenceNumber)
            .AddIndirectArray(
                keyIndexSerializer,
                built =>
                {
                    // Derived from array 0: group sequence numbers by kanji/reading text.
                    var storedEntries = built.Get<JMDictEntry>(0).LinearScan();
                    return KeysWithSequenceNumbers(storedEntries)
                        .GroupBy(pair => pair.Value, pair => pair.Key)
                        .Select(group => new KeyValuePair<string, IReadOnlyList<long>>(group.Key, group.ToList()));
                },
                pair => pair.Key,
                StringComparer.Ordinal)
            .AddIndirectArray(
                friendlyNameSerializer,
                _ => jmdictParser.FriendlyNames,
                pair => pair.Key,
                StringComparer.Ordinal)
            .Build();

        var entryCache = new LruCache<long, JMDictEntry>(128);
        entries = db.Get<JMDictEntry>(0, entryCache);

        var keyCache = new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128);
        kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(1, keyCache);

        var friendlyNameCache = new LruCache<long, KeyValuePair<string, string>>(256);
        friendlyNames = db.Get(2, friendlyNameCache);
    }

    return this;
}
/// <summary>
/// Creates or opens the cache database at <paramref name="cache"/> with two arrays and binds
/// the corresponding fields: array 0 holds the <c>JnedictEntry</c> records, array 1 maps each
/// reading/kanji string to the sequence numbers of the entries containing it.
/// </summary>
/// <param name="stream">Source data handed to <c>JMNedictParser.Create</c>.</param>
/// <param name="cache">Path of the cache database passed to <c>CreateOrOpen</c>.</param>
/// <returns>This instance.</returns>
private JMNedictLookup Init(Stream stream, string cache)
{
    // Emits (sequence number, text) for every reading and then every kanji form of each entry.
    IEnumerable<KeyValuePair<long, string>> KeysWithSequenceNumbers(IEnumerable<JnedictEntry> source)
    {
        foreach (var entry in source)
        {
            foreach (var reading in entry.Reading)
            {
                yield return new KeyValuePair<long, string>(entry.SequenceNumber, reading);
            }

            foreach (var kanji in entry.Kanji)
            {
                yield return new KeyValuePair<long, string>(entry.SequenceNumber, kanji);
            }
        }
    }

    // Translation layout: list of name types, list of translation strings.
    var translationSerializer = Serializer.ForComposite()
        .With(Serializer.ForReadOnlyList(Serializer.ForEnum<JMNedictType>()))
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
        .Create()
        .Mapping(
            fields => new JnedictTranslation(
                (IEnumerable<JMNedictType>)fields[0],
                (IEnumerable<string>)fields[1]),
            translation => new object[]
            {
                translation.Type,
                translation.Translation
            });

    // Entry layout: sequence number, kanji forms, readings, translations (in that order).
    var entrySerializer = Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyList(translationSerializer))
        .Create()
        .Mapping(
            fields => new JnedictEntry(
                (long)fields[0],
                (IEnumerable<string>)fields[1],
                (IEnumerable<string>)fields[2],
                (IEnumerable<JnedictTranslation>)fields[3]),
            item => new object[]
            {
                item.SequenceNumber,
                item.Kanji,
                item.Reading,
                item.Translation
            });

    var keyIndexSerializer = Serializer.ForKeyValuePair(
        Serializer.ForStringAsUtf8(),
        Serializer.ForReadOnlyList(Serializer.ForLong()));

    // The parser is only needed while the database is being built.
    using (var parser = JMNedictParser.Create(stream))
    {
        database = Database.CreateOrOpen(cache, Version)
            .AddIndirectArray(
                entrySerializer,
                _ => parser.ReadRemainingToEnd())
            .AddIndirectArray(
                keyIndexSerializer,
                built =>
                {
                    // Derived from array 0: group sequence numbers by reading/kanji text.
                    var storedEntries = built.Get<JnedictEntry>(0).LinearScan();
                    return KeysWithSequenceNumbers(storedEntries)
                        .GroupBy(pair => pair.Value, pair => pair.Key)
                        .Select(group => new KeyValuePair<string, IReadOnlyList<long>>(group.Key, group.ToList()));
                },
                pair => pair.Key,
                StringComparer.Ordinal)
            .Build();
    }

    var entryCache = new LruCache<long, JnedictEntry>(64);
    entries = database.Get<JnedictEntry>(0, entryCache);

    var keyCache = new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(64);
    kvps = database.Get<KeyValuePair<string, IReadOnlyList<long>>>(1, keyCache);

    return this;
}
/// <summary>
/// Creates or opens the cache database at <paramref name="cachePath"/> with three arrays and binds
/// the corresponding fields: array 0 holds the <c>YomichanDictionaryEntry</c> records, array 1 is
/// the string-keyed index produced by <c>Index</c> over array 0, and array 2 holds the single
/// <c>YomichanDictionaryVersion</c> header record.
/// </summary>
/// <param name="zip">Lazily opened archive; its <c>Value</c> is only read inside the lazy factories below.</param>
/// <param name="cachePath">Path of the cache database passed to <c>CreateOrOpen</c>.</param>
private void Init(Lazy<IZipFile> zip, string cachePath)
{
    // The "Sequenced" flag is stored as an int: non-zero reads back as true, true is written as 1.
    var flagSerializer = Serializer.ForInt().Mapping(stored => stored != 0, flag => flag ? 1 : 0);

    // Header layout: title, format, revision, sequenced flag.
    var headerSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForInt())
        .With(Serializer.ForStringAsUtf8())
        .With(flagSerializer)
        .Create()
        .Mapping(
            fields => new YomichanDictionaryVersion()
            {
                Title = (string)fields[0],
                Format = (int)fields[1],
                Revision = (string)fields[2],
                Sequenced = (bool)fields[3]
            },
            header => new object[]
            {
                header.Title,
                header.Format,
                header.Revision,
                header.Sequenced
            });

    // Entry layout: eight fields. The order below must match both mapping lambdas exactly.
    var entrySerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForInt())
        .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForInt())
        .With(Serializer.ForStringAsUtf8())
        .Create()
        .Mapping(
            fields => new YomichanDictionaryEntry
            {
                Expression = (string)fields[0],
                Reading = (string)fields[1],
                DefinitionTags = (string)fields[2],
                Rules = (string)fields[3],
                Score = (int)fields[4],
                Glossary = (IReadOnlyList<string>)fields[5],
                Sequence = (int)fields[6],
                TermTags = (string)fields[7]
            },
            item => new object[]
            {
                item.Expression,
                item.Reading,
                item.DefinitionTags,
                item.Rules,
                item.Score,
                item.Glossary,
                item.Sequence,
                item.TermTags
            });

    var indexSerializer = Serializer.ForKeyValuePair(
        Serializer.ForStringAsUtf8(),
        Serializer.ForReadOnlyList(Serializer.ForLong()));

    // Both lazies defer touching zip.Value until one of the array factories asks for data.
    var headerInfo = new Lazy<(YomichanDictionaryVersion version, IEnumerable<string> dataFilePaths)>(
        () => GetHeaderInfo(zip.Value));
    var parsedEntries = new Lazy<IEnumerable<YomichanDictionaryEntry>>(
        () => ParseEntriesFromZip(headerInfo.Value.dataFilePaths, zip.Value));

    db = Database.CreateOrOpen(cachePath, Version)
        .AddIndirectArray(
            entrySerializer,
            _ => parsedEntries.Value)
        .AddIndirectArray(
            indexSerializer,
            built => Index(built.Get<YomichanDictionaryEntry>(0).LinearScan()),
            pair => pair.Key,
            StringComparer.Ordinal)
        .AddIndirectArray(
            headerSerializer,
            _ => EnumerableExt.OfSingle(headerInfo.Value.version))
        .Build();

    var entryCache = new LruCache<long, YomichanDictionaryEntry>(16);
    entries = db.Get<YomichanDictionaryEntry>(0, entryCache);

    var indexCache = new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(32);
    index = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(1, indexCache);

    // The header array holds exactly one record; read it back eagerly.
    this.version = db.Get<YomichanDictionaryVersion>(2).LinearScan().First();
}