Example #1
0
        /// <summary>
        /// Sets up the serializer for word-to-meanings pairs, creates or opens the TinyIndex
        /// database at <paramref name="cachePath"/>, and keeps a handle to array 0 in
        /// <c>entries</c>.
        /// </summary>
        /// <param name="stream">Input handed to <c>CreateEntries</c> from the array's entry callback.</param>
        /// <param name="cachePath">Path passed to <c>TinyIndex.Database.CreateOrOpen</c>.</param>
        private void Init(Stream stream, string cachePath)
        {
            var wordSerializer = Serializer.ForStringAsUTF8();

            // A meaning is stored as four positional fields. The casts in the read mapping
            // and the array in the write mapping must follow the same order as the With calls.
            var meaningSerializer = Serializer.ForComposite()
                .With(Serializer.ForEnum<PartOfSpeech>())
                .With(Serializer.ForStringAsUTF8())
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
                .Create()
                .Mapping(
                    fields => new WordnetDictionaryMeaning(
                        (PartOfSpeech)fields[0],
                        (string)fields[1],
                        (IReadOnlyList<string>)fields[2],
                        (IReadOnlyList<string>)fields[3]),
                    m => new object[] { m.PartOfSpeech, m.Word, m.Definition, m.Examples });

            var entrySerializer = Serializer.ForKeyValuePair(
                wordSerializer,
                Serializer.ForReadOnlyList(meaningSerializer));

            // Array 0: entries keyed by the pair's string key, compared ordinally.
            db = TinyIndex.Database.CreateOrOpen(cachePath, Version)
                .AddIndirectArray(
                    entrySerializer,
                    _ => CreateEntries(stream),
                    entry => entry.Key,
                    StringComparer.Ordinal)
                .Build();

            var entryCache =
                new LruCache<long, KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(64);
            entries = db.Get<KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(0, entryCache);
        }
        /// <summary>
        /// Creates or opens the cache database at <paramref name="cachePath"/> with two arrays:
        /// JGram entries keyed by id (array 0) and string-to-id pairs keyed by the string (array 1).
        /// </summary>
        /// <param name="jgramPath">Path passed to <c>JGram.Parse</c> (read as UTF-8) by the entry callback.</param>
        /// <param name="jgramLookupPath">Path passed to <c>LoadIndexEntries</c> by the index callback.</param>
        /// <param name="cachePath">Path passed to <c>Database.CreateOrOpen</c>.</param>
        public JGramLookup(string jgramPath, string jgramLookupPath, string cachePath)
        {
            // Null strings are written as "" and "" is read back as null.
            // NOTE(review): presumably the string serializer cannot store null - confirm.
            string NullToEmpty(string s) => s == null ? "" : s;
            string EmptyToNull(string s) => s == "" ? null : s;

            // Positional layout: id followed by five strings; both mappings rely on this order.
            var entrySerializer = Serializer.ForComposite()
                .With(Serializer.ForLong())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .Create()
                .Mapping(
                    fields => new JGram.Entry(
                        (long)fields[0],
                        EmptyToNull((string)fields[1]),
                        EmptyToNull((string)fields[2]),
                        EmptyToNull((string)fields[3]),
                        EmptyToNull((string)fields[4]),
                        EmptyToNull((string)fields[5])),
                    entry => new object[]
                    {
                        entry.Id,
                        NullToEmpty(entry.Key),
                        NullToEmpty(entry.Reading),
                        NullToEmpty(entry.Romaji),
                        NullToEmpty(entry.Translation),
                        NullToEmpty(entry.Example)
                    });

            var indexSerializer = Serializer.ForKeyValuePair(
                Serializer.ForStringAsUtf8(),
                Serializer.ForLong());

            // NOTE(review): no explicit comparer is given for the string-keyed index array;
            // whichever default TinyIndex applies must match the one used at lookup time - confirm.
            db = Database.CreateOrOpen(cachePath, Version)
                .AddIndirectArray(entrySerializer, _ => JGram.Parse(jgramPath, Encoding.UTF8), entry => entry.Id)
                .AddIndirectArray(indexSerializer, _ => LoadIndexEntries(jgramLookupPath), pair => pair.Key)
                .Build();

            entries = db.Get<JGram.Entry>(0);
            index = db.Get<KeyValuePair<string, long>>(1);
        }
        /// <summary>
        /// Wires up the serializers for JMDict entries and creates or opens the TinyIndex
        /// database at <paramref name="cache"/> with three arrays: entries keyed by sequence
        /// number (0), kanji/reading text mapped to sequence numbers (1), and the parser's
        /// friendly names (2). Handles to all three arrays are stored in fields.
        /// </summary>
        /// <param name="stream">Input handed to <c>JMDictParser.Create</c>.</param>
        /// <param name="cache">Path passed to <c>TinyIndex.Database.CreateOrOpen</c>.</param>
        /// <returns>This instance.</returns>
        private JMDictLookup Init(Stream stream, string cache)
        {
            // Priority tags are stored as text; when the option has no value, ValueOr supplies "".
            var priorityTagSerializer = Serializer.ForStringAsUtf8().Mapping(
                text => PriorityTag.FromString(text),
                tag => tag.Map(t => t.ToString()).ValueOr(""));

            // Cross references round-trip through their string form.
            var crossReferenceSerializer = Serializer.ForStringAsUtf8().Mapping(
                text => EdictCrossReference.Parse(text),
                reference => reference.ToString());

            var loanSourceSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(SerializerExt.ForBool())
                .With(Serializer.ForEnum<EdictLoanSourceType>())
                .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
                .Create()
                .Mapping(
                    fields => new EdictLoanSource(
                        (string)fields[0],
                        (bool)fields[1],
                        (EdictLoanSourceType)fields[2],
                        (Option<string>)fields[3]),
                    source => new object[]
                    {
                        source.SourceLanguage,
                        source.Wasei,
                        source.SourceType,
                        source.LoanWord
                    });

            // Each serializer local below is named after the type it produces.
            // Priority tags are stored as options and unwrapped again with Values() on read.
            var readingSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(SerializerExt.ForBool())
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
                .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictReading(
                        (string)fields[0],
                        (bool)fields[1],
                        (IReadOnlyCollection<string>)fields[2],
                        (IReadOnlyCollection<EdictReadingInformation>)fields[3],
                        ((IReadOnlyCollection<Option<PriorityTag>>)fields[4]).Values().ToList()),
                    reading => new object[]
                    {
                        reading.Reading,
                        reading.NotATrueReading,
                        reading.ValidReadingFor,
                        reading.ReadingInformation,
                        reading.PriorityInfo.Select(tag => tag.Some()).ToList()
                    });

            var kanjiSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
                .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictKanji(
                        (string)fields[0],
                        (IReadOnlyCollection<EdictKanjiInformation>)fields[1],
                        ((IReadOnlyCollection<Option<PriorityTag>>)fields[2]).Values().ToList()),
                    kanji => new object[]
                    {
                        kanji.Kanji,
                        kanji.Informational,
                        kanji.PriorityInfo.Select(tag => tag.Some()).ToList()
                    });

            // Twelve positional fields; the cast indices below mirror the With order exactly.
            var senseSerializer = Serializer.ForComposite()
                .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(loanSourceSerializer))
                .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
                .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictSense(
                        (Option<EdictPartOfSpeech>)fields[0],
                        (IReadOnlyCollection<EdictPartOfSpeech>)fields[1],
                        (IReadOnlyCollection<EdictDialect>)fields[2],
                        (IReadOnlyCollection<string>)fields[3],
                        (IReadOnlyCollection<string>)fields[4],
                        (IReadOnlyCollection<EdictField>)fields[5],
                        (IReadOnlyCollection<EdictMisc>)fields[6],
                        (IReadOnlyCollection<string>)fields[7],
                        (IReadOnlyCollection<string>)fields[8],
                        (IReadOnlyCollection<EdictLoanSource>)fields[9],
                        (IReadOnlyCollection<EdictCrossReference>)fields[10],
                        (IReadOnlyCollection<EdictCrossReference>)fields[11]),
                    sense => new object[]
                    {
                        sense.Type,
                        sense.PartOfSpeechInfo,
                        sense.DialectalInfo,
                        sense.Glosses,
                        sense.Informational,
                        sense.FieldData,
                        sense.Misc,
                        sense.RestrictedToKanji,
                        sense.RestrictedToReading,
                        sense.LoanSources,
                        sense.CrossReferences,
                        sense.Antonyms
                    });

            // Entry layout: sequence number, readings, kanji forms, senses.
            var entrySerializer = TinyIndex.Serializer.ForComposite()
                .With(Serializer.ForLong())
                .With(Serializer.ForReadOnlyCollection(readingSerializer))
                .With(Serializer.ForReadOnlyCollection(kanjiSerializer))
                .With(Serializer.ForReadOnlyCollection(senseSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictEntry(
                        (long)fields[0],
                        (IReadOnlyCollection<JMDictReading>)fields[1],
                        (IReadOnlyCollection<JMDictKanji>)fields[2],
                        (IReadOnlyCollection<JMDictSense>)fields[3]),
                    entry => new object[]
                    {
                        entry.SequenceNumber,
                        entry.ReadingEntries,
                        entry.KanjiEntries,
                        entry.Senses
                    });

            // Build() is called while the parser is still alive, because the array callbacks read from it.
            using (var jmdictParser = JMDictParser.Create(stream))
            {
                db = TinyIndex.Database.CreateOrOpen(cache, Version)
                    .AddIndirectArray(
                        entrySerializer,
                        _ => jmdictParser.ReadRemainingToEnd(),
                        entry => entry.SequenceNumber)
                    .AddIndirectArray(
                        TinyIndex.Serializer.ForKeyValuePair(
                            TinyIndex.Serializer.ForStringAsUtf8(),
                            TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong())),
                        built =>
                        {
                            // Emits one (sequence number, text) pair per kanji form and then
                            // per reading of every entry.
                            IEnumerable<KeyValuePair<long, string>> PairIdWithText(IEnumerable<JMDictEntry> source)
                            {
                                foreach (var entry in source)
                                {
                                    foreach (var kanji in entry.KanjiEntries)
                                    {
                                        yield return new KeyValuePair<long, string>(entry.SequenceNumber, kanji.Kanji);
                                    }

                                    foreach (var reading in entry.ReadingEntries)
                                    {
                                        yield return new KeyValuePair<long, string>(entry.SequenceNumber, reading.Reading);
                                    }
                                }
                            }

                            // The index is derived from array 0 of the database being built.
                            var stored = built.Get<JMDictEntry>(0).LinearScan();
                            return PairIdWithText(stored)
                                .GroupBy(pair => pair.Value, pair => pair.Key)
                                .Select(ids => new KeyValuePair<string, IReadOnlyList<long>>(ids.Key, ids.ToList()));
                        },
                        pair => pair.Key,
                        StringComparer.Ordinal)
                    .AddIndirectArray(
                        Serializer.ForKeyValuePair(Serializer.ForStringAsUtf8(), Serializer.ForStringAsUtf8()),
                        _ => jmdictParser.FriendlyNames,
                        pair => pair.Key,
                        StringComparer.Ordinal)
                    .Build();

                entries = db.Get<JMDictEntry>(0, new LruCache<long, JMDictEntry>(128));
                kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
                    1,
                    new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128));
                // The element type of array 2 is inferred from the cache argument.
                friendlyNames = db.Get(2, new LruCache<long, KeyValuePair<string, string>>(256));
            }

            return this;
        }
        /// <summary>
        /// Sets up the serializers for JMnedict entries and creates or opens the database at
        /// <paramref name="cache"/> with two arrays: the entries (0) and a text-to-sequence-numbers
        /// index keyed ordinally by text (1). Handles to both are stored in fields.
        /// </summary>
        /// <param name="stream">Input handed to <c>JMNedictParser.Create</c>.</param>
        /// <param name="cache">Path passed to <c>Database.CreateOrOpen</c>.</param>
        /// <returns>This instance.</returns>
        private JMNedictLookup Init(Stream stream, string cache)
        {
            // Leading entry fields: sequence number followed by two string lists.
            var entryFields = Serializer.ForComposite()
                .With(Serializer.ForLong())
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()));

            // A translation is a list of type enums plus a list of strings.
            var translationSerializer = Serializer.ForComposite()
                .With(Serializer.ForReadOnlyList(Serializer.ForEnum<JMNedictType>()))
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
                .Create()
                .Mapping(
                    fields => new JnedictTranslation(
                        (IEnumerable<JMNedictType>)fields[0],
                        (IEnumerable<string>)fields[1]),
                    translation => new object[]
                    {
                        translation.Type,
                        translation.Translation
                    });

            // The translation list is the fourth and last field of an entry.
            var entrySerializer = entryFields
                .With(Serializer.ForReadOnlyList(translationSerializer))
                .Create()
                .Mapping(
                    fields => new JnedictEntry(
                        (long)fields[0],
                        (IEnumerable<string>)fields[1],
                        (IEnumerable<string>)fields[2],
                        (IEnumerable<JnedictTranslation>)fields[3]),
                    entry => new object[]
                    {
                        entry.SequenceNumber,
                        entry.Kanji,
                        entry.Reading,
                        entry.Translation
                    });

            // Build() is called while the parser is still alive, because the entry callback reads from it.
            using (var parser = JMNedictParser.Create(stream))
            {
                database = Database.CreateOrOpen(cache, Version)
                    .AddIndirectArray(
                        entrySerializer,
                        _ => parser.ReadRemainingToEnd())
                    .AddIndirectArray(
                        Serializer.ForKeyValuePair(
                            Serializer.ForStringAsUtf8(),
                            Serializer.ForReadOnlyList(Serializer.ForLong())),
                        built =>
                        {
                            // Emits one (sequence number, text) pair per reading and then
                            // per kanji form of every entry.
                            IEnumerable<KeyValuePair<long, string>> PairIdWithText(IEnumerable<JnedictEntry> source)
                            {
                                foreach (var entry in source)
                                {
                                    foreach (var reading in entry.Reading)
                                    {
                                        yield return new KeyValuePair<long, string>(entry.SequenceNumber, reading);
                                    }

                                    foreach (var kanji in entry.Kanji)
                                    {
                                        yield return new KeyValuePair<long, string>(entry.SequenceNumber, kanji);
                                    }
                                }
                            }

                            // The index is derived from array 0 of the database being built.
                            var stored = built.Get<JnedictEntry>(0).LinearScan();
                            return PairIdWithText(stored)
                                .GroupBy(pair => pair.Value, pair => pair.Key)
                                .Select(ids => new KeyValuePair<string, IReadOnlyList<long>>(ids.Key, ids.ToList()));
                        },
                        pair => pair.Key,
                        StringComparer.Ordinal)
                    .Build();
            }

            entries = database.Get<JnedictEntry>(0, new LruCache<long, JnedictEntry>(64));
            kvps = database.Get<KeyValuePair<string, IReadOnlyList<long>>>(
                1,
                new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(64));

            return this;
        }
        /// <summary>
        /// Sets up the serializers for a Yomichan dictionary and creates or opens the database at
        /// <paramref name="cachePath"/> with three arrays: entries (0), a string-keyed index over
        /// those entries (1), and a single-element array holding the dictionary version (2).
        /// Stores handles to arrays 0 and 1 and the version read from array 2 in fields.
        /// </summary>
        /// <param name="zip">
        /// Lazily opened archive; <c>zip.Value</c> is read only inside the lazy factories below.
        /// </param>
        /// <param name="cachePath">Path passed to <c>Database.CreateOrOpen</c>.</param>
        private void Init(Lazy<IZipFile> zip, string cachePath)
        {
            var headerSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForInt())
                .With(Serializer.ForStringAsUtf8())
                // The boolean flag is stored as an int: non-zero reads as true, true writes 1.
                .With(Serializer.ForInt().Mapping(stored => stored != 0, flag => flag ? 1 : 0))
                .Create()
                .Mapping(
                    fields => new YomichanDictionaryVersion
                    {
                        Title = (string)fields[0],
                        Format = (int)fields[1],
                        Revision = (string)fields[2],
                        Sequenced = (bool)fields[3]
                    },
                    info => new object[]
                    {
                        info.Title,
                        info.Format,
                        info.Revision,
                        info.Sequenced
                    });

            // Eight positional fields; the cast indices below mirror the With order exactly.
            var entrySerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForInt())
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForInt())
                .With(Serializer.ForStringAsUtf8())
                .Create()
                .Mapping(
                    fields => new YomichanDictionaryEntry
                    {
                        Expression = (string)fields[0],
                        Reading = (string)fields[1],
                        DefinitionTags = (string)fields[2],
                        Rules = (string)fields[3],
                        Score = (int)fields[4],
                        Glossary = (IReadOnlyList<string>)fields[5],
                        Sequence = (int)fields[6],
                        TermTags = (string)fields[7]
                    },
                    entry => new object[]
                    {
                        entry.Expression,
                        entry.Reading,
                        entry.DefinitionTags,
                        entry.Rules,
                        entry.Score,
                        entry.Glossary,
                        entry.Sequence,
                        entry.TermTags
                    });

            var indexSerializer = Serializer.ForKeyValuePair(
                Serializer.ForStringAsUtf8(),
                Serializer.ForReadOnlyList(Serializer.ForLong()));

            // Both values are computed on first access only, so the archive is not touched
            // unless an array callback actually runs.
            // NOTE(review): presumably the callbacks run only when the cache has to be built - confirm.
            var header = new Lazy<(YomichanDictionaryVersion version, IEnumerable<string> dataFilePaths)>(
                () => GetHeaderInfo(zip.Value));
            var parsedEntries = new Lazy<IEnumerable<YomichanDictionaryEntry>>(
                () => ParseEntriesFromZip(header.Value.dataFilePaths, zip.Value));

            db = Database.CreateOrOpen(cachePath, Version)
                .AddIndirectArray(entrySerializer, _ => parsedEntries.Value)
                .AddIndirectArray(
                    indexSerializer,
                    built => Index(built.Get<YomichanDictionaryEntry>(0).LinearScan()),
                    pair => pair.Key,
                    StringComparer.Ordinal)
                .AddIndirectArray(headerSerializer, _ => EnumerableExt.OfSingle(header.Value.version))
                .Build();

            entries = db.Get<YomichanDictionaryEntry>(0, new LruCache<long, YomichanDictionaryEntry>(16));
            index = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
                1,
                new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(32));
            // Array 2 holds the single version record written above.
            this.version = db.Get<YomichanDictionaryVersion>(2).LinearScan().First();
        }