Ejemplo n.º 1
0
        /// <summary>
        /// Builds the entry serializer, creates or opens the database at
        /// <paramref name="cachePath"/>, and binds <c>entries</c> to its first array.
        /// </summary>
        /// <param name="stream">Source data forwarded to <c>CreateEntries</c>.</param>
        /// <param name="cachePath">Location handed to <c>TinyIndex.Database.CreateOrOpen</c>.</param>
        private void Init(Stream stream, string cachePath)
        {
            // A single meaning is stored as four positional fields. The order of the
            // With(...) calls must stay in sync with both mapping lambdas below.
            var meaningSerializer = Serializer.ForComposite()
                .With(Serializer.ForEnum<PartOfSpeech>())
                .With(Serializer.ForStringAsUTF8())
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
                .With(Serializer.ForReadOnlyList(Serializer.ForStringAsUTF8()))
                .Create()
                .Mapping(
                    fields => new WordnetDictionaryMeaning(
                        (PartOfSpeech)fields[0],
                        (string)fields[1],
                        (IReadOnlyList<string>)fields[2],
                        (IReadOnlyList<string>)fields[3]),
                    m => new object[] { m.PartOfSpeech, m.Word, m.Definition, m.Examples });

            // Each stored entry pairs a string key with the list of its meanings.
            var entrySerializer = Serializer.ForKeyValuePair(
                Serializer.ForStringAsUTF8(),
                Serializer.ForReadOnlyList(meaningSerializer));

            // Array 0 is keyed by the pair's Key, compared ordinally.
            // NOTE(review): presumably CreateEntries only runs when the cache has to be (re)built — confirm.
            db = TinyIndex.Database
                .CreateOrOpen(cachePath, Version)
                .AddIndirectArray(
                    entrySerializer,
                    database => CreateEntries(stream),
                    entry => entry.Key,
                    StringComparer.Ordinal)
                .Build();

            var entryCache =
                new LruCache<long, KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(64);
            entries = db.Get<KeyValuePair<string, IReadOnlyList<WordnetDictionaryMeaning>>>(0, entryCache);
        }
        /// <summary>
        /// Builds the serializers for JMDict entries, creates or opens the database at
        /// <paramref name="cache"/>, and binds <c>entries</c>, <c>kvps</c> and
        /// <c>friendlyNames</c> to its three arrays.
        /// </summary>
        /// <param name="stream">Input handed to <c>JMDictParser.Create</c>.</param>
        /// <param name="cache">Location handed to <c>TinyIndex.Database.CreateOrOpen</c>.</param>
        /// <returns>This instance.</returns>
        private JMDictLookup Init(Stream stream, string cache)
        {
            // Every composite below is positional: the order of the With(...) calls must
            // match the indices used when reading and the array order used when writing.

            // A priority tag is stored as a string; an absent tag is written as "".
            var prioritySerializer = Serializer.ForStringAsUtf8().Mapping(
                text => PriorityTag.FromString(text),
                tag => tag.Map(t => t.ToString()).ValueOr(""));

            // Cross references round-trip through their string form.
            var xrefSerializer = Serializer.ForStringAsUtf8().Mapping(
                text => EdictCrossReference.Parse(text),
                xref => xref.ToString());

            var loanSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(SerializerExt.ForBool())
                .With(Serializer.ForEnum<EdictLoanSourceType>())
                .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
                .Create()
                .Mapping(
                    fields => new EdictLoanSource(
                        (string)fields[0],
                        (bool)fields[1],
                        (EdictLoanSourceType)fields[2],
                        (Option<string>)fields[3]),
                    loan => new object[]
                    {
                        loan.SourceLanguage,
                        loan.Wasei,
                        loan.SourceType,
                        loan.LoanWord
                    });

            // Serializer for JMDictReading elements.
            var readingElementSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(SerializerExt.ForBool())
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
                .With(Serializer.ForReadOnlyCollection(prioritySerializer))
                .Create()
                .Mapping(
                    fields => new JMDictReading(
                        (string)fields[0],
                        (bool)fields[1],
                        (IReadOnlyCollection<string>)fields[2],
                        (IReadOnlyCollection<EdictReadingInformation>)fields[3],
                        ((IReadOnlyCollection<Option<PriorityTag>>)fields[4]).Values().ToList()),
                    reading => new object[]
                    {
                        reading.Reading,
                        reading.NotATrueReading,
                        reading.ValidReadingFor,
                        reading.ReadingInformation,
                        reading.PriorityInfo.Select(tag => tag.Some()).ToList()
                    });

            // Serializer for JMDictKanji elements.
            var kanjiElementSerializer = Serializer.ForComposite()
                .With(Serializer.ForStringAsUtf8())
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
                .With(Serializer.ForReadOnlyCollection(prioritySerializer))
                .Create()
                .Mapping(
                    fields => new JMDictKanji(
                        (string)fields[0],
                        (IReadOnlyCollection<EdictKanjiInformation>)fields[1],
                        ((IReadOnlyCollection<Option<PriorityTag>>)fields[2]).Values().ToList()),
                    kanji => new object[]
                    {
                        kanji.Kanji,
                        kanji.Informational,
                        kanji.PriorityInfo.Select(tag => tag.Some()).ToList()
                    });

            var senseElementSerializer = Serializer.ForComposite()
                .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                .With(Serializer.ForReadOnlyCollection(loanSerializer))
                .With(Serializer.ForReadOnlyCollection(xrefSerializer))
                .With(Serializer.ForReadOnlyCollection(xrefSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictSense(
                        (Option<EdictPartOfSpeech>)fields[0],
                        (IReadOnlyCollection<EdictPartOfSpeech>)fields[1],
                        (IReadOnlyCollection<EdictDialect>)fields[2],
                        (IReadOnlyCollection<string>)fields[3],
                        (IReadOnlyCollection<string>)fields[4],
                        (IReadOnlyCollection<EdictField>)fields[5],
                        (IReadOnlyCollection<EdictMisc>)fields[6],
                        (IReadOnlyCollection<string>)fields[7],
                        (IReadOnlyCollection<string>)fields[8],
                        (IReadOnlyCollection<EdictLoanSource>)fields[9],
                        (IReadOnlyCollection<EdictCrossReference>)fields[10],
                        (IReadOnlyCollection<EdictCrossReference>)fields[11]),
                    sense => new object[]
                    {
                        sense.Type,
                        sense.PartOfSpeechInfo,
                        sense.DialectalInfo,
                        sense.Glosses,
                        sense.Informational,
                        sense.FieldData,
                        sense.Misc,
                        sense.RestrictedToKanji,
                        sense.RestrictedToReading,
                        sense.LoanSources,
                        sense.CrossReferences,
                        sense.Antonyms
                    });

            // A full entry: sequence number, readings, kanji, senses — in that order.
            var jmdictEntrySerializer = TinyIndex.Serializer.ForComposite()
                .With(Serializer.ForLong())
                .With(Serializer.ForReadOnlyCollection(readingElementSerializer))
                .With(Serializer.ForReadOnlyCollection(kanjiElementSerializer))
                .With(Serializer.ForReadOnlyCollection(senseElementSerializer))
                .Create()
                .Mapping(
                    fields => new JMDictEntry(
                        (long)fields[0],
                        (IReadOnlyCollection<JMDictReading>)fields[1],
                        (IReadOnlyCollection<JMDictKanji>)fields[2],
                        (IReadOnlyCollection<JMDictSense>)fields[3]),
                    entry => new object[]
                    {
                        entry.SequenceNumber,
                        entry.ReadingEntries,
                        entry.KanjiEntries,
                        entry.Senses
                    });

            // The parser is referenced by the array factories below, so the database
            // is built while the parser is still alive.
            using (var jmdictParser = JMDictParser.Create(stream))
            {
                db = TinyIndex.Database
                    .CreateOrOpen(cache, Version)
                    // Array 0: entries keyed by sequence number.
                    .AddIndirectArray(
                        jmdictEntrySerializer,
                        database => jmdictParser.ReadRemainingToEnd(),
                        entry => entry.SequenceNumber)
                    // Array 1: each kanji/reading string mapped to the sequence numbers
                    // of the entries that contain it, keyed ordinally by that string.
                    .AddIndirectArray(
                        TinyIndex.Serializer.ForKeyValuePair(
                            TinyIndex.Serializer.ForStringAsUtf8(),
                            TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong())),
                        database =>
                        {
                            // Derived from array 0 of the database being built
                            // (the lambda argument, not the db field).
                            IEnumerable<JMDictEntry> storedEntries = database.Get<JMDictEntry>(0).LinearScan();

                            // Per entry: all kanji spellings first, then all readings.
                            var spellings = storedEntries.SelectMany(entry =>
                                entry.KanjiEntries
                                    .Select(k => new KeyValuePair<long, string>(entry.SequenceNumber, k.Kanji))
                                    .Concat(entry.ReadingEntries
                                        .Select(r => new KeyValuePair<long, string>(entry.SequenceNumber, r.Reading))));

                            return spellings
                                .GroupBy(pair => pair.Value, pair => pair.Key)
                                .Select(group => new KeyValuePair<string, IReadOnlyList<long>>(group.Key, group.ToList()));
                        },
                        pair => pair.Key,
                        StringComparer.Ordinal)
                    // Array 2: the parser's FriendlyNames pairs, keyed ordinally by Key.
                    .AddIndirectArray(
                        Serializer.ForKeyValuePair(Serializer.ForStringAsUtf8(), Serializer.ForStringAsUtf8()),
                        database => jmdictParser.FriendlyNames,
                        pair => pair.Key,
                        StringComparer.Ordinal)
                    .Build();

                entries = db.Get<JMDictEntry>(0, new LruCache<long, JMDictEntry>(128));
                kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
                    1,
                    new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128));
                friendlyNames = db.Get(2, new LruCache<long, KeyValuePair<string, string>>(256));
            }

            return this;
        }