Пример #1
0
        public static Option <PriorityTag> FromString(string str)
        {
            if (TryParse(str, out var tag))
            {
                return(tag.Some());
            }

            return(Option.None <PriorityTag>());


            bool TryParse(string s, out PriorityTag t)
            {
                var firstDigitIndex = s.IndexOfAny(new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' });

                if (firstDigitIndex == -1)
                {
                    t = default;
                    return(false);
                }

                if (Enum.TryParse <Kind>(s.Substring(0, firstDigitIndex), out var kind) &&
                    int.TryParse(s.Substring(firstDigitIndex), out var value))
                {
                    t = new PriorityTag(value, kind);
                    return(true);
                }

                t = default;
                return(false);
            }
        }
        private JMDictReading ReadReadingElement(int depth, string tag)
        {
            string readingElement  = null;
            bool   notATrueReading = false;
            var    restrictedList  = new List <string>();
            var    infoList        = new List <EdictReadingInformation>();
            var    priorities      = new List <PriorityTag>();

            while (xmlReader.Read())
            {
                if (xmlReader.NodeType == XmlNodeType.EndElement && xmlReader.Name == tag && xmlReader.Depth == depth)
                {
                    break;
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "reb")
                {
                    readingElement = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "re_nokanji")
                {
                    notATrueReading = true;
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "re_restr")
                {
                    restrictedList.Add(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "re_inf")
                {
                    var reInf = ReadReInf(xmlReader.Depth, xmlReader.Name);
                    if (reInf != null)
                    {
                        infoList.Add(reInf.Value);
                    }
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "re_pri")
                {
                    var priorityTag = PriorityTag.FromString(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
                    priorityTag.MatchSome(p => priorities.Add(p));
                }
            }

            return(new JMDictReading(
                       readingElement ?? throw new InvalidDataException(),
                       notATrueReading,
                       restrictedList,
                       infoList,
                       priorities));
        }
Пример #3
0
        public int?CompareTo(PriorityTag other)
        {
            if (this.kind == Kind.none && other.kind != Kind.none)
            {
                return(-1);
            }
            if (this.kind != Kind.none && other.kind == Kind.none)
            {
                return(1);
            }

            if (this.kind != other.kind)
            {
                return(null);
            }

            return(this.raw.CompareTo(other.raw));
        }
        private JMDictKanji ReadKanjiElement(int depth, string tag)
        {
            string kanjiElement = null;
            var    infoList     = new List <EdictKanjiInformation>();
            var    priorities   = new List <PriorityTag>();

            while (xmlReader.Read())
            {
                if (xmlReader.NodeType == XmlNodeType.EndElement && xmlReader.Name == tag && xmlReader.Depth == depth)
                {
                    break;
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "keb")
                {
                    kanjiElement = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "ke_inf")
                {
                    var keInf = ReadKeInf(xmlReader.Depth, xmlReader.Name);
                    if (keInf != null)
                    {
                        infoList.Add(keInf.Value);
                    }
                }

                if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "ke_pri")
                {
                    var priorityTag = PriorityTag.FromString(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
                    priorityTag.MatchSome(p => priorities.Add(p));
                }
            }

            return(new JMDictKanji(
                       kanjiElement,
                       infoList,
                       priorities));
        }
        private JMDictLookup Init(Stream stream, string cache)
        {
            var priorityTagSerializer = Serializer.ForStringAsUtf8().Mapping(
                raw => PriorityTag.FromString(raw),
                pTag => pTag.Map(p => p.ToString()).ValueOr(""));

            var crossReferenceSerializer = Serializer.ForStringAsUtf8().Mapping(
                raw => EdictCrossReference.Parse(raw),
                obj => obj.ToString());

            var loanSourceSerializer = Serializer.ForComposite()
                                       .With(Serializer.ForStringAsUtf8())
                                       .With(SerializerExt.ForBool())
                                       .With(Serializer.ForEnum <EdictLoanSourceType>())
                                       .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
                                       .Create()
                                       .Mapping(
                raw => new EdictLoanSource(
                    (string)raw[0],
                    (bool)raw[1],
                    (EdictLoanSourceType)raw[2],
                    (Option <string>)raw[3]),
                obj => new object[]
            {
                obj.SourceLanguage,
                obj.Wasei,
                obj.SourceType,
                obj.LoanWord
            });

            var kanjiSerializer = Serializer.ForComposite()
                                  .With(Serializer.ForStringAsUtf8())
                                  .With(SerializerExt.ForBool())
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictReadingInformation>()))
                                  .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
                                  .Create()
                                  .Mapping(
                raw => new JMDictReading(
                    (string)raw[0],
                    (bool)raw[1],
                    (IReadOnlyCollection <string>)raw[2],
                    (IReadOnlyCollection <EdictReadingInformation>)raw[3],
                    ((IReadOnlyCollection <Option <PriorityTag> >)raw[4]).Values().ToList()),
                obj => new object[]
            {
                obj.Reading,
                obj.NotATrueReading,
                obj.ValidReadingFor,
                obj.ReadingInformation,
                obj.PriorityInfo.Select(p => p.Some()).ToList()
            });

            var readingSerializer = Serializer.ForComposite()
                                    .With(Serializer.ForStringAsUtf8())
                                    .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictKanjiInformation>()))
                                    .With(Serializer.ForReadOnlyCollection(priorityTagSerializer))
                                    .Create()
                                    .Mapping(
                raw => new JMDictKanji(
                    (string)raw[0],
                    (IReadOnlyCollection <EdictKanjiInformation>)raw[1],
                    ((IReadOnlyCollection <Option <PriorityTag> >)raw[2]).Values().ToList()),
                obj => new object[]
            {
                obj.Kanji,
                obj.Informational,
                obj.PriorityInfo.Select(p => p.Some()).ToList()
            });

            var senseSerializer = Serializer.ForComposite()
                                  .With(SerializerExt.ForOption(Serializer.ForEnum <EdictPartOfSpeech>()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictPartOfSpeech>()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictDialect>()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictField>()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum <EdictMisc>()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                                  .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
                                  .With(Serializer.ForReadOnlyCollection(loanSourceSerializer))
                                  .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
                                  .With(Serializer.ForReadOnlyCollection(crossReferenceSerializer))
                                  .Create()
                                  .Mapping(
                raw => new JMDictSense(
                    (Option <EdictPartOfSpeech>)raw[0],
                    (IReadOnlyCollection <EdictPartOfSpeech>)raw[1],
                    (IReadOnlyCollection <EdictDialect>)raw[2],
                    (IReadOnlyCollection <string>)raw[3],
                    (IReadOnlyCollection <string>)raw[4],
                    (IReadOnlyCollection <EdictField>)raw[5],
                    (IReadOnlyCollection <EdictMisc>)raw[6],
                    (IReadOnlyCollection <string>)raw[7],
                    (IReadOnlyCollection <string>)raw[8],
                    (IReadOnlyCollection <EdictLoanSource>)raw[9],
                    (IReadOnlyCollection <EdictCrossReference>)raw[10],
                    (IReadOnlyCollection <EdictCrossReference>)raw[11]),
                obj => new object[]
            {
                obj.Type,
                obj.PartOfSpeechInfo,
                obj.DialectalInfo,
                obj.Glosses,
                obj.Informational,
                obj.FieldData,
                obj.Misc,
                obj.RestrictedToKanji,
                obj.RestrictedToReading,
                obj.LoanSources,
                obj.CrossReferences,
                obj.Antonyms
            });

            var entrySerializer = TinyIndex.Serializer.ForComposite()
                                  .With(Serializer.ForLong())
                                  .With(Serializer.ForReadOnlyCollection(kanjiSerializer))
                                  .With(Serializer.ForReadOnlyCollection(readingSerializer))
                                  .With(Serializer.ForReadOnlyCollection(senseSerializer))
                                  .Create()
                                  .Mapping(
                raw => new JMDictEntry(
                    (long)raw[0],
                    (IReadOnlyCollection <JMDictReading>)raw[1],
                    (IReadOnlyCollection <JMDictKanji>)raw[2],
                    (IReadOnlyCollection <JMDictSense>)raw[3]),
                obj => new object[]
            {
                obj.SequenceNumber,
                obj.ReadingEntries,
                obj.KanjiEntries,
                obj.Senses
            });

            using (var jmdictParser = JMDictParser.Create(stream))
            {
                db = TinyIndex.Database.CreateOrOpen(cache, Version)
                     .AddIndirectArray(entrySerializer, db => jmdictParser.ReadRemainingToEnd(),
                                       x => x.SequenceNumber)
                     .AddIndirectArray(
                    TinyIndex.Serializer.ForKeyValuePair(
                        TinyIndex.Serializer.ForStringAsUtf8(),
                        TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong())),
                    db =>
                {
                    IEnumerable <KeyValuePair <long, string> > It(IEnumerable <JMDictEntry> entries)
                    {
                        foreach (var e in entries)
                        {
                            foreach (var k in e.KanjiEntries)
                            {
                                yield return(new KeyValuePair <long, string>(e.SequenceNumber, k.Kanji));
                            }

                            foreach (var r in e.ReadingEntries)
                            {
                                yield return(new KeyValuePair <long, string>(e.SequenceNumber, r.Reading));
                            }
                        }
                    }

                    return(It(db.Get <JMDictEntry>(0)
                              .LinearScan())
                           .GroupBy(kvp => kvp.Value, kvp => kvp.Key)
                           .Select(x => new KeyValuePair <string, IReadOnlyList <long> >(x.Key, x.ToList())));
                },
                    x => x.Key, StringComparer.Ordinal)
                     .AddIndirectArray(
                    Serializer.ForKeyValuePair(Serializer.ForStringAsUtf8(), Serializer.ForStringAsUtf8()),
                    db => jmdictParser.FriendlyNames,
                    x => x.Key, StringComparer.Ordinal)
                     .Build();
                entries = db.Get <JMDictEntry>(0, new LruCache <long, JMDictEntry>(128));
                kvps    = db.Get <KeyValuePair <string, IReadOnlyList <long> > >(1,
                                                                                 new LruCache <long, KeyValuePair <string, IReadOnlyList <long> > >(128));
                friendlyNames = db.Get(2, new LruCache <long, KeyValuePair <string, string> >(256));
            }

            return(this);
        }