/// <summary>
/// Reads one reading element from <c>xmlReader</c>, consuming nodes until the closing tag
/// named <paramref name="tag"/> at depth <paramref name="depth"/> (or the end of input) is reached.
/// </summary>
/// <param name="depth">Reader depth at which the element's closing tag is expected.</param>
/// <param name="tag">Name of the element being read; used to recognize its closing tag.</param>
/// <returns>A <c>JMDictReading</c> built from the child elements that were encountered.</returns>
/// <exception cref="InvalidDataException">No <c>reb</c> child was encountered.</exception>
private JMDictReading ReadReadingElement(int depth, string tag)
{
    // Both helpers inspect the reader's *current* position on every call. The child checks
    // below are deliberately independent `if`s (not an else-if chain) because the helper
    // calls advance the reader, and each later check must see the new position.
    bool AtStartOf(string name) =>
        xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == name;

    bool AtClosingTag() =>
        xmlReader.NodeType == XmlNodeType.EndElement
        && xmlReader.Name == tag
        && xmlReader.Depth == depth;

    string reading = null;
    var isNotTrueReading = false;
    var validFor = new List<string>();
    var information = new List<EdictReadingInformation>();
    var priorityTags = new List<PriorityTag>();

    while (xmlReader.Read() && !AtClosingTag())
    {
        if (AtStartOf("reb"))
        {
            reading = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
        }

        if (AtStartOf("re_nokanji"))
        {
            // The mere presence of the element sets the flag; its content is not read here.
            isNotTrueReading = true;
        }

        if (AtStartOf("re_restr"))
        {
            var restrictedTo = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
            validFor.Add(restrictedTo);
        }

        if (AtStartOf("re_inf"))
        {
            var parsedInfo = ReadReInf(xmlReader.Depth, xmlReader.Name);
            if (parsedInfo != null)
            {
                information.Add(parsedInfo.Value);
            }
        }

        if (AtStartOf("re_pri"))
        {
            var rawPriority = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
            // Only values for which FromString yields a tag are recorded.
            PriorityTag.FromString(rawPriority).MatchSome(parsed => priorityTags.Add(parsed));
        }
    }

    if (reading == null)
    {
        throw new InvalidDataException();
    }

    return new JMDictReading(reading, isNotTrueReading, validFor, information, priorityTags);
}
/// <summary>
/// Reads one kanji element from <c>xmlReader</c>, consuming nodes until the closing tag
/// named <paramref name="tag"/> at depth <paramref name="depth"/> (or the end of input) is reached.
/// </summary>
/// <param name="depth">Reader depth at which the element's closing tag is expected.</param>
/// <param name="tag">Name of the element being read; used to recognize its closing tag.</param>
/// <returns>A <c>JMDictKanji</c> built from the child elements that were encountered.</returns>
/// <exception cref="InvalidDataException">
/// No <c>keb</c> child was encountered. This mirrors how <c>ReadReadingElement</c> treats a
/// missing <c>reb</c>, instead of handing a null kanji string to the rest of the pipeline.
/// </exception>
private JMDictKanji ReadKanjiElement(int depth, string tag)
{
    string kanjiElement = null;
    var infoList = new List<EdictKanjiInformation>();
    var priorities = new List<PriorityTag>();

    while (xmlReader.Read())
    {
        if (xmlReader.NodeType == XmlNodeType.EndElement && xmlReader.Name == tag && xmlReader.Depth == depth)
        {
            break;
        }

        // The checks below are intentionally independent `if`s: each helper call advances
        // the reader, and the following checks are evaluated against the new position.
        if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "keb")
        {
            kanjiElement = ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name);
        }

        if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "ke_inf")
        {
            var keInf = ReadKeInf(xmlReader.Depth, xmlReader.Name);
            if (keInf != null)
            {
                infoList.Add(keInf.Value);
            }
        }

        if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "ke_pri")
        {
            var priorityTag = PriorityTag.FromString(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
            // Only values for which FromString yields a tag are recorded.
            priorityTag.MatchSome(p => priorities.Add(p));
        }
    }

    // Fail fast on malformed input rather than constructing an entry with a null kanji string.
    if (kanjiElement == null)
    {
        throw new InvalidDataException("Kanji element is missing its required 'keb' child.");
    }

    return new JMDictKanji(kanjiElement, infoList, priorities);
}
/// <summary>
/// Creates or opens the TinyIndex database at <paramref name="cache"/>, declares three arrays on it
/// (dictionary entries, a text-to-sequence-numbers lookup, and friendly names), and stores the
/// resulting handles in <c>db</c>, <c>entries</c>, <c>kvps</c> and <c>friendlyNames</c>.
/// </summary>
/// <param name="stream">Handed to <c>JMDictParser.Create</c>; the parser is disposed before returning.</param>
/// <param name="cache">Handed to <c>TinyIndex.Database.CreateOrOpen</c> together with <c>Version</c>.</param>
/// <returns>This instance.</returns>
private JMDictLookup Init(Stream stream, string cache)
{
    // Priority tags are stored as UTF-8 text; an empty option is written as "".
    var prioritySerializer = Serializer.ForStringAsUtf8().Mapping(
        text => PriorityTag.FromString(text),
        maybeTag => maybeTag.Map(t => t.ToString()).ValueOr(""));

    // Cross references round-trip through their string form.
    var crossRefSerializer = Serializer.ForStringAsUtf8().Mapping(
        text => EdictCrossReference.Parse(text),
        reference => reference.ToString());

    var loanSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForEnum<EdictLoanSourceType>())
        .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
        .Create()
        .Mapping(
            fields => new EdictLoanSource(
                (string)fields[0],
                (bool)fields[1],
                (EdictLoanSourceType)fields[2],
                (Option<string>)fields[3]),
            loan => new object[]
            {
                loan.SourceLanguage,
                loan.Wasei,
                loan.SourceType,
                loan.LoanWord
            });

    // Serializes JMDictReading values. Priority tags are wrapped in options on the way out
    // and unwrapped with Values() on the way in, because prioritySerializer works on options.
    var readingSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
        .With(Serializer.ForReadOnlyCollection(prioritySerializer))
        .Create()
        .Mapping(
            fields => new JMDictReading(
                (string)fields[0],
                (bool)fields[1],
                (IReadOnlyCollection<string>)fields[2],
                (IReadOnlyCollection<EdictReadingInformation>)fields[3],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[4]).Values().ToList()),
            reading => new object[]
            {
                reading.Reading,
                reading.NotATrueReading,
                reading.ValidReadingFor,
                reading.ReadingInformation,
                reading.PriorityInfo.Select(tag => tag.Some()).ToList()
            });

    // Serializes JMDictKanji values; same option wrapping for priority tags as above.
    var kanjiSerializer = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
        .With(Serializer.ForReadOnlyCollection(prioritySerializer))
        .Create()
        .Mapping(
            fields => new JMDictKanji(
                (string)fields[0],
                (IReadOnlyCollection<EdictKanjiInformation>)fields[1],
                ((IReadOnlyCollection<Option<PriorityTag>>)fields[2]).Values().ToList()),
            kanji => new object[]
            {
                kanji.Kanji,
                kanji.Informational,
                kanji.PriorityInfo.Select(tag => tag.Some()).ToList()
            });

    // Twelve components; the order of the With(...) calls must match the indices used in
    // the constructor call and the order of the values in the object[] below.
    var senseSerializer = Serializer.ForComposite()
        .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(loanSerializer))
        .With(Serializer.ForReadOnlyCollection(crossRefSerializer))
        .With(Serializer.ForReadOnlyCollection(crossRefSerializer))
        .Create()
        .Mapping(
            fields => new JMDictSense(
                (Option<EdictPartOfSpeech>)fields[0],
                (IReadOnlyCollection<EdictPartOfSpeech>)fields[1],
                (IReadOnlyCollection<EdictDialect>)fields[2],
                (IReadOnlyCollection<string>)fields[3],
                (IReadOnlyCollection<string>)fields[4],
                (IReadOnlyCollection<EdictField>)fields[5],
                (IReadOnlyCollection<EdictMisc>)fields[6],
                (IReadOnlyCollection<string>)fields[7],
                (IReadOnlyCollection<string>)fields[8],
                (IReadOnlyCollection<EdictLoanSource>)fields[9],
                (IReadOnlyCollection<EdictCrossReference>)fields[10],
                (IReadOnlyCollection<EdictCrossReference>)fields[11]),
            sense => new object[]
            {
                sense.Type,
                sense.PartOfSpeechInfo,
                sense.DialectalInfo,
                sense.Glosses,
                sense.Informational,
                sense.FieldData,
                sense.Misc,
                sense.RestrictedToKanji,
                sense.RestrictedToReading,
                sense.LoanSources,
                sense.CrossReferences,
                sense.Antonyms
            });

    // Entry layout: sequence number, readings, kanji, senses (in that order).
    var entrySerializer = TinyIndex.Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForReadOnlyCollection(readingSerializer))
        .With(Serializer.ForReadOnlyCollection(kanjiSerializer))
        .With(Serializer.ForReadOnlyCollection(senseSerializer))
        .Create()
        .Mapping(
            fields => new JMDictEntry(
                (long)fields[0],
                (IReadOnlyCollection<JMDictReading>)fields[1],
                (IReadOnlyCollection<JMDictKanji>)fields[2],
                (IReadOnlyCollection<JMDictSense>)fields[3]),
            entry => new object[]
            {
                entry.SequenceNumber,
                entry.ReadingEntries,
                entry.KanjiEntries,
                entry.Senses
            });

    using (var jmdictParser = JMDictParser.Create(stream))
    {
        var lookupSerializer = TinyIndex.Serializer.ForKeyValuePair(
            TinyIndex.Serializer.ForStringAsUtf8(),
            TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong()));

        var friendlyNameSerializer = Serializer.ForKeyValuePair(
            Serializer.ForStringAsUtf8(),
            Serializer.ForStringAsUtf8());

        db = TinyIndex.Database.CreateOrOpen(cache, Version)
            // Array 0: the entries themselves, keyed by sequence number.
            .AddIndirectArray(
                entrySerializer,
                _ => jmdictParser.ReadRemainingToEnd(),
                entry => entry.SequenceNumber)
            // Array 1: every kanji/reading string mapped to the sequence numbers of the
            // entries containing it, derived by scanning array 0 of the database passed in.
            .AddIndirectArray(
                lookupSerializer,
                built =>
                {
                    IEnumerable<JMDictEntry> allEntries = built.Get<JMDictEntry>(0).LinearScan();

                    // Per entry: all kanji strings first, then all reading strings.
                    var sequenceAndText = allEntries.SelectMany(
                        entry => entry.KanjiEntries.Select(k => k.Kanji)
                            .Concat(entry.ReadingEntries.Select(r => r.Reading)),
                        (entry, text) => new KeyValuePair<long, string>(entry.SequenceNumber, text));

                    return sequenceAndText
                        .GroupBy(pair => pair.Value, pair => pair.Key)
                        .Select(bucket => new KeyValuePair<string, IReadOnlyList<long>>(bucket.Key, bucket.ToList()));
                },
                pair => pair.Key,
                StringComparer.Ordinal)
            // Array 2: the parser's friendly names, keyed by the pair's key.
            .AddIndirectArray(
                friendlyNameSerializer,
                _ => jmdictParser.FriendlyNames,
                pair => pair.Key,
                StringComparer.Ordinal)
            .Build();

        entries = db.Get<JMDictEntry>(0, new LruCache<long, JMDictEntry>(128));
        kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
            1,
            new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128));
        friendlyNames = db.Get(2, new LruCache<long, KeyValuePair<string, string>>(256));
    }

    return this;
}