/// <summary>
/// Builds the serializers for the JMDict entry model, creates or opens the TinyIndex database
/// for <paramref name="cache"/>, and assigns the <c>db</c>, <c>entries</c>, <c>kvps</c> and
/// <c>friendlyNames</c> members from it.
/// </summary>
/// <param name="stream">Handed to <c>JMDictParser.Create</c>; the parser is disposed before this method returns.</param>
/// <param name="cache">Passed to <c>TinyIndex.Database.CreateOrOpen</c> together with <c>Version</c>.</param>
/// <returns>This instance.</returns>
private JMDictLookup Init(Stream stream, string cache)
{
    // A priority tag is stored as a UTF-8 string; an absent tag is written as "".
    var priorityTagCodec = Serializer.ForStringAsUtf8().Mapping(
        text => PriorityTag.FromString(text),
        tag => tag.Map(t => t.ToString()).ValueOr(""));

    // Cross references round-trip through Parse / ToString.
    var crossReferenceCodec = Serializer.ForStringAsUtf8().Mapping(
        text => EdictCrossReference.Parse(text),
        reference => reference.ToString());

    // In every composite below, the order of the With(...) calls fixes the slot index used
    // by both mapping lambdas, so the two must be kept in step.
    var loanSourceCodec = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForEnum<EdictLoanSourceType>())
        .With(SerializerExt.ForOption(Serializer.ForStringAsUtf8()))
        .Create()
        .Mapping(
            slots => new EdictLoanSource(
                (string)slots[0],
                (bool)slots[1],
                (EdictLoanSourceType)slots[2],
                (Option<string>)slots[3]),
            source => new object[]
            {
                source.SourceLanguage,
                source.Wasei,
                source.SourceType,
                source.LoanWord
            });

    // Serializes JMDictReading. Priority tags are wrapped with Some() on write and unwrapped
    // with Values() on read (presumably Values() keeps only the present tags - confirm).
    var readingCodec = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(SerializerExt.ForBool())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictReadingInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagCodec))
        .Create()
        .Mapping(
            slots => new JMDictReading(
                (string)slots[0],
                (bool)slots[1],
                (IReadOnlyCollection<string>)slots[2],
                (IReadOnlyCollection<EdictReadingInformation>)slots[3],
                ((IReadOnlyCollection<Option<PriorityTag>>)slots[4]).Values().ToList()),
            reading => new object[]
            {
                reading.Reading,
                reading.NotATrueReading,
                reading.ValidReadingFor,
                reading.ReadingInformation,
                reading.PriorityInfo.Select(p => p.Some()).ToList()
            });

    // Serializes JMDictKanji, using the same priority-tag wrapping as the reading codec.
    var kanjiCodec = Serializer.ForComposite()
        .With(Serializer.ForStringAsUtf8())
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictKanjiInformation>()))
        .With(Serializer.ForReadOnlyCollection(priorityTagCodec))
        .Create()
        .Mapping(
            slots => new JMDictKanji(
                (string)slots[0],
                (IReadOnlyCollection<EdictKanjiInformation>)slots[1],
                ((IReadOnlyCollection<Option<PriorityTag>>)slots[2]).Values().ToList()),
            kanji => new object[]
            {
                kanji.Kanji,
                kanji.Informational,
                kanji.PriorityInfo.Select(p => p.Some()).ToList()
            });

    var senseCodec = Serializer.ForComposite()
        .With(SerializerExt.ForOption(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictPartOfSpeech>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictDialect>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictField>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForEnum<EdictMisc>()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(Serializer.ForStringAsUtf8()))
        .With(Serializer.ForReadOnlyCollection(loanSourceCodec))
        .With(Serializer.ForReadOnlyCollection(crossReferenceCodec))
        .With(Serializer.ForReadOnlyCollection(crossReferenceCodec))
        .Create()
        .Mapping(
            slots => new JMDictSense(
                (Option<EdictPartOfSpeech>)slots[0],
                (IReadOnlyCollection<EdictPartOfSpeech>)slots[1],
                (IReadOnlyCollection<EdictDialect>)slots[2],
                (IReadOnlyCollection<string>)slots[3],
                (IReadOnlyCollection<string>)slots[4],
                (IReadOnlyCollection<EdictField>)slots[5],
                (IReadOnlyCollection<EdictMisc>)slots[6],
                (IReadOnlyCollection<string>)slots[7],
                (IReadOnlyCollection<string>)slots[8],
                (IReadOnlyCollection<EdictLoanSource>)slots[9],
                (IReadOnlyCollection<EdictCrossReference>)slots[10],
                (IReadOnlyCollection<EdictCrossReference>)slots[11]),
            sense => new object[]
            {
                sense.Type,
                sense.PartOfSpeechInfo,
                sense.DialectalInfo,
                sense.Glosses,
                sense.Informational,
                sense.FieldData,
                sense.Misc,
                sense.RestrictedToKanji,
                sense.RestrictedToReading,
                sense.LoanSources,
                sense.CrossReferences,
                sense.Antonyms
            });

    // Slot order: sequence number, readings, kanji, senses.
    var entryCodec = TinyIndex.Serializer.ForComposite()
        .With(Serializer.ForLong())
        .With(Serializer.ForReadOnlyCollection(readingCodec))
        .With(Serializer.ForReadOnlyCollection(kanjiCodec))
        .With(Serializer.ForReadOnlyCollection(senseCodec))
        .Create()
        .Mapping(
            slots => new JMDictEntry(
                (long)slots[0],
                (IReadOnlyCollection<JMDictReading>)slots[1],
                (IReadOnlyCollection<JMDictKanji>)slots[2],
                (IReadOnlyCollection<JMDictSense>)slots[3]),
            entry => new object[]
            {
                entry.SequenceNumber,
                entry.ReadingEntries,
                entry.KanjiEntries,
                entry.Senses
            });

    using (var jmdictParser = JMDictParser.Create(stream))
    {
        var headwordIndexCodec = TinyIndex.Serializer.ForKeyValuePair(
            TinyIndex.Serializer.ForStringAsUtf8(),
            TinyIndex.Serializer.ForReadOnlyList(TinyIndex.Serializer.ForLong()));

        var friendlyNameCodec = Serializer.ForKeyValuePair(
            Serializer.ForStringAsUtf8(),
            Serializer.ForStringAsUtf8());

        db = TinyIndex.Database.CreateOrOpen(cache, Version)
            // First array: every parsed entry, keyed by its sequence number.
            .AddIndirectArray(
                entryCodec,
                database => jmdictParser.ReadRemainingToEnd(),
                entry => entry.SequenceNumber)
            // Second array: each kanji / reading string paired with the sequence numbers of
            // the entries that contain it, derived by scanning the first array. Per entry the
            // kanji forms are emitted before the readings.
            .AddIndirectArray(
                headwordIndexCodec,
                database => database.Get<JMDictEntry>(0)
                    .LinearScan()
                    .SelectMany(entry => entry.KanjiEntries
                        .Select(k => new KeyValuePair<long, string>(entry.SequenceNumber, k.Kanji))
                        .Concat(entry.ReadingEntries
                            .Select(r => new KeyValuePair<long, string>(entry.SequenceNumber, r.Reading))))
                    .GroupBy(pair => pair.Value, pair => pair.Key)
                    .Select(group => new KeyValuePair<string, IReadOnlyList<long>>(group.Key, group.ToList())),
                pair => pair.Key,
                StringComparer.Ordinal)
            // Third array: the parser's friendly-name pairs, keyed by the pair's key.
            .AddIndirectArray(
                friendlyNameCodec,
                database => jmdictParser.FriendlyNames,
                pair => pair.Key,
                StringComparer.Ordinal)
            .Build();

        // Bind the three arrays, each with its own LRU cache.
        entries = db.Get<JMDictEntry>(0, new LruCache<long, JMDictEntry>(128));
        kvps = db.Get<KeyValuePair<string, IReadOnlyList<long>>>(
            1,
            new LruCache<long, KeyValuePair<string, IReadOnlyList<long>>>(128));
        friendlyNames = db.Get(2, new LruCache<long, KeyValuePair<string, string>>(256));
    }

    return this;
}
/// <summary>
/// Advances <c>xmlReader</c> until it reaches the end element named <paramref name="tag"/> at
/// <paramref name="depth"/> (or until the reader has no more nodes) and collects the
/// recognised child elements met on the way into a <see cref="JMDictSense"/>.
/// </summary>
/// <param name="depth">Depth the closing end element must have.</param>
/// <param name="tag">Name the closing end element must have.</param>
/// <param name="priorSenses">
/// Senses read earlier; when this sense has no part-of-speech of its own, the
/// <c>PartOfSpeechInfo</c> of the last one (if any) is copied.
/// </param>
/// <returns>The sense assembled from the collected values.</returns>
private JMDictSense ReadSense(int depth, string tag, IReadOnlyList<JMDictSense> priorSenses)
{
    var kanjiRestrictions = new List<string>();
    var readingRestrictions = new List<string>();
    var partsOfSpeech = new List<EdictPartOfSpeech>();
    var fields = new List<EdictField>();
    var crossReferences = new List<EdictCrossReference>();
    var antonyms = new List<EdictCrossReference>();
    var miscTags = new List<EdictMisc>();
    var senseNotes = new List<string>();
    var dialects = new List<EdictDialect>();
    var loanSources = new List<EdictLoanSource>();
    var glosses = new List<string>();

    // True when the reader currently sits on a start element with the given name.
    bool AtElement(string name) =>
        xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == name;

    // True when the reader currently sits on the end element that closes this sense.
    bool AtClosingTag() =>
        xmlReader.NodeType == XmlNodeType.EndElement && xmlReader.Name == tag && xmlReader.Depth == depth;

    while (xmlReader.Read() && !AtClosingTag())
    {
        // The checks are deliberately independent (not else-if): each one inspects the
        // reader afresh, after any helper called by an earlier branch has advanced it.
        if (AtElement("stagk"))
        {
            kanjiRestrictions.Add(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
        }

        if (AtElement("stagr"))
        {
            readingRestrictions.Add(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name));
        }

        if (AtElement("pos"))
        {
            var parsedPos = ReadPos(xmlReader.Depth, xmlReader.Name);
            if (parsedPos != null)
            {
                partsOfSpeech.Add(parsedPos.Value);
            }
        }

        if (AtElement("field"))
        {
            var parsedField = ReadField(xmlReader.Depth, xmlReader.Name);
            if (parsedField != null)
            {
                fields.Add(parsedField.Value);
            }
        }

        if (AtElement("xref"))
        {
            crossReferences.Add(
                EdictCrossReference.Parse(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name)));
        }

        if (AtElement("ant"))
        {
            antonyms.Add(
                EdictCrossReference.Parse(ReadSimpleXmlTextElement(xmlReader.Depth, xmlReader.Name)));
        }

        if (AtElement("misc"))
        {
            var parsedMisc = ReadMisc(xmlReader.Depth, xmlReader.Name);
            if (parsedMisc != null)
            {
                miscTags.Add(parsedMisc.Value);
            }
        }

        if (AtElement("s_inf"))
        {
            senseNotes.Add(ReadSInf(xmlReader.Depth, xmlReader.Name));
        }

        if (AtElement("dial"))
        {
            var parsedDialect = ReadDial(xmlReader.Depth, xmlReader.Name);
            if (parsedDialect != null)
            {
                dialects.Add(parsedDialect.Value);
            }
        }

        if (AtElement("lsource"))
        {
            loanSources.Add(ReadLsource(xmlReader.Depth, xmlReader.Name));
        }

        if (AtElement("gloss"))
        {
            glosses.Add(ReadGloss(xmlReader.Depth, xmlReader.Name));
        }
    }

    // A sense without its own part-of-speech takes the list of the most recent prior sense.
    if (partsOfSpeech.Count == 0)
    {
        IEnumerable<EdictPartOfSpeech> inherited =
            priorSenses.LastOrDefault()?.PartOfSpeechInfo ?? Enumerable.Empty<EdictPartOfSpeech>();
        partsOfSpeech.AddRange(inherited);
    }

    return new JMDictSense(
        partsOfSpeech.FirstOrNone(),
        partsOfSpeech,
        dialects,
        glosses,
        senseNotes,
        fields,
        miscTags,
        kanjiRestrictions,
        readingRestrictions,
        loanSources,
        crossReferences,
        antonyms);
}