/// <summary>
/// Renders a <see cref="FengDocument"/> as a rich-text tree: a vertical
/// container holding a section header, the explanation, and a source citation.
/// </summary>
/// <param name="doc">The document to render.</param>
/// <returns>
/// A <see cref="RichTextNode"/> whose <c>VerticalContainer</c> holds the
/// rendered children in display order.
/// </returns>
public RichTextNode ToRichTextNode(FengDocument doc)
{
    var root = new RichTextNode
    {
        VerticalContainer = new RichTextNode.Types.VerticalContainerNode()
    };
    var children = root.VerticalContainer.Children;

    // Header: the original hanzi plus both original pronunciations
    // (underlying first, then canonical).
    var headword = zc.tH(doc.HanziOriginal);
    var pronunciations = RenderSourcePronunciations(
        ToFeng(doc.YngpingUnderlyingOriginal),
        ToFeng(doc.YngpingCanonicalOriginal));
    children.Add(SectionHeader(headword, pronunciations));

    // Body: plain explanation text is used only when no structured
    // explanation is attached to the document.
    if (doc.ExplanationStructured == null)
    {
        children.Add(SimpleText(zc.tH(doc.Explanation)));
    }
    else
    {
        children.Add(explanationRenderer.ToRichTextNode(doc.ExplanationStructured, doc.HanziCanonical));
    }

    // Footer: bibliographic citation including the page number of the entry.
    var citation = zc.tM($"出处:冯爱珍. 1998. 福州方言词典. 南京: 江苏教育出版社. 第 {doc.Source.PageNumber} 页. 用字可能经过编辑修订.");
    children.Add(Source(citation));

    return root;
}
/// <summary>
/// Loads the Feng rows from <c>mergedPath</c>, converts each row into a
/// <see cref="FengDocument"/>, writes one line per document to
/// <c>feng_index_debug.txt</c> inside <c>outputFolder</c>, and returns the
/// documents.
/// </summary>
/// <returns>The documents built from the loaded rows, in row order.</returns>
public IEnumerable<FengDocument> Run()
{
    // Materialize exactly once. The projection is lazy, so enumerating it
    // twice (once for the result list, once for the debug dump) would rebuild
    // every document a second time and dump instances other than the ones
    // actually returned.
    var documents = LoadFengRows(mergedPath)
        .Select(f =>
        {
            var tmp = new FengDocument
            {
                Id = $"p{f.PageNumber}_{f.LineNumber}",
                HanziCanonical = f.KanjiClean,
                YngpingCanonical = f.Pron,
                Explanation = f.Explanation,
                ExplanationHans = Simplify(f.Explanation),
                Source = new FengDocument.Types.SourceInfo
                {
                    PageNumber = f.PageNumber,
                    LineNumber = f.LineNumber
                },
            };

            // Matchable hanzi: the cleaned form and its Simplify() output.
            tmp.HanziMatchable.Add(f.KanjiClean);
            tmp.HanziMatchable.Add(Simplify(f.KanjiClean));

            // Pronunciation permutations: the given form plus generated variants.
            tmp.YngpingPermutations.Add(f.Pron);
            tmp.YngpingPermutations.AddRange(YngpingVariantsUtil.GenerateYngpingVariants(f.Pron));

            return tmp;
        })
        .ToList();

    // Debug dump: one ToString() rendering per document.
    File.WriteAllLines(
        Path.Combine(outputFolder, "feng_index_debug.txt"),
        documents.Select(proto => proto.ToString()));

    return documents;
}
/// <summary>
/// Computes a relevance score for a document matched by a vocabulary query.
/// </summary>
/// <remarks>
/// Two components are summed:
/// a headword component of <c>1000 / (HanziCanonical.Length - query.Length + 1)</c>,
/// awarded when any entry of <c>HanziMatchable</c> contains the query; and
/// an explanation component of 10 points per occurrence of the query in the
/// explanation text.
/// </remarks>
/// <param name="query">The query text.</param>
/// <param name="matchedDocument">The document to score.</param>
/// <returns>The score; 0 when neither the headword nor the explanation matches.</returns>
private static int ScoreVocabQueryResult(string query, FengDocument matchedDocument)
{
    int score = 0;

    if (matchedDocument.HanziMatchable.Any(m => m.Contains(query)))
    {
        var distance = matchedDocument.HanziCanonical.Length - query.Length + 1;

        // The match may come from a matchable form that is longer than the
        // canonical headword, which would make the divisor zero or negative.
        // Clamp so such a match is scored like an exact-length match instead
        // of throwing DivideByZeroException or subtracting from the score.
        if (distance < 1)
        {
            distance = 1;
        }

        score += 1000 / distance;
    }

    if (matchedDocument.Explanation.Contains(query) || matchedDocument.ExplanationHans.Contains(query))
    {
        var occurrences = matchedDocument.Explanation.CountOccurences(query);

        // When only ExplanationHans contains the query, counting in
        // Explanation alone yields 0 and the match would earn nothing;
        // fall back to the count in ExplanationHans in that case.
        if (occurrences == 0)
        {
            occurrences = matchedDocument.ExplanationHans.CountOccurences(query);
        }

        score += 10 * occurrences;
    }

    return score;
}
public IEnumerable <FengDocument> Run() { var jsonOutput = new List <string>(); var documents = new List <FengDocument>(); var fengZeuMapping = LoadFengZeuMapping(); var docs = zingzeudata.ZingzeuData.Parser.ParseFeng.LoadFengRows(fengPath).Select(f => { var cleanExplanation = zingzeudata.ZingzeuData.Shared.StringHelpers.ReplaceAllBraces( f.ExplanationRaw); var structured = SafeParseExplanation(cleanExplanation); var flattened = FlattenExplanation(structured); var tmp = new FengDocument { Id = $"p{f.PageNumber}_{f.LineNumber}", HanziCanonical = f.HanziClean, HanziOriginal = f.HanziOriginal, YngpingCanonical = f.Pron, YngpingCanonicalOriginal = f.PronOriginal, YngpingUnderlying = f.PronUnderlying, YngpingUnderlyingOriginal = f.PronUnderlyingOriginal, Explanation = cleanExplanation, ExplanationTrad = flattened, ExplanationHans = Simplify(flattened), ExplanationStructured = structured, Source = new FengDocument.Types.SourceInfo { PageNumber = f.PageNumber, LineNumber = f.LineNumber }, }; if (fengZeuMapping.ContainsKey((f.PageNumber, f.LineNumber))) { tmp.ZingzeuId = fengZeuMapping[(f.PageNumber, f.LineNumber)]; }