/// <summary>
/// Merges every CEDICT entry into the shared term index.
/// </summary>
/// <remarks>
/// Terms are keyed by the entry's (Traditional, Pinyin) pair; the same Pinyin value is what
/// gets stored as <c>PinyinNumbered</c> on new terms. When a term already exists under the
/// key, only its <c>CedictEntryId</c> is set. Otherwise a new term is created from the entry
/// and stored under that key.
/// </remarks>
/// <param name="index">Index to update in place.</param>
public override void AddToIndex(Dictionary<Tuple<string, string>, Term> index)
{
    // This override is synchronous, so the asynchronous table read is blocked on here.
    var entries = db.Table<CedictEntry>().ToListAsync().Result;

    foreach (var entry in entries)
    {
        var key = Tuple.Create(entry.Traditional, entry.Pinyin);

        Term term;
        if (index.TryGetValue(key, out term))
        {
            // Already indexed under this key: just link it to this entry.
            term.CedictEntryId = entry.Id;
        }
        else
        {
            term = new Term()
            {
                Traditional = entry.Traditional,
                Simplified = entry.Simplified,
                Pinyin = Pinyin.ConvertToAccents(entry.Pinyin),
                PinyinNumbered = entry.Pinyin,
                PinyinNoNumbers = Pinyin.RemoveNumbersAndUnderscore(entry.Pinyin),
                Length = entry.Traditional.Length,
                CedictEntryId = entry.Id
            };
            index[key] = term;
        }
    }
}
/// <summary>
/// Merges every Moedict heteronym into the shared term index.
/// </summary>
/// <remarks>
/// Terms are keyed by (Traditional, PinyinNumbered). A term already present under the key
/// only gets its <c>MoedictHeteronymId</c> set. Otherwise a new term is built from the row,
/// with the simplified form taken as the first result of <c>TradToSimp.Convert</c>.
/// </remarks>
/// <param name="index">Index to update in place.</param>
public override void AddToIndex(Dictionary<Tuple<string, string>, Term> index)
{
    const string lookupSql =
        "SELECT h.Id AS Id, Headword AS Traditional, Pinyin, PinyinNumbered " +
        "FROM MoedictHeteronym h LEFT JOIN MoedictEntry e ON h.EntryId = e.Id";

    // Synchronous override: block until the joined heteronym/entry rows are loaded.
    var rows = db.QueryAsync<MoedictHeteronymLookupForm>(lookupSql).Result;

    foreach (var row in rows)
    {
        var key = Tuple.Create(row.Traditional, row.PinyinNumbered);

        Term existing;
        if (!index.TryGetValue(key, out existing))
        {
            // First time this (headword, pinyin) pair is seen: create the term.
            index[key] = new Term()
            {
                Traditional = row.Traditional,
                Simplified = TradToSimp.Convert(row.Traditional).FirstOrDefault(),
                Pinyin = row.Pinyin,
                PinyinNumbered = row.PinyinNumbered,
                PinyinNoNumbers = Pinyin.RemoveNumbersAndUnderscore(row.PinyinNumbered),
                Length = row.Traditional.Length,
                MoedictHeteronymId = row.Id,
            };
            continue;
        }

        // Term already exists: only link it to this heteronym.
        existing.MoedictHeteronymId = row.Id;
    }
}
/// <summary>
/// Looks up CEDICT entries matching <paramref name="query"/> and returns them with their definitions.
/// </summary>
/// <remarks>
/// Entries are collected from a LIKE match of <c>query + "%"</c> against <c>Traditional</c>,
/// followed by one pinyin query per form returned by <c>Pinyin.ToQueryForms</c>. The results
/// are concatenated in that order without de-duplication. Definitions are then fetched with
/// one query per entry.
/// </remarks>
/// <param name="ct">Token passed to every database query.</param>
/// <param name="query">Text to search for.</param>
/// <returns>One <c>SearchResult</c> per matched entry, each holding a single definition list.</returns>
public override async Task<IEnumerable<SearchResult>> Search(CancellationToken ct, string query)
{
    const string byTraditionalSql = "SELECT * FROM CedictEntry WHERE Traditional LIKE ? ESCAPE '\\'";
    const string byPinyinSql =
        "SELECT * FROM (SELECT * FROM CedictEntry WHERE PinyinNoNumbers LIKE ? ESCAPE '\\') " +
        "WHERE Pinyin LIKE ? ESCAPE '\\'";
    const string definitionsSql = "SELECT * FROM CedictDefinition WHERE EntryId = ?";

    var matches = await db.QueryAsync<CedictEntry>(ct, byTraditionalSql, query + "%");

    foreach (var form in Pinyin.ToQueryForms(query))
    {
        // The inner SELECT filters on the stripped pinyin column; the outer WHERE then
        // filters those rows on the full form.
        var strippedPattern = Pinyin.RemoveNumbersAndUnderscore(form) + "%";
        var fullPattern = form + "%";
        matches.AddRange(await db.QueryAsync<CedictEntry>(ct, byPinyinSql, strippedPattern, fullPattern));
    }

    var results = new List<SearchResult>();
    foreach (var entry in matches)
    {
        var definitions = await db.QueryAsync<CedictDefinition>(ct, definitionsSql, entry.Id);
        var definitionTexts = new List<string>(definitions.Select(d => d.Definition));

        results.Add(new SearchResult()
        {
            Traditional = entry.Traditional,
            Simplified = entry.Simplified,
            Pinyin = Pinyin.ConvertToAccents(entry.Pinyin),
            PinyinNumbered = entry.Pinyin,
            Definitions = new List<List<string>>() { definitionTexts }
        });
    }

    return results;
}
/// <summary>
/// Searches the term index by characters and by pinyin, returning the matches in ranked order.
/// </summary>
/// <remarks>
/// The first query LIKE-matches <c>query + "%"</c> against <c>Traditional</c> or <c>Simplified</c>.
/// Then each non-blank form from <c>Pinyin.ToQueryForms</c> is matched against the pinyin columns.
/// Pinyin matches are restricted to terms with <c>Length = 1</c> unless there is more than one
/// form or the only form contains a space. Results are not de-duplicated.
/// Ordering: <c>Length</c>, then length of <c>PinyinNumbered</c> (null sorts last), then
/// <c>Score</c> descending using <c>freqComparer</c>, then <c>PinyinNumbered</c>.
/// </remarks>
/// <param name="ct">Token passed to every database query.</param>
/// <param name="query">Text to search for.</param>
/// <returns>The matched terms as a lazily ordered sequence.</returns>
public async Task<IEnumerable<Term>> Search(CancellationToken ct, string query)
{
    // NOTE(review): ESCAPE is attached only to the second LIKE here, and only to the inner
    // LIKE of the pinyin query below - confirm whether the other LIKEs should carry it too.
    var results = new List<TermWithFreq>();
    results.AddRange(await db.QueryAsync<TermWithFreq>(ct,
        "SELECT * FROM Term LEFT JOIN Frequency ON Simplified = Hanzi OR Traditional = Hanzi WHERE Traditional LIKE ? OR Simplified LIKE ? ESCAPE '\\'",
        query + "%", query + "%"));

    // Materialize once: the forms are counted, indexed and iterated, so a lazy sequence
    // would otherwise be recomputed on each pass.
    var queryForms = Pinyin.ToQueryForms(query).ToList();
    int numberOfForms = queryForms.Count;
    bool allowMultiCharacter = numberOfForms > 1 || (numberOfForms > 0 && queryForms[0].IndexOf(' ') != -1);
    string limitLength = allowMultiCharacter ? "" : "Length = 1 AND ";

    foreach (var q in queryForms)
    {
        if (!string.IsNullOrWhiteSpace(q))
        {
            results.AddRange(await db.QueryAsync<TermWithFreq>(ct,
                "SELECT * FROM (SELECT * FROM (SELECT * FROM Term WHERE " + limitLength + "PinyinNoNumbers LIKE ? ESCAPE '\\') " +
                " WHERE PinyinNumbered LIKE ?) LEFT JOIN Frequency ON Simplified = Hanzi OR Traditional = Hanzi",
                Pinyin.RemoveNumbersAndUnderscore(q) + "%", q + "%"));
        }
    }

    return results.OrderBy(r => r.Length)
                  .ThenBy(r => r.PinyinNumbered != null ? r.PinyinNumbered.Length : int.MaxValue)
                  .ThenByDescending(r => r.Score, freqComparer)
                  .ThenBy(r => r.PinyinNumbered);
}
/// <summary>
/// Rebuilds the Moedict tables from the dump file <c>a.txt</c>.
/// </summary>
/// <remarks>
/// Drops and recreates the six Moedict tables, then inserts rows level by level: entries,
/// heteronyms, definitions, and finally quotes, examples and links. Each level is inserted
/// before its children are prepared, because the children copy the parent's <c>Id</c> after
/// that insert - presumably the insert is what populates <c>Id</c>; verify against the
/// database layer. Every database call is blocked on synchronously.
/// </remarks>
public override void Build()
{
    // Start from empty tables.
    db.DropTableAsync<MoedictEntry>().Wait();
    db.DropTableAsync<MoedictHeteronym>().Wait();
    db.DropTableAsync<MoedictDefinition>().Wait();
    db.DropTableAsync<MoedictQuote>().Wait();
    db.DropTableAsync<MoedictExample>().Wait();
    db.DropTableAsync<MoedictLink>().Wait();

    db.CreateTableAsync<MoedictEntry>().Wait();
    db.CreateTableAsync<MoedictHeteronym>().Wait();
    db.CreateTableAsync<MoedictDefinition>().Wait();
    db.CreateTableAsync<MoedictQuote>().Wait();
    db.CreateTableAsync<MoedictExample>().Wait();
    db.CreateTableAsync<MoedictLink>().Wait();

    // Level 1: entries. Each line is split on spaces into at most three parts and the
    // third part is deserialized as the entry's JSON.
    var separator = new[] { ' ' };
    string[] rawLines = System.IO.File.ReadAllLines("a.txt");
    var entries = new List<MoedictEntry>();
    foreach (string rawLine in rawLines)
    {
        string json = rawLine.Split(separator, 3)[2];
        MoedictEntry parsed = JsonConvert.DeserializeObject<MoedictEntry>(json);

        // The headword is the title with "`" and "~" removed and surrounding whitespace trimmed.
        parsed.Headword = parsed.Title.Replace("`", "").Replace("~", "").Trim();

        Match headwordParts = extractAlternateHeadword.Match(parsed.Headword);
        if (headwordParts.Success)
        {
            parsed.Headword = headwordParts.Groups[1].Value;
            parsed.AlternateHeadword = headwordParts.Groups[2].Value;
        }

        entries.Add(parsed);
    }
    db.InsertAllAsync(entries).Wait();

    // Level 2: heteronyms, linked to their entry.
    var heteronyms = new List<MoedictHeteronym>();
    foreach (var entry in entries)
    {
        foreach (var heteronym in entry.Heteronyms)
        {
            heteronym.EntryId = entry.Id;

            if (heteronym.Pinyin != null)
            {
                Match pinyinParts = extractAlternatePinyin.Match(heteronym.Pinyin);
                if (pinyinParts.Success)
                {
                    heteronym.Pinyin = pinyinParts.Groups[1].Value.Trim();
                    heteronym.AlternatePinyinNote = pinyinParts.Groups[2].Value;
                    heteronym.AlternatePinyin = pinyinParts.Groups[3].Value.Trim();
                }

                heteronym.PinyinNumbered = Pinyin.ConvertToNumbers(heteronym.Pinyin);
                heteronym.AlternatePinyinNumbered = Pinyin.ConvertToNumbers(heteronym.AlternatePinyin);
            }

            heteronyms.Add(heteronym);
        }
    }
    db.InsertAllAsync(heteronyms).Wait();

    // Level 3: definitions, linked to their heteronym.
    var definitions = new List<MoedictDefinition>();
    foreach (var heteronym in heteronyms)
    {
        foreach (var definition in heteronym.Definitions)
        {
            definition.HeteronymId = heteronym.Id;
            definitions.Add(definition);
        }
    }
    db.InsertAllAsync(definitions).Wait();

    // Level 4: quotes, examples and links, linked to their definition.
    var quotes = new List<MoedictQuote>();
    var examples = new List<MoedictExample>();
    var links = new List<MoedictLink>();
    foreach (var definition in definitions)
    {
        // Each guard checks one property (e.g. Quotes) while the loop reads its *Strings
        // counterpart; this is kept exactly as in the original.
        if (definition.Quotes != null)
        {
            foreach (var quoteText in definition.QuoteStrings)
            {
                quotes.Add(new MoedictQuote() { DefinitionId = definition.Id, Quote = quoteText });
            }
        }

        if (definition.Examples != null)
        {
            foreach (var exampleText in definition.ExampleStrings)
            {
                examples.Add(new MoedictExample() { DefinitionId = definition.Id, Example = exampleText });
            }
        }

        if (definition.Links != null)
        {
            foreach (var linkText in definition.LinkStrings)
            {
                links.Add(new MoedictLink() { DefinitionId = definition.Id, Link = linkText });
            }
        }
    }
    db.InsertAllAsync(quotes).Wait();
    db.InsertAllAsync(examples).Wait();
    db.InsertAllAsync(links).Wait();
}