Ejemplo n.º 1
0
        /// <summary>
        /// Merges every row of the CedictEntry table into <paramref name="index"/>.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by (Traditional, Pinyin), where the entry's Pinyin is the value stored
        /// as PinyinNumbered on the term. If a term with that key is already indexed, only its
        /// CedictEntryId is updated; otherwise a new Term is built from the entry and added.
        /// </remarks>
        /// <param name="index">Index that is updated in place.</param>
        public override void AddToIndex(Dictionary <Tuple <string, string>, Term> index)
        {
            // This override is synchronous, so the asynchronous table read is blocked on here.
            var entries = db.Table<CedictEntry>().ToListAsync().Result;

            foreach (var entry in entries)
            {
                var key = Tuple.Create(entry.Traditional, entry.Pinyin);

                Term term;
                if (index.TryGetValue(key, out term))
                {
                    // Term already known (possibly from another source): just link it to this entry.
                    term.CedictEntryId = entry.Id;
                }
                else
                {
                    index[key] = new Term()
                    {
                        Traditional     = entry.Traditional,
                        Simplified      = entry.Simplified,
                        Pinyin          = Pinyin.ConvertToAccents(entry.Pinyin),
                        PinyinNumbered  = entry.Pinyin,
                        PinyinNoNumbers = Pinyin.RemoveNumbersAndUnderscore(entry.Pinyin),
                        Length          = entry.Traditional.Length,
                        CedictEntryId   = entry.Id
                    };
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Merges every Moedict heteronym (joined with its entry's headword) into
        /// <paramref name="index"/>.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by (Traditional, PinyinNumbered). A key that is already present only
        /// gets its MoedictHeteronymId set; a missing key gets a freshly built Term whose
        /// Simplified form is the first result of TradToSimp.Convert.
        /// </remarks>
        /// <param name="index">Index that is updated in place.</param>
        public override void AddToIndex(Dictionary <Tuple <string, string>, Term> index)
        {
            // Synchronous override: block until the lookup query has completed.
            var rows = db.QueryAsync<MoedictHeteronymLookupForm>(
                "SELECT h.Id AS Id, Headword AS Traditional, Pinyin, PinyinNumbered "
                + "FROM MoedictHeteronym h LEFT JOIN MoedictEntry e ON h.EntryId = e.Id").Result;

            foreach (var row in rows)
            {
                var lookupKey = Tuple.Create(row.Traditional, row.PinyinNumbered);

                Term existing;
                if (!index.TryGetValue(lookupKey, out existing))
                {
                    // First time this headword/pinyin pair is seen: create the term.
                    index[lookupKey] = new Term()
                    {
                        Traditional        = row.Traditional,
                        Simplified         = TradToSimp.Convert(row.Traditional).FirstOrDefault(),
                        Pinyin             = row.Pinyin,
                        PinyinNumbered     = row.PinyinNumbered,
                        PinyinNoNumbers    = Pinyin.RemoveNumbersAndUnderscore(row.PinyinNumbered),
                        Length             = row.Traditional.Length,
                        MoedictHeteronymId = row.Id,
                    };
                    continue;
                }

                // Already indexed: only record the heteronym link.
                existing.MoedictHeteronymId = row.Id;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Searches CedictEntry by headword and by pinyin and returns one result per matching row.
        /// </summary>
        /// <remarks>
        /// The first query matches Traditional against <c>query + "%"</c>. Then, for every form
        /// produced by Pinyin.ToQueryForms, rows whose PinyinNoNumbers and Pinyin both match the
        /// corresponding prefix patterns are appended. Definitions are loaded per entry.
        /// </remarks>
        /// <param name="ct">Token passed through to every database query.</param>
        /// <param name="query">Search text typed by the user.</param>
        /// <returns>The matching entries, headword matches first.</returns>
        public override async Task <IEnumerable <SearchResult> > Search(CancellationToken ct, string query)
        {
            var results = new List<SearchResult>();

            // Headword matches.
            var matches = await db.QueryAsync<CedictEntry>(ct, "SELECT * FROM CedictEntry WHERE Traditional LIKE ? ESCAPE '\\'", query + "%");

            // Pinyin matches, one query per candidate form.
            foreach (var form in Pinyin.ToQueryForms(query))
            {
                var noNumbersPattern = Pinyin.RemoveNumbersAndUnderscore(form) + "%";
                var pinyinPattern    = form + "%";

                var pinyinMatches = await db.QueryAsync<CedictEntry>(
                    ct,
                    "SELECT * FROM (SELECT * FROM CedictEntry WHERE PinyinNoNumbers LIKE ? ESCAPE '\\') "
                    + "WHERE Pinyin LIKE ? ESCAPE '\\'",
                    noNumbersPattern,
                    pinyinPattern);

                matches.AddRange(pinyinMatches);
            }

            foreach (var match in matches)
            {
                var rows = await db.QueryAsync<CedictDefinition>(ct, "SELECT * FROM CedictDefinition WHERE EntryId = ?", match.Id);

                // All definitions of an entry form a single inner list.
                var definitionTexts = rows.Select(row => row.Definition).ToList();

                var result = new SearchResult()
                {
                    Traditional    = match.Traditional,
                    Simplified     = match.Simplified,
                    Pinyin         = Pinyin.ConvertToAccents(match.Pinyin),
                    PinyinNumbered = match.Pinyin,
                    Definitions    = new List<List<string>>() { definitionTexts }
                };
                results.Add(result);
            }

            return results;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Searches the Term table by headword and by pinyin, joined with frequency data, and
        /// returns the hits ordered for display.
        /// </summary>
        /// <remarks>
        /// Headword hits match Traditional or Simplified against <c>query + "%"</c>. Pinyin hits
        /// are collected per form returned by Pinyin.ToQueryForms; when there is only one form
        /// and it contains no space, pinyin hits are restricted to terms with Length = 1.
        /// Results are ordered by Length, then by PinyinNumbered length (null last), then by
        /// Score descending using freqComparer, then by PinyinNumbered.
        /// </remarks>
        /// <param name="ct">Token passed through to every database query.</param>
        /// <param name="query">Search text typed by the user.</param>
        /// <returns>The ordered matching terms.</returns>
        public async Task <IEnumerable <Term> > Search(CancellationToken ct, string query)
        {
            var results = new List<TermWithFreq>();

            // ESCAPE only applies to the LIKE it directly follows, so it is repeated on every
            // LIKE to give all patterns the same escape character.
            results.AddRange(await db.QueryAsync<TermWithFreq>(ct, "SELECT * FROM Term LEFT JOIN Frequency ON Simplified = Hanzi OR Traditional = Hanzi WHERE Traditional LIKE ? ESCAPE '\\' OR Simplified LIKE ? ESCAPE '\\'", query + "%", query + "%"));

            // Materialize once: the forms are counted, peeked at and iterated below.
            var    queryForms          = Pinyin.ToQueryForms(query).ToList();
            int    numberOfForms       = queryForms.Count;
            bool   allowMultiCharacter = numberOfForms > 1 || (numberOfForms > 0 && queryForms[0].IndexOf(' ') != -1);
            string limitLength         = allowMultiCharacter ? "" : "Length = 1 AND ";

            foreach (var q in queryForms)
            {
                if (string.IsNullOrWhiteSpace(q))
                {
                    continue;
                }

                results.AddRange(await db.QueryAsync<TermWithFreq>(ct,
                                                                   "SELECT * FROM (SELECT * FROM (SELECT * FROM Term WHERE " + limitLength + "PinyinNoNumbers LIKE ? ESCAPE '\\') "
                                                                   + " WHERE PinyinNumbered LIKE ? ESCAPE '\\') LEFT JOIN Frequency ON Simplified = Hanzi OR Traditional = Hanzi", Pinyin.RemoveNumbersAndUnderscore(q) + "%", q + "%"));
            }

            return results.OrderBy(r => r.Length)
                   .ThenBy(r => r.PinyinNumbered != null ? r.PinyinNumbered.Length : int.MaxValue)
                   .ThenByDescending(r => r.Score, freqComparer)
                   .ThenBy(r => r.PinyinNumbered);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Rebuilds the Moedict tables from the file "a.txt".
        /// </summary>
        /// <remarks>
        /// Drops and recreates the six Moedict tables, then inserts entries, heteronyms,
        /// definitions and finally quotes, examples and links. Each level is inserted before its
        /// children are prepared because the children copy the parent's Id (presumably assigned
        /// by the insert; confirm against the database layer). Every database call is waited on
        /// synchronously.
        /// </remarks>
        public override void Build()
        {
            // Start from empty tables.
            db.DropTableAsync<MoedictEntry>().Wait();
            db.DropTableAsync<MoedictHeteronym>().Wait();
            db.DropTableAsync<MoedictDefinition>().Wait();
            db.DropTableAsync<MoedictQuote>().Wait();
            db.DropTableAsync<MoedictExample>().Wait();
            db.DropTableAsync<MoedictLink>().Wait();
            db.CreateTableAsync<MoedictEntry>().Wait();
            db.CreateTableAsync<MoedictHeteronym>().Wait();
            db.CreateTableAsync<MoedictDefinition>().Wait();
            db.CreateTableAsync<MoedictQuote>().Wait();
            db.CreateTableAsync<MoedictExample>().Wait();
            db.CreateTableAsync<MoedictLink>().Wait();

            // Level 1: entries. The JSON payload is the third space-separated field of each line.
            var fieldSeparator = new char[] { ' ' };
            var allEntries     = new List<MoedictEntry>();
            foreach (string rawLine in System.IO.File.ReadAllLines("a.txt"))
            {
                string[]     fields = rawLine.Split(fieldSeparator, 3);
                MoedictEntry parsed = JsonConvert.DeserializeObject<MoedictEntry>(fields[2]);

                // Strip the markup characters from the title to obtain the headword.
                parsed.Headword = parsed.Title.Replace("`", "").Replace("~", "").Trim();

                Match headwordParts = extractAlternateHeadword.Match(parsed.Headword);
                if (headwordParts.Success)
                {
                    parsed.Headword          = headwordParts.Groups[1].Value;
                    parsed.AlternateHeadword = headwordParts.Groups[2].Value;
                }

                allEntries.Add(parsed);
            }
            db.InsertAllAsync(allEntries).Wait();

            // Level 2: heteronyms, linked to their entry.
            var allHeteronyms = new List<MoedictHeteronym>();
            foreach (var owner in allEntries)
            {
                foreach (var heteronym in owner.Heteronyms)
                {
                    heteronym.EntryId = owner.Id;

                    if (heteronym.Pinyin != null)
                    {
                        Match pinyinParts = extractAlternatePinyin.Match(heteronym.Pinyin);
                        if (pinyinParts.Success)
                        {
                            heteronym.Pinyin              = pinyinParts.Groups[1].Value.Trim();
                            heteronym.AlternatePinyinNote = pinyinParts.Groups[2].Value;
                            heteronym.AlternatePinyin     = pinyinParts.Groups[3].Value.Trim();
                        }

                        // Numbered forms are derived after any alternate reading has been split off.
                        heteronym.PinyinNumbered          = Pinyin.ConvertToNumbers(heteronym.Pinyin);
                        heteronym.AlternatePinyinNumbered = Pinyin.ConvertToNumbers(heteronym.AlternatePinyin);
                    }

                    allHeteronyms.Add(heteronym);
                }
            }
            db.InsertAllAsync(allHeteronyms).Wait();

            // Level 3: definitions, linked to their heteronym.
            var allDefinitions = new List<MoedictDefinition>();
            foreach (var heteronym in allHeteronyms)
            {
                foreach (var definition in heteronym.Definitions)
                {
                    definition.HeteronymId = heteronym.Id;
                    allDefinitions.Add(definition);
                }
            }
            db.InsertAllAsync(allDefinitions).Wait();

            // Level 4: quotes, examples and links, linked to their definition.
            // Note: each guard tests Quotes/Examples/Links while the loop reads the matching
            // *Strings property, exactly as the data model exposes them.
            var allQuotes   = new List<MoedictQuote>();
            var allExamples = new List<MoedictExample>();
            var allLinks    = new List<MoedictLink>();
            foreach (var definition in allDefinitions)
            {
                if (definition.Quotes != null)
                {
                    foreach (var text in definition.QuoteStrings)
                    {
                        var quoteRow = new MoedictQuote() { DefinitionId = definition.Id, Quote = text };
                        allQuotes.Add(quoteRow);
                    }
                }

                if (definition.Examples != null)
                {
                    foreach (var text in definition.ExampleStrings)
                    {
                        var exampleRow = new MoedictExample() { DefinitionId = definition.Id, Example = text };
                        allExamples.Add(exampleRow);
                    }
                }

                if (definition.Links != null)
                {
                    foreach (var text in definition.LinkStrings)
                    {
                        var linkRow = new MoedictLink() { DefinitionId = definition.Id, Link = text };
                        allLinks.Add(linkRow);
                    }
                }
            }
            db.InsertAllAsync(allQuotes).Wait();
            db.InsertAllAsync(allExamples).Wait();
            db.InsertAllAsync(allLinks).Wait();
        }