Esempio n. 1
0
        /// <summary>
        /// Experimental lookup: downloads the Oxford Learner's Dictionaries page for
        /// <paramref name="word"/> and assembles a plain-text summary from it (word class,
        /// verb forms, audio links, definitions, notes, idioms and example sentences).
        /// </summary>
        /// <remarks>
        /// NOTE(review): the assembled summary text and the cleaned pronunciation are computed
        /// but are not copied onto the returned <c>oWordDefine</c>, and the completion callback
        /// is empty. Confirm which members of <c>oWordDefine</c> should receive them.
        /// </remarks>
        /// <param name="word">Word to look up; the sample word "forget" is used when null or blank.</param>
        void test_run_v1(string word)
        {
            // Keep the historical sample word only as a fallback, so the argument is honoured.
            if (string.IsNullOrEmpty(word) || word.Trim().Length == 0)
            {
                word = "forget";
            }
            word = word.Trim();

            // Escape the word so spaces and reserved characters cannot corrupt the URL.
            string url = string.Format(
                "https://www.oxfordlearnersdictionaries.com/definition/english/{0}?q={0}",
                Uri.EscapeDataString(word));

            UrlService.GetAsync(url, (stream) =>
            {
                oWordDefine wo = new oWordDefine(word);

                string s;
                using (var reader = new StreamReader(stream, Encoding.UTF8))
                {
                    s = reader.ReadToEnd();
                }
                if (s.Length > 0)
                {
                    s = HttpUtility.HtmlDecode(s);
                }
                if (s.Length == 0)
                {
                    return new UrlAnanyticResult() { Message = "Can not read" };
                }

                const string heading_text = "\r\n# ";

                HtmlNode nodes = f_word_speak_getPronunciationFromOxford_Nodes(s);

                // ---- headline: pronunciation and word class -------------------------------
                string pro = nodes.QuerySelectorAll("span[class=\"phon\"]")
                             .Select(x => x.InnerText)
                             .Where(x => !string.IsNullOrEmpty(x))
                             .FirstOrDefault() ?? string.Empty;
                if (pro.StartsWith("BrE", StringComparison.Ordinal))
                {
                    pro = pro.Substring(3).Trim();
                }
                pro = pro.Replace("//", "/"); // NOTE(review): value is not stored anywhere yet

                string type = nodes.QuerySelectorAll("span[class=\"pos\"]")
                              .Select(x => x.InnerText)
                              .Where(x => !string.IsNullOrEmpty(x))
                              .FirstOrDefault();

                // Invariant casing: the ASCII-only filter at the end would otherwise drop
                // culture-specific capitals (e.g. the dotted capital I of Turkish cultures).
                string mean_en = word.ToUpperInvariant();
                if (!string.IsNullOrEmpty(type))
                {
                    mean_en += " (" + type + ")";
                }

                // ---- verb forms -----------------------------------------------------------
                // Each "vp" span contributes the last line of its text.
                string[] verb_group = nodes.QuerySelectorAll("span[class=\"vp\"]")
                                      .Select(x => x.InnerText_NewLine)
                                      .Where(x => !string.IsNullOrEmpty(x))
                                      .Select(x => x.Split(new string[] { Environment.NewLine }, StringSplitOptions.None))
                                      .Select(parts => parts[parts.Length - 1])
                                      .ToArray();
                if (verb_group.Length > 0)
                {
                    mean_en += heading_text + "REF: " + string.Join("; ", verb_group);
                }

                // Each "vp-g" span is rewritten to "<form> = UK: ... US: ..." and only the part
                // before '=' is kept.
                string[] word_links = nodes.QuerySelectorAll("span[class=\"vp-g\"]")
                                      .Select(x => x.InnerText)
                                      .Where(x => !string.IsNullOrEmpty(x))
                                      .Select(x => x.Replace(" BrE BrE", " = UK: ").Replace("; NAmE NAmE", "US: ").Replace("//", "/"))
                                      .Select(x => x.Split('=')[0].Trim())
                                      .ToArray();
                if (word_links.Length > 0)
                {
                    mean_en += "\r\n" + string.Join(Environment.NewLine, word_links).Replace("-ing", "V-ing").Trim();
                }

                // ---- audio links ----------------------------------------------------------
                string[] mp3 = nodes.QuerySelectorAll("div[data-src-mp3]")
                               .Select(x => x.GetAttributeValue("data-src-mp3", string.Empty))
                               .Where(x => !string.IsNullOrEmpty(x))
                               .Distinct()
                               .ToArray();
                if (mp3.Length > 0)
                {
                    mean_en += "\r\n{\r\n" + string.Join(Environment.NewLine, mp3) + "\r\n}\r\n";
                }

                // ---- definitions, notes, idioms ---------------------------------------------
                string[] uns     = nodes.QuerySelectorAll("span[class=\"un\"]").Select(x => x.InnerText_NewLine).Where(x => !string.IsNullOrEmpty(x)).ToArray();
                string[] idoms   = nodes.QuerySelectorAll("span[class=\"idm-g\"]").Select(x => x.InnerText_NewLine).Where(x => !string.IsNullOrEmpty(x)).ToArray();
                string[] defines = nodes.QuerySelectorAll("li[class=\"sn-g\"]").Select(x => x.InnerText_NewLine).Where(x => !string.IsNullOrEmpty(x)).ToArray();

                if (defines.Length > 0)
                {
                    string define_text = f_word_define_splitSentences(defines)
                                         .Replace("\r\n[", ". ")
                                         .Replace("]", ":");

                    // A digit alone on a line becomes a "- " bullet; "<digit>." becomes a "+ " bullet.
                    // All line-break replacements run before the dot replacements.
                    for (int d = 1; d <= 9; d++)
                    {
                        define_text = define_text.Replace(d.ToString() + "\r\n", "\r\n- ");
                    }
                    for (int d = 1; d <= 9; d++)
                    {
                        define_text = define_text.Replace(d.ToString() + ".", "\r\n+ ");
                    }

                    mean_en += heading_text + "DEFINE:\r\n" + define_text;
                }

                if (uns.Length > 0)
                {
                    mean_en += heading_text + "NOTE:\r\n" + f_word_define_splitSentences(uns);
                }

                if (idoms.Length > 0)
                {
                    mean_en += heading_text + "IDOM:\r\n" + f_word_define_splitSentences(idoms);
                }

                // ---- normalise spacing and capitalise every line ----------------------------
                mean_en = Regex.Replace(mean_en, "[ ]{2,}", " ").Replace("\r\n’", "’");
                mean_en = string.Join(Environment.NewLine,
                                      mean_en.Split(new string[] { Environment.NewLine }, StringSplitOptions.None)
                                      .Select(x => f_word_define_capitalizeLine(x))
                                      .ToArray());

                // ---- example sentences (appended after capitalisation) ------------------------
                string[] sens = nodes.QuerySelectorAll("span[class=\"x\"]")
                                .Where(x => !string.IsNullOrEmpty(x.InnerText))
                                .Select(x => x.InnerText.Trim())
                                .Where(x => x.Length > 0)
                                .Select(x => "- " + x)
                                .ToArray();
                if (sens.Length > 0)
                {
                    mean_en += heading_text + "EXAMPLE:\r\n" + string.Join(Environment.NewLine, sens);
                }

                // ---- final clean-up -----------------------------------------------------------
                // Line breaks are parked as '|' so the ASCII filter below does not remove them.
                mean_en = mean_en.Replace("See full entry", string.Empty).Replace(Environment.NewLine, "|")
                          .Replace("’ ", @""" ").Replace(".’", @".""").Replace("’|", @"""|")
                          .Replace(" ‘", @" """)
                          .Replace("’", @"'");

                mean_en = Regex.Replace(mean_en, @"[^\x20-\x7E]", string.Empty);
                mean_en = mean_en.Replace("|", Environment.NewLine);
                mean_en = Regex.Replace(mean_en, "[ ]{2,}", " ");
                // NOTE(review): mean_en is complete here but is not attached to the result.

                return new UrlAnanyticResult() { Ok = true, Html = s, Result = wo };
            }, (result) =>
            {
                if (result.Result != null)
                {
                    // Intentionally empty: nothing consumes the parsed result yet.
                }
            });
        }

        /// <summary>
        /// Flattens text blocks into non-empty trimmed lines, with a line break forced after every '.'.
        /// </summary>
        /// <param name="blocks">Text blocks, each possibly spanning several lines.</param>
        /// <returns>The resulting lines joined with <see cref="Environment.NewLine"/>.</returns>
        private static string f_word_define_splitSentences(string[] blocks)
        {
            string[] lines = string.Join(Environment.NewLine, blocks)
                             .Split(new char[] { '\r', '\n' })
                             .Select(x => x.Replace(".", ".\r\n").Trim())
                             .Where(x => x.Length > 0)
                             .ToArray();
            return string.Join(Environment.NewLine, lines);
        }

        /// <summary>
        /// Trims a line and upper-cases its first letter; for "+ text" / "- text" bullet lines the
        /// first letter after the marker is upper-cased instead.
        /// </summary>
        /// <param name="line">One line of the summary text.</param>
        /// <returns>The trimmed, capitalised line (empty lines are returned as empty).</returns>
        private static string f_word_define_capitalizeLine(string line)
        {
            string x = line.Trim();
            if (x.Length == 0)
            {
                return x;
            }

            // Only a marker followed by a space and at least one character is a bullet.
            // A bare "+" or "-" (or text such as "-ing") must not be indexed past its end or
            // have its second character overwritten.
            bool is_bullet = (x[0] == '+' || x[0] == '-') && x.Length >= 3 && x[1] == ' ';
            if (is_bullet)
            {
                return x[0].ToString() + " " + char.ToUpperInvariant(x[2]) + x.Substring(3);
            }

            return char.ToUpperInvariant(x[0]) + x.Substring(1);
        }
Esempio n. 2
0
        /// <summary>
        /// One iteration of the job loop. The first call only initialises the job; a call with
        /// <paramref name="timedOut"/> false stops the job; otherwise at most one queued message
        /// is dequeued and handled.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the signature matches <c>System.Threading.WaitOrTimerCallback</c>, so this
        /// is presumably registered with the thread pool — confirm at the registration site.
        /// </remarks>
        /// <param name="state">The <c>JobInfo</c> of this job; it is cast unconditionally.</param>
        /// <param name="timedOut">True for a regular tick; false when the job was signalled to stop.</param>
        public void f_runLoop(object state, bool timedOut)
        {
            if (!_inited)
            {
                _inited = true;
                f_Init();
                return;
            }

            JobInfo ti = (JobInfo)state;

            if (!timedOut)
            {
                // Signalled rather than timed out: stop the job.
                ti.f_stopJob();
                return;
            }

            if (msg.Count <= 0)
            {
                return;
            }

            Message m = msg.Dequeue(null);
            if (m == null)
            {
                return;
            }

            switch (m.getAction())
            {
                case MESSAGE_ACTION.ITEM_SEARCH:
                    f_runLoop_itemSearch(m);
                    break;

                case MESSAGE_ACTION.URL_REQUEST_CACHE:
                    f_runLoop_urlRequestCache(m);
                    break;
            }
        }

        /// <summary>
        /// Answers an ITEM_SEARCH message: filters the link list by the string in <c>m.Input</c>,
        /// or returns the first page of the list when no usable key is supplied.
        /// </summary>
        /// <param name="m">The request message; it is turned into the response and sent back.</param>
        private void f_runLoop_itemSearch(Message m)
        {
            const int pageSize = 10;

            oLink[] a;

            // A non-string Input yields null here and is treated like a missing key, instead of
            // passing null to Contains() and throwing ArgumentNullException.
            string key = m.Input as string;
            if (key != null)
            {
                a = list.Where(x => x.Link.Contains(key) || x.Title.Contains(key) || x.Tags.Contains(key), false, int.MaxValue);
                m.Output.Counter = a.Length;
            }
            else
            {
                a = list.Take(pageSize).ToArray();
                m.Output.Counter = list.Count;
            }

            f_runLoop_markAsResponse(m);
            m.Output.PageSize   = pageSize;
            m.Output.PageNumber = 1;
            m.Output.Total      = list.Count;
            m.Output.SetData(a);

            this.StoreJob.f_responseMessageFromJob(m);
        }

        /// <summary>
        /// Answers a URL_REQUEST_CACHE message: replies with the cached HTML for the URL in
        /// <c>m.Input</c>, or downloads it, caches it and then replies.
        /// </summary>
        /// <param name="m">The request message; it is turned into the response and sent back.</param>
        private void f_runLoop_urlRequestCache(Message m)
        {
            // A null or non-string Input is ignored; ContainsKey(null) would throw.
            string url = m.Input as string;
            if (url == null)
            {
                return;
            }

            if (urlData.ContainsKey(url))
            {
                string cached = urlData[url];

                f_runLoop_markAsResponse(m);
                m.Output.SetData(cached);

                this.StoreJob.f_responseMessageFromJob(m);
                return;
            }

            UrlService.GetAsync(url, m, UrlService.Func_GetHTML_UTF8_FORMAT_BROWSER, (result) =>
            {
                if (!result.Ok)
                {
                    // NOTE(review): no response is sent for a failed download — confirm that
                    // the requester does not wait for one.
                    return;
                }

                string htm = result.Html;
                // NOTE(review): this callback may run on a different thread than the loop;
                // verify that urlData tolerates concurrent access.
                if (!urlData.ContainsKey(url))
                {
                    urlData.Add(url, htm);
                }

                f_runLoop_markAsResponse(m);
                m.Output.SetData(htm);

                this.StoreJob.f_responseMessageFromJob(m);
            });
        }

        /// <summary>
        /// Stamps a message as a successful response originating from this job group.
        /// </summary>
        /// <param name="m">The message to stamp.</param>
        private void f_runLoop_markAsResponse(Message m)
        {
            m.Type    = MESSAGE_TYPE.RESPONSE;
            m.JobName = this._groupName;

            m.Output.Ok = true;
        }