예제 #1
0
        /// <summary>
        /// Looks up <paramref name="phrase"/> through the Princeton WordNet web interface and stores the
        /// definitions parsed from the returned HTML in <paramref name="result"/>.
        /// </summary>
        /// <param name="phrase">Text to look up; it is URL-encoded as ISO-8859-1 and also used as the article caption.</param>
        /// <param name="languagesPair">Not used by this method.</param>
        /// <param name="subject">Not used by this method.</param>
        /// <param name="result">Receives the article URL, the translations and, for multi-section pages, child results.</param>
        /// <param name="networkSetting">Network settings handed to the request helper.</param>
        /// <exception cref="TranslationException">
        /// Thrown when the page reports that nothing was found (after setting <c>ResultNotFound</c>) or that
        /// the query contains characters the service does not accept.
        /// </exception>
        protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
        {
            // The same encoding is used for the query string and passed to the request helper.
            Encoding encoding = Encoding.GetEncoding("iso-8859-1");
            string   query    = "http://wordnetweb.princeton.edu/perl/webwn?s=" + HttpUtility.UrlEncode(phrase, encoding);

            WebRequestHelper helper =
                new WebRequestHelper(result, new Uri(query),
                                     networkSetting,
                                     WebRequestContentType.UrlEncodedGet, encoding);

            string responseFromServer = helper.GetResponse();

            if (responseFromServer.Contains("<h3>Your search did not return any results.</h3>"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }

            // The previous check passed a regex-like pattern containing literal "(?)" placeholders to
            // string.Contains, so it could only match a page holding those exact characters. Testing for the
            // two fixed fragments still matches that literal text and also matches whatever punctuation
            // appears in place of the placeholders.
            // NOTE(review): the exact wording of the server message is not visible here - confirm against a live response.
            bool invalidQuery = responseFromServer.Contains("<h3>Sorry")
                                && responseFromServer.Contains("your search can only contain letters");
            if (invalidQuery)
            {
                throw new TranslationException("Query contains extraneous symbols");
            }

            result.ArticleUrl        = query;
            result.ArticleUrlCaption = phrase;

            // Each node is the text found between "<h3>" and "</ul>": a heading followed by its list items.
            string[] nodes = StringParser.ParseItemsList("<h3>", "</ul>", responseFromServer);

            // A page with exactly one section is stored directly on the root result; otherwise every
            // section gets its own child result named after the section heading.
            bool singleSection = nodes.Length == 1;

            foreach (string node in nodes)
            {
                string nodename = StringParser.ExtractLeft("</h3>", node);

                Result target;
                if (singleSection)
                {
                    target = result;
                    target.Abbreviation = nodename;
                }
                else
                {
                    target = new Result(result.ServiceItem, nodename, result.LanguagePair, result.Subject);
                    result.Childs.Add(target);
                }

                foreach (string subnode in StringParser.ParseItemsList("<li>", "</li>", node))
                {
                    // Remove the markup, then keep only what ExtractRight returns for the ")" delimiter.
                    string translation = StringParser.RemoveAll("<", ">", subnode);
                    translation = StringParser.ExtractRight(")", translation);
                    target.Translations.Add(translation);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Extracts the anchors found in <paramref name="data"/> and registers them in the
        /// <c>RelatedLinks</c> collection of <paramref name="result"/>, at most once per link caption.
        /// </summary>
        /// <param name="result">Result that receives the related links.</param>
        /// <param name="data">HTML fragment scanned for items delimited by <c>&lt;a href="</c> and <c>&lt;/a&gt;</c>.</param>
        /// <param name="host">
        /// Absolute URL the links are resolved against. Links starting with "/" are appended to the scheme
        /// and authority of this URL; all other links are appended to the full <paramref name="host"/> string.
        /// </param>
        public static void SetAdditionalLinks(Result result, string data, string host)
        {
            string[]      urls  = StringParser.ParseItemsList("<a href=\"", "</a>", data);
            List <string> texts = new List <string>();

            // Scheme + authority of host (for example "http://example.com"). Computed lazily so that, as
            // before, host is only parsed as a Uri when a root-relative link is actually encountered.
            string siteRoot = null;

            foreach (string url in urls)
            {
                string link = StringParser.ExtractLeft("\"", url);
                link = link.Replace(" ", "+");

                if (link.StartsWith("/", StringComparison.Ordinal))
                {
                    if (siteRoot == null)
                    {
                        // GetLeftPart keeps the original scheme and any non-default port instead of
                        // forcing "http://" and dropping the port.
                        siteRoot = new Uri(host).GetLeftPart(UriPartial.Authority);
                    }
                    link = siteRoot + link;
                }
                else
                {
                    link = host + link;
                }

                string text = StringParser.ExtractRight(">", url);

                // Captions ending in " 1" or " 2" have that suffix removed so numbered variants share one caption.
                // NOTE(review): presumably ExtractLeft cuts at the first occurrence of the delimiter - verify
                // for captions that contain " 1" / " 2" before the end.
                if (text.EndsWith(" 1", StringComparison.Ordinal))
                {
                    text = StringParser.ExtractLeft(" 1", text);
                }
                else if (text.EndsWith(" 2", StringComparison.Ordinal))
                {
                    text = StringParser.ExtractLeft(" 2", text);
                }

                // Only the first link seen for a given caption is registered.
                if (!texts.Contains(text))
                {
                    result.RelatedLinks.Add(text, new Uri(link));
                    texts.Add(text);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Looks up <paramref name="phrase"/> in the Google dictionary web page and fills
        /// <paramref name="result"/> from the returned HTML: pronunciation, translations grouped by part of
        /// speech, synonyms, related phrases and web definitions.
        /// </summary>
        /// <param name="phrase">Text to look up; it is URL-encoded as UTF-8 and also used as the article caption.</param>
        /// <param name="languagesPair">Language pair converted by <c>GoogleUtils.ConvertLanguagesPair</c> for the <c>langpair</c> query parameter.</param>
        /// <param name="subject">Subject passed to every child result created here.</param>
        /// <param name="result">Root result that receives the parsed data.</param>
        /// <param name="networkSetting">Network settings handed to the request helper.</param>
        /// <exception cref="TranslationException">
        /// Thrown, after setting <c>ResultNotFound</c>, when the page states that no translations or
        /// definitions were found.
        /// </exception>
        protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
        {
            // {0} = encoded phrase, {1} = converted language pair.
            string query = "http://www.google.com/dictionary?aq=f&langpair={1}&q={0}&hl=en";

            query = string.Format(query, HttpUtility.UrlEncode(phrase, System.Text.Encoding.UTF8), GoogleUtils.ConvertLanguagesPair(languagesPair));
            WebRequestHelper helper =
                new WebRequestHelper(result, new Uri(query),
                                     networkSetting,
                                     WebRequestContentType.UrlEncodedGet);

            result.ArticleUrl        = query;
            result.ArticleUrlCaption = phrase;

            string responseFromServer = helper.GetResponse();

            // The page uses two different "nothing found" messages; both are treated the same way.
            if (responseFromServer.Contains("No dictionary translations were found for: <strong>"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }
            if (responseFromServer.Contains("No dictionary definitions were found for: <strong>"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }

            // Audio is flagged when the page embeds the dictionary flash object. This is checked on the
            // full response, before the response is narrowed to the search-result section below.
            result.HasAudio    = responseFromServer.Contains("<object data=\"/dictionary/flash");
            responseFromServer = StringParser.Parse("<div class=\"dct-srch-otr\">", "<div class=\"dct-rt-sct\">", responseFromServer);

            // Pronunciation: the text between the slashes is stored in Abbreviation.
            if (responseFromServer.Contains("<span class=\"dct-tp\">/"))
            {
                string pronuncation = StringParser.Parse("<span class=\"dct-tp\">/", "/</span>", responseFromServer);
                pronuncation        = pronuncation.Trim();
                result.Abbreviation = pronuncation;
            }


            // Translations
            //string translations = StringParser.Parse("<div class=\"dct-srch-rslt\">", "</div>", responseFromServer);



            string translations = "";

            // TODO: additional senses (e.g. "water" - "water down") are not supported.

            // The root translation list starts at the "pr-root" <ul>; its end marker depends on which
            // section follows it in the page.
            if (responseFromServer.Contains("<div class=\"sen\">"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<div class=\"sen\">", responseFromServer);
            }
            else if (responseFromServer.Contains("<h3>Related phrases</h3>"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Related phrases</h3>", responseFromServer);
            }
            else if (responseFromServer.Contains("<h3>Web definitions</h3>"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Web definitions</h3>", responseFromServer);
            }
            else
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>", responseFromServer);
            }


            StringParser  parser          = null;
            List <string> subtranslations = new List <string>();

            // Split the translation block into the chunks processed by the main loop below.
            // NOTE(review): the third ReadItemsList argument ("3485730457203") is presumably a
            // sentinel/default value - its meaning is not visible here; confirm against StringParser.
            if (translations.Contains("<li class=\"dct-ec\""))
            {
                //"</li>\n</ul>\n</li>"
                parser = new StringParser(translations);
                string[] subtranslation_list = parser.ReadItemsList("<li class=\"dct-ec\"", "</li>\n</ul>\n</li>", "3485730457203");
                subtranslations.AddRange(subtranslation_list);
            }
            else if (translations.Contains("<div style=\"font-weight:bold\">Synonyms:</div>"))
            {
                // Synonyms get their own "Synonyms" child with one sub-child per part-of-speech group.
                Result synonyms_tr = CreateNewResult("Synonyms", languagesPair, subject);
                result.Childs.Add(synonyms_tr);

                string synonyms = StringParser.Parse("<div style=\"font-weight:bold\">Synonyms:</div>", "</div>", translations);
                parser = new StringParser(synonyms);
                string[] syn_group_list = parser.ReadItemsList("<li>", "</li>", "3485730457203");
                foreach (string syngroup in syn_group_list)
                {
                    string syn_group_name = StringParser.Parse("title=\"Part-of-speech\">", "</span>", syngroup);
                    Result syn_tr         = CreateNewResult(syn_group_name, languagesPair, subject);
                    synonyms_tr.Childs.Add(syn_tr);
                    parser = new StringParser(syngroup);
                    string[] syn_list = parser.ReadItemsList("<a", "</a>", "3485730457203");
                    foreach (string syn in syn_list)
                    {
                        string synonym = StringParser.ExtractRight(">", syn);
                        syn_tr.Translations.Add(synonym);
                    }
                }

                // The whole block is still handed to the main loop below as a single chunk.
                subtranslations.Add(translations);
            }
            else
            {
                subtranslations.Add(translations);
            }

            // subres_tr is the result the current chunk's entries are attached to. It starts as the root
            // result and is replaced by a part-of-speech child when a chunk declares one; it is not reset
            // between chunks.
            Result subres_tr  = result;
            Result sub2res_tr = null;
            Result sub3res_tr = null;
            string abbr_str;

            foreach (string subtranslation in subtranslations)
            {
                if (subtranslation.Contains("<div  class=\"dct-ec\">"))
                {
                    abbr_str  = StringParser.Parse("title=\"Part-of-speech\">", "</span>", subtranslation);
                    subres_tr = CreateNewResult(abbr_str, languagesPair, subject);
                    result.Childs.Add(subres_tr);
                }

                // Rewrite every '<li class="dct-em"' opener into "<end><begin>" and append a closing
                // "<end>", so the chunk can be split into its entries with ReadItemsList.
                parser = new StringParser(subtranslation.Replace("<li class=\"dct-em\"", "<end><begin>") + "<end>");
                string[] subsubtranslation_list = parser.ReadItemsList("<begin>", "<end>", "3485730457203");

                foreach (string subsubtanslation in subsubtranslation_list)
                {
                    sub2res_tr = CreateNewResult("", languagesPair, subject);
                    subres_tr.Childs.Add(sub2res_tr);

                    // Adding "See also" first makes Translations non-empty, so every piece parsed below
                    // for this entry becomes a nested child instead of the entry's own translation.
                    if (subsubtanslation.Contains(">See also</span>"))
                    {
                        sub2res_tr.Translations.Add("See also");
                    }

                    // Same marker trick, this time splitting the entry on '<span class="dct-tt">'.
                    StringParser parser2 = new StringParser(subsubtanslation.Replace("<span class=\"dct-tt\">", "<end><begin>") + "<end>");
                    string[]     sub3translation_list = parser2.ReadItemsList("<begin>", "<end>", "3485730457203");

                    foreach (string sub3tanslation in sub3translation_list)
                    {
                        string text_translation = "";
                        string text_abbr        = "";
                        if (sub3tanslation.Contains("<span"))
                        {
                            // Text before the first nested span is the translation; the content of that
                            // span is taken as the abbreviation.
                            text_translation = StringParser.ExtractLeft("<span", sub3tanslation);
                            if (text_translation.Contains("</span"))
                            {
                                text_translation = StringParser.ExtractLeft("</span", text_translation);
                            }
                            text_abbr = StringParser.Parse("<span", "</span", sub3tanslation);
                            text_abbr = StringParser.ExtractRight(">", text_abbr);
                        }
                        else
                        {
                            text_translation = StringParser.ExtractLeft("</span>", sub3tanslation);
                        }

                        text_translation = StringParser.RemoveAll("<", ">", text_translation);

                        // The first piece becomes the entry's own translation (with its abbreviation);
                        // later pieces are attached as nested children and their abbreviation is dropped.
                        if (sub2res_tr.Translations.Count == 0)
                        {
                            sub2res_tr.Translations.Add(text_translation);
                            sub2res_tr.Abbreviation = text_abbr;
                        }
                        else
                        {
                            sub3res_tr = CreateNewResult("", languagesPair, subject);
                            sub3res_tr.Translations.Add(text_translation);
                            //sub3res_tr.Phrase = text_abbr;
                            sub2res_tr.Childs.Add(sub3res_tr);
                        }
                    }
                }
            }


            // Related phrases: each list item yields a child result named after the phrase, holding its translation.
            if (responseFromServer.Contains("<h3>Related phrases</h3>"))
            {
                string related = StringParser.Parse("<ul class=\"rlt-snt\">", "</ul>", responseFromServer);
                if (!string.IsNullOrEmpty(related))
                {
                    parser = new StringParser(related);
                    string[] related_list = parser.ReadItemsList("<li>", "</li>");

                    foreach (string related_s in related_list)
                    {
                        string related_str = related_s.Replace("\n", "").Trim();
                        string subphrase   = StringParser.Parse("<div>", "</div>", related_str);
                        subphrase = StringParser.RemoveAll("<", ">", subphrase);
                        // NOTE(review): "&nbsp" is replaced without its trailing ';', so a "&nbsp;" entity
                        // leaves a ';' behind - confirm whether that is intended.
                        subphrase = subphrase.Replace("&nbsp", " ").Replace("\n", "").Trim();


                        string subphrasetrans = StringParser.ExtractRight("</div>", related_str);

                        subphrasetrans = StringParser.RemoveAll("<", ">", subphrasetrans);
                        subphrasetrans = subphrasetrans.Replace("&nbsp", " ").Replace("\n", "").Trim();

                        Result subres = CreateNewResult(subphrase, languagesPair, subject);
                        subres.Translations.Add(subphrasetrans);
                        result.Childs.Add(subres);
                    }
                }
            }

            // Web definitions: all list items are collected into a single child named after the phrase.
            if (responseFromServer.Contains("<h3>Web definitions</h3>"))
            {
                string related = StringParser.ExtractRight("<ul class=\"gls\">", responseFromServer);
                if (!string.IsNullOrEmpty(related))
                {
                    {
                        Result subres_wd = CreateNewResult(phrase, languagesPair, subject);
                        result.Childs.Add(subres_wd);

                        parser = new StringParser(related);
                        string[] related_list = parser.ReadItemsList("<li>", "</li>");

                        foreach (string related_s in related_list)
                        {
                            string related_str = related_s;
                            related_str = related_str.Replace("<br/>", "").Trim();
                            related_str = StringParser.RemoveAll("<", ">", related_str);
                            related_str = related_str.Replace("&nbsp", " ").Replace("\n", "").Trim();
                            subres_wd.Translations.Add(related_str);
                        }
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Looks up <paramref name="phrase"/> on megaslownik.pl and copies the definitions, synonyms and
        /// phrase links found in the returned page into <paramref name="result"/>.
        /// </summary>
        /// <param name="serviceItem">Service used to create child results for related phrases.</param>
        /// <param name="phrase">Text to look up; also used to filter the link list of the page.</param>
        /// <param name="languagesPair">Language pair; Polish to Polish selects the synonyms dictionary behaviour.</param>
        /// <param name="subject">Subject passed to every child result created here.</param>
        /// <param name="result">Result that receives translations, child results and related links.</param>
        /// <param name="networkSetting">Network settings handed to the request helper.</param>
        /// <exception cref="TranslationException">
        /// Thrown, after setting <c>ResultNotFound</c>, when the page reports an unknown word or contains
        /// no definition blocks.
        /// </exception>
        public static void DoTranslate(ServiceItem serviceItem, string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
        {
            const string articleRoot = "http://megaslownik.pl/slownik/";
            const string anchorStart = "<a href=\"/slownik";

            bool synonimsDictionary = languagesPair.From == Language.Polish && languagesPair.To == Language.Polish;

            string query = string.Format("http://megaslownik.pl/slownik/{0}/,{1}",
                                         MegaslownikTools.ConvertLanguagePair(languagesPair),
                                         HttpUtility.UrlEncode(phrase));

            result.ArticleUrl        = query;
            result.ArticleUrlCaption = phrase;

            WebRequestHelper request =
                new WebRequestHelper(result, new Uri(query),
                                     networkSetting,
                                     WebRequestContentType.UrlEncodedGet);

            string page = request.GetResponse();

            if (page.Contains("<div class=\"slowo\">\r\n             Szukanego słowa nie ma w MEGAsłowniku.\r\n"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }

            result.HasAudio = page.Contains("class=\"ikona_sluchaj2\">");

            // Definitions: every anchor inside a definition block is added as a translation.
            string[] definitionBlocks = StringParser.ParseItemsList("<div class=\"definicja\">", "</div>", page);
            if (definitionBlocks.Length == 0)
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }

            foreach (string block in definitionBlocks)
            {
                foreach (string anchor in StringParser.ParseItemsList(anchorStart, "</a>", block))
                {
                    string word = StringParser.ExtractRight(">", anchor);
                    result.Translations.Add(StringParser.RemoveAll("<", ">", word));
                }
            }

            // Synonyms: same extraction, but a word is only added when it is not listed yet.
            string[] synonymBlocks = StringParser.ParseItemsList("<div class=\"synonim\">synonimy:", "</div>", page);
            foreach (string block in synonymBlocks)
            {
                foreach (string anchor in StringParser.ParseItemsList(anchorStart, "</a>", block))
                {
                    string word = StringParser.ExtractRight(">", anchor);
                    word = StringParser.RemoveAll("<", ">", word);
                    if (result.Translations.Contains(word))
                    {
                        continue;
                    }
                    result.Translations.Add(word);
                }
            }

            // Additional links: the page marks list items either as "<li >" or as "<li>". Both forms are
            // scanned, the spaced form first, and every item goes through the same handling.
            string[] listItemStarts = new string[] { "<li ><a href=\"/slownik/", "<li><a href=\"/slownik/" };
            foreach (string itemStart in listItemStarts)
            {
                foreach (string item in StringParser.ParseItemsList(itemStart, "</li>", page))
                {
                    string itemUrl     = articleRoot + StringParser.ExtractLeft("\"", item);
                    string itemCaption = StringParser.ExtractRight(">", item);
                    itemCaption = StringParser.RemoveAll("<", ">", itemCaption);

                    if (synonimsDictionary)
                    {
                        // Synonyms dictionary: items mentioning the phrase become related links.
                        if (itemCaption.Contains(phrase))
                        {
                            result.RelatedLinks.Add(itemCaption, itemUrl);
                        }
                        continue;
                    }

                    // Translation dictionary: only "phrase » translation" items mentioning the phrase
                    // are kept, each as a child result of its own.
                    if (!itemCaption.Contains("»") || !itemCaption.Contains(phrase))
                    {
                        continue;
                    }

                    string relatedPhrase      = StringParser.ExtractLeft("»", itemCaption);
                    string relatedTranslation = StringParser.ExtractRight("»", itemCaption);

                    Result relatedResult = serviceItem.CreateNewResult(relatedPhrase, languagesPair, subject);
                    result.Childs.Add(relatedResult);
                    relatedResult.Translations.Add(relatedTranslation);
                    relatedResult.ArticleUrl        = itemUrl;
                    relatedResult.ArticleUrlCaption = relatedPhrase;
                }
            }
        }
        /// <summary>
        /// Queries the Merriam-Webster online dictionary and appends the entries, the variant or the
        /// spelling suggestions found in the response to <paramref name="result"/> as <c>html!</c>-prefixed
        /// link snippets. While the page offers a "next" form and fewer than 50 links have been collected,
        /// the method calls itself with the form data of the next page.
        /// </summary>
        /// <param name="phrase">Word or phrase to look up; used for the request only when <paramref name="post_data"/> is empty.</param>
        /// <param name="languagesPair">Passed through unchanged to the recursive call.</param>
        /// <param name="subject">Passed through unchanged to the recursive call.</param>
        /// <param name="result">Receives the article URL, the link snippets and the count of remaining entries.</param>
        /// <param name="networkSetting">Network settings handed to the request helper.</param>
        /// <param name="post_data">
        /// Null or empty for the initial GET request; otherwise the URL-encoded form body that is POSTed
        /// to fetch a follow-up page.
        /// </param>
        /// <exception cref="TranslationException">
        /// Thrown, after setting <c>ResultNotFound</c>, when the word is unknown and the page offers no suggestions.
        /// </exception>
        void InternalDoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting, string post_data)
        {
            // {0} = entry path appended to the dictionary URL, {1} = visible caption of the link.
            const string linkFormat = "html!<p><a href=\"http://www.merriam-webster.com/dictionary/{0}\" title=\"http://www.merriam-webster.com/dictionary/{0}\">{1}</a></p>";

            bool isInitialRequest = string.IsNullOrEmpty(post_data);

            WebRequestHelper helper;
            if (isInitialRequest)
            {
                string query = string.Format("http://www.merriam-webster.com/dictionary/{0}", HttpUtility.UrlEncode(phrase));
                result.ArticleUrl = query;

                helper =
                    new WebRequestHelper(result, new Uri(query),
                                         networkSetting,
                                         WebRequestContentType.UrlEncodedGet);
                //helper.UseGoogleCache = true;
            }
            else
            {
                helper =
                    new WebRequestHelper(result, new Uri("http://www.merriam-webster.com/dictionary"),
                                         networkSetting,
                                         WebRequestContentType.UrlEncoded);
                helper.AddPostData(post_data);
            }

            string responseFromServer = helper.GetResponse();

            // Drop the reference early; this method may recurse while the local is still in scope.
            helper = null;

            if (responseFromServer.Contains("The word you've entered isn't in the dictionary."))
            {
                if (!responseFromServer.Contains("<PRE>"))
                {
                    result.ResultNotFound = true;
                    throw new TranslationException("Nothing found");
                }

                // The <PRE> block lists spelling suggestions; each one becomes a link to its own entry.
                StringParser suggestionParser = new StringParser("<PRE>", "</PRE>", responseFromServer);
                foreach (string suggestion in suggestionParser.ReadItemsList("\">", "<", "345873409587"))
                {
                    result.Translations.Add(string.Format(linkFormat, suggestion, suggestion));
                }
                return;
            }

            bool singleEntry = responseFromServer.Contains("One entry found.\n<br/>")
                               || responseFromServer.Contains("One entry found.\n<br />");

            if (singleEntry)
            {
                // A single-entry page contributes only a link to its variant, when one is present.
                if (responseFromServer.Contains("<span class=\"variant\">"))
                {
                    string variant = StringParser.Parse("<span class=\"variant\">", "</span>", responseFromServer);
                    result.Translations.Add(string.Format(linkFormat, variant, variant));
                }
                return;
            }

            // On the first page, the number following "va:" in the hidden 'list' field is stored as the
            // entry count; it is reduced by the number of collected links at the end of this method.
            if (isInitialRequest && responseFromServer.Contains("'list' value=\"va:"))
            {
                string count_str = StringParser.Parse("'list' value=\"va:", ",", responseFromServer);
                int    count;
                if (int.TryParse(count_str, out count))
                {
                    result.MoreEntriesCount = count;
                }
            }

            StringParser parser = new StringParser("<ol class=\"results\"", "</ol>", responseFromServer);
            string[]     items  = parser.ReadItemsList("href=\"/dictionary/", "</a>");

            foreach (string item in items)
            {
                string part = StringParser.ExtractLeft("\">", item);
                string name = StringParser.ExtractRight("\">", item);
                name = StringParser.RemoveAll("<sup>", "</sup>", name);
                result.Translations.Add(string.Format(linkFormat, part, name));
            }

            // Follow the "next" form only when this page produced entries: items[0] is needed for the
            // form data, and a page without entries would otherwise crash here or recurse without progress.
            bool hasNextPage = responseFromServer.IndexOf("name='incr'", StringComparison.Ordinal) > 0;
            if (items.Length > 0 && result.Translations.Count < 50 && hasNextPage)
            {
                //incr=Next+5&jump=dragon%27s+blood&book=Dictionary&quer=blood&list=45%2C31%2C3602592%2C0%3Bdragon%27s+blood%3D2000318535%3Bflesh+and+blood%3D2000400359%3Bfull-blood%5B1%2Cadjective%5D%3D2000425490%3Bfull-blood%5B2%2Cnoun%5D%3D2000425517%3Bhalf-blood%3D2000475964%3Bhalf+blood%3D2000475978%3Bhigh+blood+pressure%3D2000498596%3Blow+blood+pressure%3D2000629024%3Bnew+blood%3D2000712110%3Bpure-blooded%3D2000860991
                string incr_value = StringParser.Parse("<input type='submit' value='", "'", responseFromServer);
                string quer_value = StringParser.Parse("<input type='hidden' name='quer' value=\"", "\"", responseFromServer);
                string list_value = StringParser.Parse("<input type='hidden' name='list' value=\"", "\"", responseFromServer);

                // Every field is URL-encoded, including the button caption, which was previously sent raw
                // although the sample request above shows it encoded ("Next+5").
                string post_data_value = string.Format("incr={0}&jump={1}&book=Dictionary&quer={2}&list={3}",
                                                       HttpUtility.UrlEncode(incr_value),
                                                       HttpUtility.UrlEncode(items[0]),
                                                       HttpUtility.UrlEncode(quer_value),
                                                       HttpUtility.UrlEncode(list_value));

                // Release the page text before recursing.
                responseFromServer = null;

                InternalDoTranslate(phrase, languagesPair, subject, result, networkSetting, post_data_value);
            }

            // Only the outermost call converts the total into "entries not shown".
            if (result.MoreEntriesCount != 0 && isInitialRequest)
            {
                result.MoreEntriesCount -= result.Translations.Count;
            }
        }