/// <summary>
/// Requests the WordNet web page for <paramref name="phrase"/> and copies the listed senses into
/// <paramref name="result"/>. A page with exactly one heading section is written straight into
/// <paramref name="result"/>; otherwise every heading section becomes a child result.
/// </summary>
/// <param name="phrase">Text to look up; it is URL-encoded as iso-8859-1.</param>
/// <param name="languagesPair">Not read by this method.</param>
/// <param name="subject">Not read by this method.</param>
/// <param name="result">Result object that receives the article URL, translations and children.</param>
/// <param name="networkSetting">Passed through to the web request helper.</param>
/// <exception cref="TranslationException">
/// Thrown when the page contains the "no results" marker or the "invalid characters" marker.
/// </exception>
protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
{
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
    string requestUrl = "http://wordnetweb.princeton.edu/perl/webwn?s=" + HttpUtility.UrlEncode(phrase, latin1);
    WebRequestHelper request = new WebRequestHelper(result, new Uri(requestUrl), networkSetting, WebRequestContentType.UrlEncodedGet, latin1);
    string page = request.GetResponse();

    if (page.Contains("<h3>Your search did not return any results.</h3>"))
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }

    if (page.Contains("<h3>Sorry(?) your search can only contain letters(?)"))
    {
        throw new TranslationException("Query contains extraneous symbols");
    }

    result.ArticleUrl = requestUrl;
    result.ArticleUrlCaption = phrase;

    // Each section runs from an <h3> heading to the end of the <ul> that follows it.
    string[] sections = StringParser.ParseItemsList("<h3>", "</ul>", page);
    bool singleSection = sections.Length == 1;

    for (int i = 0; i < sections.Length; i++)
    {
        string section = sections[i];
        string heading = StringParser.ExtractLeft("</h3>", section);

        Result target;
        if (singleSection)
        {
            // A lone section is reported on the root result; its heading goes into Abbreviation.
            target = result;
            target.Abbreviation = heading;
        }
        else
        {
            target = new Result(result.ServiceItem, heading, result.LanguagePair, result.Subject);
            result.Childs.Add(target);
        }

        foreach (string item in StringParser.ParseItemsList("<li>", "</li>", section))
        {
            // Drop the markup first, then keep only what follows the ")" marker.
            string plainText = StringParser.RemoveAll("<", ">", item);
            target.Translations.Add(StringParser.ExtractRight(")", plainText));
        }
    }
}
/// <summary>
/// Scans <paramref name="data"/> for anchors and registers each distinct anchor text, together with
/// its resolved URL, in <c>result.RelatedLinks</c>.
/// </summary>
/// <param name="result">Result whose <c>RelatedLinks</c> collection receives the links.</param>
/// <param name="data">HTML fragment to scan for anchor elements.</param>
/// <param name="host">
/// Base URL. Links starting with "/" are resolved against its host name over http; every other
/// link is appended to it verbatim.
/// </param>
public static void SetAdditionalLinks(Result result, string data, string host)
{
    string[] urls = StringParser.ParseItemsList("<a href=\"", "</a>", data);
    // Anchor texts already registered; only the first link for a given text is kept.
    List<string> texts = new List<string>();

    foreach (string url in urls)
    {
        string link = StringParser.ExtractLeft("\"", url);
        link = link.Replace(" ", "+");

        // Ordinal comparison: this is markup, not natural-language text.
        if (link.StartsWith("/", StringComparison.Ordinal))
        {
            link = "http://" + (new Uri(host)).Host + link;
        }
        else
        {
            link = host + link;
        }

        string text = StringParser.ExtractRight(">", url);

        // Strip a trailing " 1" / " 2" marker. Only the suffix is removed, so a caption that
        // also contains the marker earlier in the text is not truncated at that earlier spot.
        if (text.EndsWith(" 1", StringComparison.Ordinal) || text.EndsWith(" 2", StringComparison.Ordinal))
        {
            text = text.Substring(0, text.Length - 2);
        }

        if (!texts.Contains(text))
        {
            result.RelatedLinks.Add(text, new Uri(link));
            texts.Add(text);
        }
    }
}
// Queries the Google dictionary page for the phrase and fills `result` from the returned HTML:
// audio flag, pronunciation (stored in Abbreviation), the translation tree, related phrases and
// web definitions. Throws TranslationException("Nothing found") when the page carries either of
// the two "No dictionary ... were found" markers.
protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
{
    string query = "http://www.google.com/dictionary?aq=f&langpair={1}&q={0}&hl=en";
    query = string.Format(query, HttpUtility.UrlEncode(phrase, System.Text.Encoding.UTF8), GoogleUtils.ConvertLanguagesPair(languagesPair));
    WebRequestHelper helper = new WebRequestHelper(result, new Uri(query), networkSetting, WebRequestContentType.UrlEncodedGet);
    result.ArticleUrl = query;
    result.ArticleUrlCaption = phrase;
    string responseFromServer = helper.GetResponse();
    if (responseFromServer.Contains("No dictionary translations were found for: <strong>"))
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }
    if (responseFromServer.Contains("No dictionary definitions were found for: <strong>"))
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }
    // The audio marker is tested on the full page, before the response is narrowed below.
    result.HasAudio = responseFromServer.Contains("<object data=\"/dictionary/flash");
    // From here on responseFromServer holds only the part between the two marker divs.
    responseFromServer = StringParser.Parse("<div class=\"dct-srch-otr\">", "<div class=\"dct-rt-sct\">", responseFromServer);
    // pronunciation
    if (responseFromServer.Contains("<span class=\"dct-tp\">/"))
    {
        string pronuncation = StringParser.Parse("<span class=\"dct-tp\">/", "/</span>", responseFromServer);
        pronuncation = pronuncation.Trim();
        result.Abbreviation = pronuncation;
    }
    // translations
    //string translations = StringParser.Parse("<div class=\"dct-srch-rslt\">", "</div>", responseFromServer);
    string translations = "";
    // TODO: additional senses like in "water" - "water down" are not supported
    // The root list always starts at the same <ul>; the end marker depends on which section
    // follows it in the page, checked in this order of preference.
    if (responseFromServer.Contains("<div class=\"sen\">"))
    {
        translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<div class=\"sen\">", responseFromServer);
    }
    else if (responseFromServer.Contains("<h3>Related phrases</h3>"))
    {
        translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Related phrases</h3>", responseFromServer);
    }
    else if (responseFromServer.Contains("<h3>Web definitions</h3>"))
    {
        translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Web definitions</h3>", responseFromServer);
    }
    else
    {
        translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>", responseFromServer);
    }
    StringParser parser = null;
    // Top-level chunks of the translation list; each one is processed by the loop further down.
    List <string> subtranslations = new List <string>();
    if (translations.Contains("<li class=\"dct-ec\""))
    {
        // Split into one chunk per "dct-ec" list item.
        // End marker: "</li>\n</ul>\n</li>"
        // NOTE(review): the meaning of the third ReadItemsList argument ("3485730457203") is
        // defined by StringParser and is not visible here - confirm before changing it.
        parser = new StringParser(translations);
        string[] subtranslation_list = parser.ReadItemsList("<li class=\"dct-ec\"", "</li>\n</ul>\n</li>", "3485730457203");
        subtranslations.AddRange(subtranslation_list);
    }
    else if (translations.Contains("<div style=\"font-weight:bold\">Synonyms:</div>"))
    {
        // Synonyms go under a dedicated "Synonyms" child, with one sub-result per <li> group
        // named by the group's part-of-speech title.
        Result synonyms_tr = CreateNewResult("Synonyms", languagesPair, subject);
        result.Childs.Add(synonyms_tr);
        string synonyms = StringParser.Parse("<div style=\"font-weight:bold\">Synonyms:</div>", "</div>", translations);
        parser = new StringParser(synonyms);
        string[] syn_group_list = parser.ReadItemsList("<li>", "</li>", "3485730457203");
        foreach (string syngroup in syn_group_list)
        {
            string syn_group_name = StringParser.Parse("title=\"Part-of-speech\">", "</span>", syngroup);
            Result syn_tr = CreateNewResult(syn_group_name, languagesPair, subject);
            synonyms_tr.Childs.Add(syn_tr);
            parser = new StringParser(syngroup);
            string[] syn_list = parser.ReadItemsList("<a", "</a>", "3485730457203");
            foreach (string syn in syn_list)
            {
                // Keep only the anchor text (everything after the opening tag's ">").
                string synonym = StringParser.ExtractRight(">", syn);
                syn_tr.Translations.Add(synonym);
            }
        }
        // The whole list is still processed as a single chunk by the loop below.
        subtranslations.Add(translations);
    }
    else
    {
        subtranslations.Add(translations);
    }
    // subres_tr: parent for the current chunk (the root result unless the chunk has its own
    // part-of-speech header); sub2res_tr: one entry per "dct-em" item; sub3res_tr: additional
    // texts of an entry beyond the first.
    Result subres_tr = result;
    Result sub2res_tr = null;
    Result sub3res_tr = null;
    string abbr_str;
    foreach (string subtranslation in subtranslations)
    {
        if (subtranslation.Contains("<div class=\"dct-ec\">"))
        {
            abbr_str = StringParser.Parse("title=\"Part-of-speech\">", "</span>", subtranslation);
            subres_tr = CreateNewResult(abbr_str, languagesPair, subject);
            result.Childs.Add(subres_tr);
        }
        // Rewrite every "dct-em" item start as "<end><begin>" and append a final "<end>", so the
        // items can be read back as <begin>...<end> pairs.
        parser = new StringParser(subtranslation.Replace("<li class=\"dct-em\"", "<end><begin>") + "<end>");
        string[] subsubtranslation_list = parser.ReadItemsList("<begin>", "<end>", "3485730457203");
        foreach (string subsubtanslation in subsubtranslation_list)
        {
            sub2res_tr = CreateNewResult("", languagesPair, subject);
            subres_tr.Childs.Add(sub2res_tr);
            if (subsubtanslation.Contains(">See also</span>"))
            {
                sub2res_tr.Translations.Add("See also");
            }
            // Same <begin>/<end> rewriting, this time splitting on the "dct-tt" spans.
            StringParser parser2 = new StringParser(subsubtanslation.Replace("<span class=\"dct-tt\">", "<end><begin>") + "<end>");
            string[] sub3translation_list = parser2.ReadItemsList("<begin>", "<end>", "3485730457203");
            foreach (string sub3tanslation in sub3translation_list)
            {
                string text_translation = "";
                string text_abbr = "";
                if (sub3tanslation.Contains("<span"))
                {
                    // Text before the first nested span is the translation; the content of that
                    // span is taken as its abbreviation.
                    text_translation = StringParser.ExtractLeft("<span", sub3tanslation);
                    if (text_translation.Contains("</span"))
                    {
                        text_translation = StringParser.ExtractLeft("</span", text_translation);
                    }
                    text_abbr = StringParser.Parse("<span", "</span", sub3tanslation);
                    text_abbr = StringParser.ExtractRight(">", text_abbr);
                }
                else
                {
                    text_translation = StringParser.ExtractLeft("</span>", sub3tanslation);
                }
                text_translation = StringParser.RemoveAll("<", ">", text_translation);
                if (sub2res_tr.Translations.Count == 0)
                {
                    // The first text of an entry is stored on the entry itself. If "See also" was
                    // added above, the count is already non-zero and this branch is skipped.
                    sub2res_tr.Translations.Add(text_translation);
                    sub2res_tr.Abbreviation = text_abbr;
                }
                else
                {
                    // Every further text becomes a child of the entry; text_abbr is not stored here.
                    sub3res_tr = CreateNewResult("", languagesPair, subject);
                    sub3res_tr.Translations.Add(text_translation);
                    //sub3res_tr.Phrase = text_abbr;
                    sub2res_tr.Childs.Add(sub3res_tr);
                }
            }
        }
    }
    // related words
    if (responseFromServer.Contains("<h3>Related phrases</h3>"))
    {
        string related = StringParser.Parse("<ul class=\"rlt-snt\">", "</ul>", responseFromServer);
        if (!string.IsNullOrEmpty(related))
        {
            parser = new StringParser(related);
            string[] related_list = parser.ReadItemsList("<li>", "</li>");
            foreach (string related_s in related_list)
            {
                string related_str = related_s.Replace("\n", "").Trim();
                // The phrase is inside the item's <div>; its translation is whatever follows "</div>".
                string subphrase = StringParser.Parse("<div>", "</div>", related_str);
                subphrase = StringParser.RemoveAll("<", ">", subphrase);
                // NOTE(review): as displayed, Replace(" ", " ") swaps a space for a space; presumably
                // the first argument is (or was meant to be) a non-breaking space - confirm.
                subphrase = subphrase.Replace(" ", " ").Replace("\n", "").Trim();
                string subphrasetrans = StringParser.ExtractRight("</div>", related_str);
                subphrasetrans = StringParser.RemoveAll("<", ">", subphrasetrans);
                subphrasetrans = subphrasetrans.Replace(" ", " ").Replace("\n", "").Trim();
                Result subres = CreateNewResult(subphrase, languagesPair, subject);
                subres.Translations.Add(subphrasetrans);
                result.Childs.Add(subres);
            }
        }
    }
    // Web definitions
    if (responseFromServer.Contains("<h3>Web definitions</h3>"))
    {
        string related = StringParser.ExtractRight("<ul class=\"gls\">", responseFromServer);
        if (!string.IsNullOrEmpty(related))
        {
            {
                // All web definitions are collected under one child named after the phrase.
                Result subres_wd = CreateNewResult(phrase, languagesPair, subject);
                result.Childs.Add(subres_wd);
                parser = new StringParser(related);
                string[] related_list = parser.ReadItemsList("<li>", "</li>");
                foreach (string related_s in related_list)
                {
                    string related_str = related_s;
                    related_str = related_str.Replace("<br/>", "").Trim();
                    related_str = StringParser.RemoveAll("<", ">", related_str);
                    related_str = related_str.Replace(" ", " ").Replace("\n", "").Trim();
                    subres_wd.Translations.Add(related_str);
                }
            }
        }
    }
}
/// <summary>
/// Looks up <paramref name="phrase"/> on megaslownik.pl and fills <paramref name="result"/> with
/// the translations, the listed synonyms and the related entries found in the page.
/// </summary>
/// <param name="serviceItem">Used to create child results for related phrase entries.</param>
/// <param name="phrase">Text to look up; it is URL-encoded into the request URL.</param>
/// <param name="languagesPair">
/// Language direction. When both sides are Polish, related entries are stored as related links
/// instead of child results.
/// </param>
/// <param name="subject">Passed through to the created child results.</param>
/// <param name="result">Result object to fill.</param>
/// <param name="networkSetting">Passed through to the web request helper.</param>
/// <exception cref="TranslationException">
/// Thrown when the page carries the "word not found" marker or contains no definition blocks.
/// </exception>
public static void DoTranslate(ServiceItem serviceItem, string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
{
    bool synonimsDictionary = languagesPair.From == Language.Polish && languagesPair.To == Language.Polish;

    string query = "http://megaslownik.pl/slownik/{0}/,{1}";
    query = string.Format(query, MegaslownikTools.ConvertLanguagePair(languagesPair), HttpUtility.UrlEncode(phrase));
    result.ArticleUrl = query;
    result.ArticleUrlCaption = phrase;

    WebRequestHelper helper = new WebRequestHelper(result, new Uri(query), networkSetting, WebRequestContentType.UrlEncodedGet);
    string responseFromServer = helper.GetResponse();

    if (responseFromServer.Contains("<div class=\"slowo\">\r\n Szukanego słowa nie ma w MEGAsłowniku.\r\n"))
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }

    result.HasAudio = responseFromServer.Contains("class=\"ikona_sluchaj2\">");

    string[] translations = StringParser.ParseItemsList("<div class=\"definicja\">", "</div>", responseFromServer);
    if (translations.Length == 0)
    {
        result.ResultNotFound = true;
        throw new TranslationException("Nothing found");
    }

    // Definitions are added as-is; synonyms are added only when not already present.
    AddAnchorTextsAsTranslations(translations, result, false);
    string[] synonyms = StringParser.ParseItemsList("<div class=\"synonim\">synonimy:", "</div>", responseFromServer);
    AddAnchorTextsAsTranslations(synonyms, result, true);

    // Related entries are searched under two spellings of the list-item marker ("<li ><a" and
    // "<li><a"); all matches of the first spelling are handled before the second.
    string[] itemMarkers = { "<li ><a href=\"/slownik/", "<li><a href=\"/slownik/" };
    foreach (string itemMarker in itemMarkers)
    {
        if (synonimsDictionary)
        {
            AddRelatedLinksForPhrase(itemMarker, responseFromServer, phrase, result);
        }
        else
        {
            AddChildResultsForPhrase(serviceItem, itemMarker, responseFromServer, phrase, languagesPair, subject, result);
        }
    }
}

// Adds the text of every "/slownik" anchor found in the given sections to result.Translations;
// when skipExisting is true, texts already present in the collection are not added again.
private static void AddAnchorTextsAsTranslations(string[] sections, Result result, bool skipExisting)
{
    foreach (string section in sections)
    {
        string[] anchors = StringParser.ParseItemsList("<a href=\"/slownik", "</a>", section);
        foreach (string anchor in anchors)
        {
            string text = StringParser.ExtractRight(">", anchor);
            text = StringParser.RemoveAll("<", ">", text);
            if (!skipExisting || !result.Translations.Contains(text))
            {
                result.Translations.Add(text);
            }
        }
    }
}

// Adds one child result per list item whose text contains both "»" and the phrase: the part
// before "»" becomes the child's phrase and caption, the part after it its translation.
private static void AddChildResultsForPhrase(ServiceItem serviceItem, string itemMarker, string responseFromServer, string phrase, LanguagePair languagesPair, string subject, Result result)
{
    string[] links = StringParser.ParseItemsList(itemMarker, "</li>", responseFromServer);
    foreach (string link in links)
    {
        string linkUrl = "http://megaslownik.pl/slownik/" + StringParser.ExtractLeft("\"", link);
        string linkText = StringParser.ExtractRight(">", link);
        linkText = StringParser.RemoveAll("<", ">", linkText);
        if (linkText.Contains("»") && linkText.Contains(phrase))
        {
            string subphrase = StringParser.ExtractLeft("»", linkText);
            string subtrans = StringParser.ExtractRight("»", linkText);
            Result child = serviceItem.CreateNewResult(subphrase, languagesPair, subject);
            result.Childs.Add(child);
            child.Translations.Add(subtrans);
            child.ArticleUrl = linkUrl;
            child.ArticleUrlCaption = subphrase;
        }
    }
}

// Adds every list item whose text contains the phrase to result.RelatedLinks.
private static void AddRelatedLinksForPhrase(string itemMarker, string responseFromServer, string phrase, Result result)
{
    string[] links = StringParser.ParseItemsList(itemMarker, "</li>", responseFromServer);
    foreach (string link in links)
    {
        string linkUrl = "http://megaslownik.pl/slownik/" + StringParser.ExtractLeft("\"", link);
        string linkText = StringParser.ExtractRight(">", link);
        linkText = StringParser.RemoveAll("<", ">", linkText);
        if (linkText.Contains(phrase))
        {
            result.RelatedLinks.Add(linkText, linkUrl);
        }
    }
}
/// <summary>
/// Fetches one Merriam-Webster page and appends its entries to <c>result.Translations</c> as
/// "html!" link snippets. While fewer than 50 entries have been collected and the page offers a
/// "next" form, the method calls itself with the POST data for the following page.
/// </summary>
/// <param name="phrase">Looked-up text; used to build the URL of the first (GET) request.</param>
/// <param name="languagesPair">Only forwarded to the recursive call.</param>
/// <param name="subject">Only forwarded to the recursive call.</param>
/// <param name="result">Result object that receives the links and the remaining-entries count.</param>
/// <param name="networkSetting">Passed through to the web request helper.</param>
/// <param name="post_data">
/// Null or empty for the initial GET request; otherwise the form body posted to fetch a
/// follow-up page.
/// </param>
/// <exception cref="TranslationException">
/// Thrown when the page says the word is not in the dictionary and carries no suggestion block.
/// </exception>
void InternalDoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting, string post_data)
{
    WebRequestHelper helper = null;
    if (string.IsNullOrEmpty(post_data))
    {
        string query = "http://www.merriam-webster.com/dictionary/{0}";
        query = string.Format(query, HttpUtility.UrlEncode(phrase));
        result.ArticleUrl = query;
        helper = new WebRequestHelper(result, new Uri(query), networkSetting, WebRequestContentType.UrlEncodedGet);
        //helper.UseGoogleCache = true;
    }
    else
    {
        helper = new WebRequestHelper(result, new Uri("http://www.merriam-webster.com/dictionary"), networkSetting, WebRequestContentType.UrlEncoded);
        helper.AddPostData(post_data);
    }

    string responseFromServer = helper.GetResponse();
    helper = null;

    // Ordinal search: the markers are fixed markup/text, not culture-dependent strings.
    if (responseFromServer.IndexOf("The word you've entered isn't in the dictionary.", StringComparison.Ordinal) >= 0)
    {
        if (responseFromServer.IndexOf("<PRE>", StringComparison.Ordinal) < 0)
        {
            result.ResultNotFound = true;
            throw new TranslationException("Nothing found");
        }
        else
        {
            // get suggestions
            StringParser parser = new StringParser("<PRE>", "</PRE>", responseFromServer);
            string[] items = parser.ReadItemsList("\">", "<", "345873409587");
            foreach (string item in items)
            {
                string part = item;
                string link = "html!<p><a href=\"http://www.merriam-webster.com/dictionary/{0}\" title=\"http://www.merriam-webster.com/dictionary/{0}\">{0}</a></p>";
                link = string.Format(link, part);
                result.Translations.Add(link);
            }
            return;
        }
    }

    if (!(responseFromServer.Contains("One entry found.\n<br/>") || responseFromServer.Contains("One entry found.\n<br />")))
    {
        // The total number of entries is read from the first page only.
        if (string.IsNullOrEmpty(post_data) && responseFromServer.Contains("'list' value=\"va:"))
        {
            string count_str = StringParser.Parse("'list' value=\"va:", ",", responseFromServer);
            int count;
            if (int.TryParse(count_str, out count))
            {
                result.MoreEntriesCount = count;
            }
        }

        StringParser parser = new StringParser("<ol class=\"results\"", "</ol>", responseFromServer);
        string[] items = parser.ReadItemsList("href=\"/dictionary/", "</a>");
        foreach (string item in items)
        {
            string part = StringParser.ExtractLeft("\">", item);
            string name = StringParser.ExtractRight("\">", item);
            name = StringParser.RemoveAll("<sup>", "</sup>", name);
            string link = "html!<p><a href=\"http://www.merriam-webster.com/dictionary/{0}\" title=\"http://www.merriam-webster.com/dictionary/{0}\">{1}</a></p>";
            link = string.Format(link, part, name);
            result.Translations.Add(link);
        }

        // Follow the "next" form only when this page produced at least one entry: items[0] is
        // needed for the "jump" field, and a page without entries would otherwise raise
        // IndexOutOfRangeException. Requiring progress on every page also guarantees that the
        // recursion ends once 50 entries have been collected.
        if (items.Length > 0
            && result.Translations.Count < 50
            && responseFromServer.IndexOf("name='incr'", StringComparison.Ordinal) > 0)
        {
            // more items are available
            // sample body: incr=Next+5&jump=dragon%27s+blood&book=Dictionary&quer=blood&list=45%2C31%2C3602592%2C0%3Bdragon%27s+blood%3D2000318535...
            string incr_value = StringParser.Parse("<input type='submit' value='", "'", responseFromServer);
            string quer_value = StringParser.Parse("<input type='hidden' name='quer' value=\"", "\"", responseFromServer);
            string list_value = StringParser.Parse("<input type='hidden' name='list' value=\"", "\"", responseFromServer);
            string post_data_value = "incr={0}&jump={1}&book=Dictionary&quer={2}&list={3}";
            // Every field is URL-encoded, including incr (a value such as "Next 5" must be sent as
            // "Next+5", as in the sample body above).
            // NOTE(review): "jump" is built from the whole raw item (href part plus caption), as
            // before - confirm that this is what the site expects.
            post_data_value = string.Format(post_data_value,
                HttpUtility.UrlEncode(incr_value),
                HttpUtility.UrlEncode(items[0]),
                HttpUtility.UrlEncode(quer_value),
                HttpUtility.UrlEncode(list_value));

            // release the page text before recursing
            responseFromServer = null;
            InternalDoTranslate(phrase, languagesPair, subject, result, networkSetting, post_data_value);
        }

        // Only the outermost call converts the total into "entries not shown".
        if (result.MoreEntriesCount != 0 && string.IsNullOrEmpty(post_data))
        {
            result.MoreEntriesCount -= result.Translations.Count;
        }
    }
    else if (responseFromServer.Contains("<span class=\"variant\">"))
    {
        string part = StringParser.Parse("<span class=\"variant\">", "</span>", responseFromServer);
        string link = "html!<p><a href=\"http://www.merriam-webster.com/dictionary/{0}\" title=\"http://www.merriam-webster.com/dictionary/{0}\">{0}</a></p>";
        link = string.Format(link, part);
        result.Translations.Add(link);
    }
}