예제 #1
0
        /// <summary>
        /// Translates <paramref name="phrase"/> through the Google AJAX language endpoint and adds the
        /// reassembled text to <paramref name="result"/> as a single translation entry.
        /// </summary>
        /// <remarks>
        /// The phrase is first cut into chunks by <c>SplitToQueries</c>. For each chunk, only the span
        /// running from its first letter to its last letter is sent for translation; whatever precedes
        /// or follows that span is copied to the output unchanged. A chunk without any letter is copied
        /// through as-is and no request is made for it. <paramref name="subject"/> is not used here.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// A chunk is longer than 500 characters, or the request URL built for a chunk is longer than
        /// 2070 characters.
        /// </exception>
        protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
        {
            const int MaxUrlLength   = 2070;
            const int MaxChunkLength = 500;

            // Everything of the request URL except the URL-encoded text, which is appended per chunk.
            string urlPrefix = string.Format(
                "http://ajax.googleapis.com/ajax/services/language/translate?" +
                "v=1.0&langpair={0}&hl=en&q=",
                GoogleUtils.ConvertLanguagesPair(languagesPair));

            List<string> chunks = new List<string>();
            SplitToQueries(chunks, phrase, SplitMode.Separators, MaxUrlLength - urlPrefix.Length);

            StringBuilder output = new StringBuilder(phrase.Length);

            foreach (string chunk in chunks)
            {
                if (chunk.Length > MaxChunkLength)
                {
                    throw new InvalidOperationException("The length of string is greater of 500 characters." +
                                                        " string : " + chunk +
                                                        ", full phrase : " + phrase
                                                        );
                }

                // Index of the first letter; equals chunk.Length when the chunk holds no letter.
                int firstLetter = 0;
                while (firstLetter < chunk.Length && !char.IsLetter(chunk[firstLetter]))
                {
                    firstLetter++;
                }

                if (firstLetter == chunk.Length)
                {
                    // Nothing translatable in this chunk - pass it through untouched.
                    output.Append(chunk);
                    continue;
                }

                // Index of the last letter. At least one letter exists, so the scan stops at or after firstLetter.
                int lastLetter = chunk.Length - 1;
                while (!char.IsLetter(chunk[lastLetter]))
                {
                    lastLetter--;
                }

                string core       = chunk.Substring(firstLetter, lastLetter - firstLetter + 1);
                string requestUrl = urlPrefix + HttpUtility.UrlEncode(core);

                if (requestUrl.Length > MaxUrlLength)
                {
                    throw new InvalidOperationException("The length of query is greater of 2070 characters." +
                                                        " string : " + chunk +
                                                        " query : " + requestUrl +
                                                        " full phrase : " + phrase
                                                        );
                }

                // Leading non-letter characters are emitted verbatim ahead of the translated core.
                if (firstLetter > 0)
                {
                    output.Append(chunk, 0, firstLetter);
                }

                output.Append(DoInternalTranslate(requestUrl, result, networkSetting));

                // Trailing non-letter characters are emitted verbatim after the translated core.
                int tailStart = lastLetter + 1;
                if (tailStart < chunk.Length)
                {
                    output.Append(chunk, tailStart, chunk.Length - tailStart);
                }
            }

            result.Translations.Add(output.ToString());
        }
예제 #2
0
        /// <summary>
        /// Requests the Google Dictionary page for <paramref name="phrase"/> and fills
        /// <paramref name="result"/> by scraping the returned HTML.
        /// </summary>
        /// <remarks>
        /// The method populates, in this order: the article URL and caption, the audio flag, the
        /// pronunciation (stored in <c>result.Abbreviation</c>), the translation tree (child results,
        /// optionally preceded by a "Synonyms" child), the "Related phrases" children and the
        /// "Web definitions" child. All extraction is keyed on exact HTML marker strings, so any change
        /// of the page markup changes what is found.
        /// </remarks>
        /// <exception cref="TranslationException">
        /// The response contains one of the two "No dictionary ... were found for" markers;
        /// <c>result.ResultNotFound</c> is set to <c>true</c> before the exception is thrown.
        /// </exception>
        protected override void DoTranslate(string phrase, LanguagePair languagesPair, string subject, Result result, NetworkSetting networkSetting)
        {
            // {0} = URL-encoded phrase (UTF-8), {1} = language pair string produced by GoogleUtils.
            string query = "http://www.google.com/dictionary?aq=f&langpair={1}&q={0}&hl=en";

            query = string.Format(query, HttpUtility.UrlEncode(phrase, System.Text.Encoding.UTF8), GoogleUtils.ConvertLanguagesPair(languagesPair));
            WebRequestHelper helper =
                new WebRequestHelper(result, new Uri(query),
                                     networkSetting,
                                     WebRequestContentType.UrlEncodedGet);

            // The request URL doubles as the article link; the searched phrase is its caption.
            result.ArticleUrl        = query;
            result.ArticleUrlCaption = phrase;

            string responseFromServer = helper.GetResponse();

            // Two distinct "nothing found" markers (translations / definitions) are handled identically.
            if (responseFromServer.Contains("No dictionary translations were found for: <strong>"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }
            if (responseFromServer.Contains("No dictionary definitions were found for: <strong>"))
            {
                result.ResultNotFound = true;
                throw new TranslationException("Nothing found");
            }

            // Audio is considered available when the page embeds a flash object under /dictionary/flash.
            result.HasAudio    = responseFromServer.Contains("<object data=\"/dictionary/flash");
            // From here on responseFromServer holds only what StringParser.Parse returns for these two markers.
            // NOTE(review): presumably the text between the start and end marker - confirm against StringParser.
            responseFromServer = StringParser.Parse("<div class=\"dct-srch-otr\">", "<div class=\"dct-rt-sct\">", responseFromServer);

            // Pronunciation: extracted from the "dct-tp" span and stored in result.Abbreviation.
            if (responseFromServer.Contains("<span class=\"dct-tp\">/"))
            {
                string pronuncation = StringParser.Parse("<span class=\"dct-tp\">/", "/</span>", responseFromServer);
                pronuncation        = pronuncation.Trim();
                result.Abbreviation = pronuncation;
            }


            // Translations
            //string translations = StringParser.Parse("<div class=\"dct-srch-rslt\">", "</div>", responseFromServer);



            string translations = "";

            // TODO: additional senses like in "water" - "water down" are not supported

            // The root list always starts at the same "pr-root" marker; the end marker depends on
            // which section is present in the response (checked in this priority order).
            if (responseFromServer.Contains("<div class=\"sen\">"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<div class=\"sen\">", responseFromServer);
            }
            else if (responseFromServer.Contains("<h3>Related phrases</h3>"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Related phrases</h3>", responseFromServer);
            }
            else if (responseFromServer.Contains("<h3>Web definitions</h3>"))
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>\n<h3>Web definitions</h3>", responseFromServer);
            }
            else
            {
                translations = StringParser.Parse("<ul class=\"dct-e2\" id=\"pr-root\" >", "</ul>", responseFromServer);
            }


            StringParser  parser          = null;
            List <string> subtranslations = new List <string>();

            // NOTE(review): the third ReadItemsList argument "3485730457203" is used throughout this
            // method; it looks like a sentinel that is not expected to occur in the input - confirm
            // its meaning against StringParser.ReadItemsList.
            if (translations.Contains("<li class=\"dct-ec\""))
            {
                // One list item per "dct-ec" entry; each item ends at "</li>\n</ul>\n</li>".
                parser = new StringParser(translations);
                string[] subtranslation_list = parser.ReadItemsList("<li class=\"dct-ec\"", "</li>\n</ul>\n</li>", "3485730457203");
                subtranslations.AddRange(subtranslation_list);
            }
            else if (translations.Contains("<div style=\"font-weight:bold\">Synonyms:</div>"))
            {
                // Synonyms get their own child result: one grandchild per <li> group, named after the
                // group's part-of-speech span, with the text of every link in the group as a translation.
                Result synonyms_tr = CreateNewResult("Synonyms", languagesPair, subject);
                result.Childs.Add(synonyms_tr);

                string synonyms = StringParser.Parse("<div style=\"font-weight:bold\">Synonyms:</div>", "</div>", translations);
                parser = new StringParser(synonyms);
                string[] syn_group_list = parser.ReadItemsList("<li>", "</li>", "3485730457203");
                foreach (string syngroup in syn_group_list)
                {
                    string syn_group_name = StringParser.Parse("title=\"Part-of-speech\">", "</span>", syngroup);
                    Result syn_tr         = CreateNewResult(syn_group_name, languagesPair, subject);
                    synonyms_tr.Childs.Add(syn_tr);
                    parser = new StringParser(syngroup);
                    string[] syn_list = parser.ReadItemsList("<a", "</a>", "3485730457203");
                    foreach (string syn in syn_list)
                    {
                        // Each item still carries the rest of the opening <a ...> tag; keep what ExtractRight yields for ">".
                        string synonym = StringParser.ExtractRight(">", syn);
                        syn_tr.Translations.Add(synonym);
                    }
                }

                // The complete translations block (synonyms markup included) is still processed below.
                subtranslations.Add(translations);
            }
            else
            {
                // No per-entry structure detected: treat the whole block as a single entry.
                subtranslations.Add(translations);
            }

            // subres_tr is the node that receives the parsed items. It starts as the root result and
            // is only reassigned when an entry carries a part-of-speech header, so an entry without
            // that header attaches to whichever node the previous iteration left in place.
            Result subres_tr  = result;
            Result sub2res_tr = null;
            Result sub3res_tr = null;
            string abbr_str;

            foreach (string subtranslation in subtranslations)
            {
                // Note the two spaces in the marker - it has to match the page markup exactly.
                if (subtranslation.Contains("<div  class=\"dct-ec\">"))
                {
                    abbr_str  = StringParser.Parse("title=\"Part-of-speech\">", "</span>", subtranslation);
                    subres_tr = CreateNewResult(abbr_str, languagesPair, subject);
                    result.Childs.Add(subres_tr);
                }

                // Rewrite every "dct-em" item opener into "<end><begin>" and append a closing "<end>",
                // so each item ends up bracketed by <begin>...<end> for ReadItemsList.
                parser = new StringParser(subtranslation.Replace("<li class=\"dct-em\"", "<end><begin>") + "<end>");
                string[] subsubtranslation_list = parser.ReadItemsList("<begin>", "<end>", "3485730457203");

                foreach (string subsubtanslation in subsubtranslation_list)
                {
                    sub2res_tr = CreateNewResult("", languagesPair, subject);
                    subres_tr.Childs.Add(sub2res_tr);

                    // A "See also" item gets that caption as its first translation; as a consequence
                    // every term parsed below is added as a child rather than as the head translation.
                    if (subsubtanslation.Contains(">See also</span>"))
                    {
                        sub2res_tr.Translations.Add("See also");
                    }

                    // Same <begin>/<end> bracketing trick, this time per "dct-tt" term span.
                    StringParser parser2 = new StringParser(subsubtanslation.Replace("<span class=\"dct-tt\">", "<end><begin>") + "<end>");
                    string[]     sub3translation_list = parser2.ReadItemsList("<begin>", "<end>", "3485730457203");

                    foreach (string sub3tanslation in sub3translation_list)
                    {
                        string text_translation = "";
                        string text_abbr        = "";
                        if (sub3tanslation.Contains("<span"))
                        {
                            // A nested span is present: the term is taken from the text left of it
                            // (cut again at a closing span if one remains), and the content of the
                            // nested span is kept as the abbreviation text.
                            text_translation = StringParser.ExtractLeft("<span", sub3tanslation);
                            if (text_translation.Contains("</span"))
                            {
                                text_translation = StringParser.ExtractLeft("</span", text_translation);
                            }
                            text_abbr = StringParser.Parse("<span", "</span", sub3tanslation);
                            text_abbr = StringParser.ExtractRight(">", text_abbr);
                        }
                        else
                        {
                            text_translation = StringParser.ExtractLeft("</span>", sub3tanslation);
                        }

                        // Drop any markup left inside the term text.
                        text_translation = StringParser.RemoveAll("<", ">", text_translation);

                        if (sub2res_tr.Translations.Count == 0)
                        {
                            // The first term becomes the item's own translation and supplies its abbreviation.
                            sub2res_tr.Translations.Add(text_translation);
                            sub2res_tr.Abbreviation = text_abbr;
                        }
                        else
                        {
                            // Every further term becomes a child of the item; text_abbr is not stored for these.
                            sub3res_tr = CreateNewResult("", languagesPair, subject);
                            sub3res_tr.Translations.Add(text_translation);
                            //sub3res_tr.Phrase = text_abbr;
                            sub2res_tr.Childs.Add(sub3res_tr);
                        }
                    }
                }
            }


            // Related phrases: one child result per <li>, named after the phrase, with its translation.
            if (responseFromServer.Contains("<h3>Related phrases</h3>"))
            {
                string related = StringParser.Parse("<ul class=\"rlt-snt\">", "</ul>", responseFromServer);
                if (!string.IsNullOrEmpty(related))
                {
                    parser = new StringParser(related);
                    string[] related_list = parser.ReadItemsList("<li>", "</li>");

                    foreach (string related_s in related_list)
                    {
                        // The phrase is taken from the <div>...</div> part of the item, with tags stripped.
                        string related_str = related_s.Replace("\n", "").Trim();
                        string subphrase   = StringParser.Parse("<div>", "</div>", related_str);
                        subphrase = StringParser.RemoveAll("<", ">", subphrase);
                        // NOTE(review): "&nbsp" is replaced without its trailing ';' - if the page emits
                        // "&nbsp;" a stray ';' stays in the text. Confirm against real responses.
                        subphrase = subphrase.Replace("&nbsp", " ").Replace("\n", "").Trim();


                        // The translation is taken from what ExtractRight yields for "</div>", cleaned the same way.
                        string subphrasetrans = StringParser.ExtractRight("</div>", related_str);

                        subphrasetrans = StringParser.RemoveAll("<", ">", subphrasetrans);
                        subphrasetrans = subphrasetrans.Replace("&nbsp", " ").Replace("\n", "").Trim();

                        Result subres = CreateNewResult(subphrase, languagesPair, subject);
                        subres.Translations.Add(subphrasetrans);
                        result.Childs.Add(subres);
                    }
                }
            }

            // Web definitions: a single child result named after the searched phrase, one translation per <li>.
            if (responseFromServer.Contains("<h3>Web definitions</h3>"))
            {
                string related = StringParser.ExtractRight("<ul class=\"gls\">", responseFromServer);
                if (!string.IsNullOrEmpty(related))
                {
                    // The extra brace pair below only introduces a nested scope; it has no other effect.
                    {
                        Result subres_wd = CreateNewResult(phrase, languagesPair, subject);
                        result.Childs.Add(subres_wd);

                        parser = new StringParser(related);
                        string[] related_list = parser.ReadItemsList("<li>", "</li>");

                        foreach (string related_s in related_list)
                        {
                            // Remove line-break tags, strip remaining markup, then normalise whitespace entities.
                            string related_str = related_s;
                            related_str = related_str.Replace("<br/>", "").Trim();
                            related_str = StringParser.RemoveAll("<", ">", related_str);
                            related_str = related_str.Replace("&nbsp", " ").Replace("\n", "").Trim();
                            subres_wd.Translations.Add(related_str);
                        }
                    }
                }
            }
        }