/// <summary>
            /// Parses the fetched content: scrapes the text of the result-box span out of
            /// <c>this.Content</c>, strips its HTML and stores the cleaned-up text in
            /// <c>this.Translation</c>.
            /// </summary>
            /// <remarks>
            /// <c>this.Translation</c> is left as <see cref="string.Empty"/> when nothing
            /// could be scraped.
            /// </remarks>
            protected override void parseContent()
            {
                // Initialize the scraper; the translation stays empty unless a result is found.
                this.Translation = string.Empty;
                StringParser parser = new StringParser (this.Content);

                // Scrape the translation: locate the result span, skip past the end of
                // its opening tag and take everything up to the closing tag.
                string strTranslation = string.Empty;
                if (parser.skipToEndOf ("<span id=result_box") &&
                    parser.skipToEndOf ("onmouseout=\"this.style.backgroundColor='#fff'\">") &&
                    parser.extractTo ("</span>", ref strTranslation)) {
                    strTranslation = StringParser.removeHtml (strTranslation);
                }

                // Nothing scraped (markup missing or empty result): keep the empty
                // translation. Without this guard the clean-up below would call
                // Substring (0, 1) on an empty string, which throws
                // ArgumentOutOfRangeException.
                if (string.IsNullOrEmpty (strTranslation)) {
                    return;
                }

                #region Fix up the translation
                    // The clean span starts at the first letter or digit
                    // (index 0 when there is none).
                    int startClean = 0;
                    for (int i = 0; i < strTranslation.Length; i++) {
                        if (Char.IsLetterOrDigit (strTranslation[i])) {
                            startClean = i;
                            break;
                        }
                    }

                    // The clean span ends at the last letter, digit or punctuation mark
                    // other than a double quote. Index 0 is not inspected because it is
                    // already the fallback value of endClean.
                    int endClean = 0;
                    for (int i = strTranslation.Length - 1; i > 0; i--) {
                        char ch = strTranslation[i];
                        bool isKeptPunctuation = Char.IsPunctuation (ch) && (ch != '\"');
                        if (Char.IsLetterOrDigit (ch) || isKeptPunctuation) {
                            endClean = i;
                            break;
                        }
                    }

                    // Cut out the clean span and drop any double quotes left inside it.
                    this.Translation = strTranslation.Substring (startClean, endClean - startClean + 1).Replace ("\"", "");
                #endregion
            }
Beispiel #2
0
 /// <summary>
 /// Collects the link targets found in an HTML string. Values of <c>href="..."</c>
 /// attributes are added to <paramref name="documents"/> and values of
 /// <c>src="..."</c> attributes are added to <paramref name="images"/>.
 /// </summary>
 /// <param name="strString">HTML text to scan; comments and scripts are removed before scanning.</param>
 /// <param name="strRootUrl">URL used to turn non-absolute links into absolute ones; may be null.</param>
 /// <param name="documents">List that receives each distinct href target (mailto links are skipped).</param>
 /// <param name="images">List that receives each distinct src target.</param>
 public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
 {
     strString = StringParser.removeComments(strString);
     strString = StringParser.removeScripts(strString);
     StringParser stringParser = new StringParser(strString);

     // Turn single quotes into double quotes so that only the double-quoted
     // attribute form has to be matched below.
     stringParser.replaceEvery("'", "\"");

     // Normalize the root URL: trimmed, and ending in a slash when non-empty.
     string rootUrl = (strRootUrl != null) ? strRootUrl.Trim() : "";
     if (rootUrl.Length > 0 && !rootUrl.EndsWith("/", StringComparison.Ordinal))
     {
         rootUrl += "/";
     }

     stringParser.resetPosition();
     collectAttributeValues(stringParser, "href=\"", rootUrl, documents, true);

     stringParser.resetPosition();
     collectAttributeValues(stringParser, "src=\"", rootUrl, images, false);
 }

 /// <summary>
 /// Scans forward from the parser's current position for every occurrence of
 /// <paramref name="attributeStart"/>, reads the value up to the closing double quote,
 /// makes it absolute and adds it to <paramref name="target"/> if not already present.
 /// </summary>
 private static void collectAttributeValues(StringParser parser, string attributeStart, string rootUrl, ArrayList target, bool skipMailto)
 {
     string value = "";
     while (parser.skipToEndOfNoCase(attributeStart))
     {
         if (!parser.extractTo("\"", ref value))
         {
             continue;
         }

         value = value.Trim();
         if (value.Length == 0)
         {
             continue;
         }

         // URI schemes are case-insensitive, so "MAILTO:" is skipped as well.
         if (skipMailto && value.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) != -1)
         {
             continue;
         }

         value = makeAbsolute(rootUrl, value);
         if (!target.Contains(value))
         {
             target.Add(value);
         }
     }
 }

 /// <summary>
 /// Returns <paramref name="link"/> unchanged when it already starts with an
 /// http, https or ftp scheme; otherwise builds a URL from <paramref name="rootUrl"/>
 /// with its path replaced by <paramref name="link"/>.
 /// </summary>
 private static string makeAbsolute(string rootUrl, string link)
 {
     // "https://" must be treated as absolute too; otherwise a complete URL would be
     // forced into the path component of the root URL.
     if (link.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
         link.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
         link.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase))
     {
         return link;
     }

     try
     {
         UriBuilder uriBuilder = new UriBuilder(rootUrl);
         uriBuilder.Path = link;
         return uriBuilder.Uri.ToString();
     }
     catch (Exception)
     {
         // Deliberate best effort: when no URL can be built from the root
         // (for example an empty root), fall back to prefixing the scheme.
         return "http://" + link;
     }
 }
Beispiel #3
0
 /// <summary>
 /// Converts an HTML fragment to plain text: replaces a fixed set of character
 /// entities, removes every <c>&lt;...&gt;</c> tag and trims the result.
 /// </summary>
 /// <param name="strString">HTML text to clean.</param>
 /// <returns>The text with entities replaced and tags removed, trimmed of surrounding whitespace.</returns>
 public static string removeHtml(string strString)
 {
     // Named entities and their replacements, applied in this fixed order.
     // "&amp;" is intentionally absent here: it is decoded after all other
     // entities so that text such as "&amp;nbsp;" is decoded exactly once.
     // NOTE(review): the accented-letter entities map to empty strings, i.e. they are
     // deleted rather than decoded; the intended characters may have been lost
     // somewhere upstream. Confirm before changing.
     string[,] entities =
     {
         { "&nbsp;", " " },
         { "&aring;", "" },
         { "&auml;", "" },
         { "&eacute;", "" },
         { "&iacute;", "" },
         { "&igrave;", "" },
         { "&ograve;", "" },
         { "&ouml;", "" },
         { "&quot;", "\"" },
         { "&szlig;", "" }
     };

     StringParser stringParser = new StringParser(strString);
     for (int i = 0; i < entities.GetLength(0); i++)
     {
         string entity = entities[i, 0];
         if (strString.IndexOf(entity, StringComparison.Ordinal) != -1)
         {
             stringParser.replaceEveryExact(entity, entities[i, 1]);
         }
     }

     // Numeric entities: drop one leading zero, then decode the apostrophe.
     stringParser.replaceEveryExact("&#0", "&#");
     stringParser.replaceEveryExact("&#39;", "'");

     // Decode the ampersand last (see the note on the entity table).
     if (strString.IndexOf("&amp;", StringComparison.Ordinal) != -1)
     {
         stringParser.replaceEveryExact("&amp;", "&");
     }

     // Net effect of the next two calls: a space is inserted in front of every "</".
     // The intermediate "<~/" marker presumably keeps the replacement text from being
     // matched again by the first call.
     stringParser.replaceEveryExact("</", " <~/");
     stringParser.replaceEveryExact("<~/", "</");

     // Replace line-breaking tags with a space before all tags are stripped.
     string[] breakingTags = { "<br>", "<p>" };
     foreach (string tag in breakingTags)
     {
         if (strString.IndexOf(tag, StringComparison.Ordinal) != -1)
         {
             stringParser.replaceEvery(tag, " ");
         }
     }

     // Strip the tags: copy the text between them and skip everything from '<'
     // up to and including the next '>'.
     strString = stringParser.Content;
     System.Text.StringBuilder stripped = new System.Text.StringBuilder(strString.Length);
     int position = 0;
     int tagStart;
     while ((tagStart = strString.IndexOf('<', position)) != -1)
     {
         stripped.Append(strString, position, tagStart - position);
         position = tagStart + 1;
         int tagEnd = strString.IndexOf('>', position);
         if (tagEnd == -1)
         {
             // Unterminated tag: only the '<' itself is dropped; the rest is kept below.
             break;
         }
         position = tagEnd + 1;
     }
     if (position < strString.Length)
     {
         stripped.Append(strString, position, strString.Length - position);
     }

     // Replace double spaces with single ones and trim the result.
     stringParser.Content = stripped.ToString();
     stringParser.replaceEveryExact("  ", " ");
     return stringParser.Content.Trim();
 }
Beispiel #4
0
 /// <summary>
 /// Removes every script block from an HTML string: everything from
 /// <c>&lt;script</c> up to and including the matching <c>&lt;/script&gt;</c>,
 /// located with the parser's case-insensitive search methods.
 /// </summary>
 /// <param name="strString">HTML text to clean.</param>
 /// <returns>
 /// The text without script blocks, or <paramref name="strString"/> unchanged when a
 /// script block has no closing tag.
 /// </returns>
 public static string removeScripts(string strString)
 {
     System.Text.StringBuilder kept = new System.Text.StringBuilder();
     string segment = "";
     StringParser stringParser = new StringParser(strString);

     // Keep the text in front of each script block and skip over the block itself.
     while (stringParser.extractToNoCase("<script", ref segment))
     {
         kept.Append(segment);
         if (!stringParser.skipToEndOfNoCase("</script>"))
         {
             // Unterminated script block: give the input back untouched.
             return strString;
         }
     }

     // Append whatever follows the last script block.
     stringParser.extractToEnd(ref segment);
     kept.Append(segment);
     return kept.ToString();
 }
Beispiel #5
0
 /// <summary>
 /// Removes every HTML comment (<c>&lt;!--</c> through <c>--&gt;</c>) from a string.
 /// </summary>
 /// <param name="strString">HTML text to clean.</param>
 /// <returns>
 /// The text without comments, or <paramref name="strString"/> unchanged when a
 /// comment is never closed.
 /// </returns>
 public static string removeComments(string strString)
 {
     StringParser scanner = new StringParser(strString);
     System.Text.StringBuilder kept = new System.Text.StringBuilder();
     string segment = "";

     for (;;)
     {
         // Take the text in front of the next comment opener, if there is one.
         if (!scanner.extractTo("<!--", ref segment))
         {
             break;
         }
         kept.Append(segment);

         // Skip the comment body; an unclosed comment leaves the input as it was.
         if (!scanner.skipToEndOf("-->"))
         {
             return strString;
         }
     }

     // Append whatever follows the last comment.
     scanner.extractToEnd(ref segment);
     kept.Append(segment);
     return kept.ToString();
 }