예제 #1
0
        /// <summary>
        /// Parses the fetched content and stores the cleaned-up translation in
        /// the Translation property.
        /// </summary>
        /// <remarks>
        /// The raw translation is the text between the end of the
        /// <c>onmouseout=...</c> attribute that follows <c>&lt;span id=result_box</c>
        /// and the next <c>&lt;/span&gt;</c>. When nothing is scraped, Translation is
        /// left as an empty string.
        /// </remarks>
        protected override void parseContent()
        {
            // Initialize the scraper
            this.Translation = string.Empty;
            string       strContent = this.Content;
            StringParser parser     = new StringParser(strContent);

            // Scrape the translation
            string strTranslation = string.Empty;

            if (parser.skipToEndOf("<span id=result_box"))
            {
                if (parser.skipToEndOf("onmouseout=\"this.style.backgroundColor='#fff'\">"))
                {
                    if (parser.extractTo("</span>", ref strTranslation))
                    {
                        strTranslation = StringParser.removeHtml(strTranslation);
                    }
                }
            }

            // Nothing to clean up. Without this guard the Substring call below
            // would ask for one character of an empty string and throw
            // ArgumentOutOfRangeException.
            if (string.IsNullOrEmpty(strTranslation))
            {
                return;
            }

            #region Fix up the translation
            int startClean = 0;
            int endClean   = 0;
            int i          = 0;

            // Find the first letter or digit; startClean stays 0 when there is none.
            while (i < strTranslation.Length)
            {
                if (Char.IsLetterOrDigit(strTranslation[i]))
                {
                    startClean = i;
                    break;
                }
                i++;
            }

            // Find the last letter, digit or non-quote punctuation mark.
            // Index 0 is not tested because endClean already defaults to 0.
            i = strTranslation.Length - 1;
            while (i > 0)
            {
                char ch = strTranslation[i];
                if (Char.IsLetterOrDigit(ch) ||
                    (Char.IsPunctuation(ch) && (ch != '\"')))
                {
                    endClean = i;
                    break;
                }
                i--;
            }

            // Keep the cleaned range and drop any double quotes left inside it.
            this.Translation = strTranslation.Substring(startClean, endClean - startClean + 1).Replace("\"", "");
            #endregion
        }
예제 #2
0
        /// <summary>
        /// Returns <paramref name="strString"/> with its <c>&lt;!-- ... --&gt;</c>
        /// sections left out.
        /// </summary>
        /// <param name="strString">Text to filter.</param>
        /// <returns>
        /// The text gathered around the comment markers, or the original
        /// <paramref name="strString"/> unchanged when <c>skipToEndOf("--&gt;")</c>
        /// fails after an opening marker was found.
        /// </returns>
        public static string removeComments(string strString)
        {
            StringParser scanner  = new StringParser(strString);
            string       kept     = "";
            string       fragment = "";

            // NOTE(review): assumes extractTo overwrites 'fragment' with the text
            // that precedes the marker and returns false once no marker is left;
            // confirm against StringParser.
            for (;;)
            {
                if (!scanner.extractTo("<!--", ref fragment))
                {
                    break;
                }

                kept = kept + fragment;

                if (scanner.skipToEndOf("-->"))
                {
                    continue;
                }

                // No closing marker: give the input back untouched.
                return(strString);
            }

            // Append whatever follows the last comment.
            scanner.extractToEnd(ref fragment);
            return(kept + fragment);
        }
예제 #3
0
            /// <summary>
            /// Parses the fetched content and stores the cleaned-up translation in
            /// the Translation property.
            /// </summary>
            /// <remarks>
            /// The raw translation is the text between the end of the
            /// <c>onmouseout=...</c> attribute that follows <c>&lt;span id=result_box</c>
            /// and the next <c>&lt;/span&gt;</c>. When nothing is scraped, Translation is
            /// left as an empty string.
            /// </remarks>
            protected override void parseContent()
            {
                // Initialize the scraper
                this.Translation = string.Empty;
                string strContent = this.Content;
                StringParser parser = new StringParser (strContent);

                // Scrape the translation
                string strTranslation = string.Empty;
                if (parser.skipToEndOf ("<span id=result_box")) {
                    if (parser.skipToEndOf ("onmouseout=\"this.style.backgroundColor='#fff'\">")) {
                        if (parser.extractTo("</span>", ref strTranslation)) {
                            strTranslation = StringParser.removeHtml (strTranslation);
                        }
                    }
                }

                // Nothing to clean up. Without this guard the Substring call below
                // would ask for one character of an empty string and throw
                // ArgumentOutOfRangeException.
                if (string.IsNullOrEmpty (strTranslation)) {
                    return;
                }

                #region Fix up the translation
                    int startClean = 0;
                    int endClean = 0;
                    int i=0;

                    // Find the first letter or digit; startClean stays 0 when there is none.
                    while (i < strTranslation.Length) {
                        if (Char.IsLetterOrDigit (strTranslation[i])) {
                            startClean = i;
                            break;
                        }
                        i++;
                    }

                    // Find the last letter, digit or non-quote punctuation mark.
                    // Index 0 is not tested because endClean already defaults to 0.
                    i = strTranslation.Length - 1;
                    while (i > 0) {
                        char ch = strTranslation[i];
                        if (Char.IsLetterOrDigit (ch) ||
                            (Char.IsPunctuation (ch) && (ch != '\"'))) {
                            endClean = i;
                            break;
                        }
                        i--;
                    }

                    // Keep the cleaned range and drop any double quotes left inside it.
                    this.Translation = strTranslation.Substring (startClean, endClean - startClean + 1).Replace ("\"", "");
                #endregion
            }
예제 #4
0
 /// <summary>
 /// Collects the URLs found in <c>href="..."</c> and <c>src="..."</c> attributes
 /// of an HTML string.
 /// </summary>
 /// <param name="strString">
 /// HTML text to scan. It is passed through removeComments and removeScripts,
 /// and single quotes are replaced by double quotes, before scanning.
 /// </param>
 /// <param name="strRootUrl">
 /// URL used to qualify links that do not start with http://, https:// or
 /// ftp://. May be null.
 /// </param>
 /// <param name="documents">
 /// Receives each distinct <c>href</c> target; links containing "mailto:" are skipped.
 /// </param>
 /// <param name="images">Receives each distinct <c>src</c> target.</param>
 public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
 {
     strString = StringParser.removeComments(strString);
     strString = StringParser.removeScripts(strString);
     StringParser stringParser = new StringParser(strString);
     stringParser.replaceEvery("'", "\"");

     // Normalise the root URL so that it is either empty or ends with a slash.
     string rootUrl = (strRootUrl != null) ? strRootUrl.Trim() : "";
     if (rootUrl.Length > 0 && !rootUrl.EndsWith("/", StringComparison.Ordinal))
     {
         rootUrl += "/";
     }

     stringParser.resetPosition();
     collectAttributeUrls(stringParser, "href=\"", rootUrl, true, documents);

     stringParser.resetPosition();
     collectAttributeUrls(stringParser, "src=\"", rootUrl, false, images);
 }

 // Scans from the parser's current position for every occurrence of the
 // attribute prefix and adds each distinct, qualified URL to 'target'.
 private static void collectAttributeUrls(StringParser stringParser, string strAttributePrefix, string strRootUrl, bool skipMailto, ArrayList target)
 {
     string link = "";
     while (stringParser.skipToEndOfNoCase(strAttributePrefix))
     {
         if (!stringParser.extractTo("\"", ref link))
         {
             continue;
         }

         link = link.Trim();
         if (link.Length == 0)
         {
             continue;
         }
         if (skipMailto && link.IndexOf("mailto:", StringComparison.Ordinal) != -1)
         {
             continue;
         }

         link = qualifyUrl(link, strRootUrl);
         if (!target.Contains(link))
         {
             target.Add(link);
         }
     }
 }

 // Returns 'strUrl' unchanged when it already carries an http, https or ftp
 // scheme; otherwise builds a URL from the root URL with 'strUrl' as its path.
 private static string qualifyUrl(string strUrl, string strRootUrl)
 {
     // URI schemes are case-insensitive, and https links must not be treated
     // as relative paths.
     if (strUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
         strUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
         strUrl.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase))
     {
         return strUrl;
     }

     try
     {
         // NOTE(review): assigning Path replaces any path already present in
         // the root URL, so links resolve against the host root; confirm that
         // this is intended.
         UriBuilder uriBuilder = new UriBuilder(strRootUrl);
         uriBuilder.Path = strUrl;
         return uriBuilder.Uri.ToString();
     }
     catch (Exception)
     {
         // Best effort: the root URL could not be used, so assume an http link.
         return "http://" + strUrl;
     }
 }
예제 #5
0
 /// <summary>
 /// Builds a copy of <paramref name="strString"/> that omits its
 /// <c>&lt;!-- ... --&gt;</c> sections.
 /// </summary>
 /// <param name="strString">Text to filter.</param>
 /// <returns>
 /// The concatenated text collected around the comment markers. The original
 /// <paramref name="strString"/> is returned as-is when
 /// <c>skipToEndOf("--&gt;")</c> fails after an opening marker was found.
 /// </returns>
 public static string removeComments(string strString)
 {
     StringParser parser = new StringParser(strString);
     string output = "";
     string chunk = "";

     // NOTE(review): presumably extractTo overwrites 'chunk' with the text in
     // front of the marker and reports whether the marker was found; verify
     // against StringParser.
     bool openerFound = parser.extractTo("<!--", ref chunk);
     while (openerFound)
     {
         output = string.Concat(output, chunk);

         bool closerFound = parser.skipToEndOf("-->");
         if (!closerFound)
         {
             // Unterminated comment: hand the input back unchanged.
             return strString;
         }

         openerFound = parser.extractTo("<!--", ref chunk);
     }

     // Whatever remains after the last comment is appended as the tail.
     parser.extractToEnd(ref chunk);
     return string.Concat(output, chunk);
 }
예제 #6
0
        /// <summary>
        /// Collects the URLs found in <c>href="..."</c> and <c>src="..."</c> attributes
        /// of an HTML string.
        /// </summary>
        /// <param name="strString">
        /// HTML text to scan. It is passed through removeComments and removeScripts,
        /// and single quotes are replaced by double quotes, before scanning.
        /// </param>
        /// <param name="strRootUrl">
        /// URL used to qualify links that do not start with http://, https:// or
        /// ftp://. May be null.
        /// </param>
        /// <param name="documents">
        /// Receives each distinct <c>href</c> target; links containing "mailto:" are skipped.
        /// </param>
        /// <param name="images">Receives each distinct <c>src</c> target.</param>
        public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
        {
            strString = StringParser.removeComments(strString);
            strString = StringParser.removeScripts(strString);
            StringParser stringParser = new StringParser(strString);

            stringParser.replaceEvery("'", "\"");

            // Normalise the root URL so that it is either empty or ends with a slash.
            string rootUrl = "";
            if (strRootUrl != null)
            {
                rootUrl = strRootUrl.Trim();
            }
            if (rootUrl.Length > 0 && !rootUrl.EndsWith("/", StringComparison.Ordinal))
            {
                rootUrl += "/";
            }

            stringParser.resetPosition();
            collectAttributeUrls(stringParser, "href=\"", rootUrl, true, documents);

            stringParser.resetPosition();
            collectAttributeUrls(stringParser, "src=\"", rootUrl, false, images);
        }

        // Scans from the parser's current position for every occurrence of the
        // attribute prefix and adds each distinct, qualified URL to 'target'.
        private static void collectAttributeUrls(StringParser stringParser, string strAttributePrefix, string strRootUrl, bool skipMailto, ArrayList target)
        {
            string link = "";

            while (stringParser.skipToEndOfNoCase(strAttributePrefix))
            {
                if (!stringParser.extractTo("\"", ref link))
                {
                    continue;
                }

                link = link.Trim();
                if (link.Length == 0)
                {
                    continue;
                }
                if (skipMailto && link.IndexOf("mailto:", StringComparison.Ordinal) != -1)
                {
                    continue;
                }

                link = qualifyUrl(link, strRootUrl);
                if (!target.Contains(link))
                {
                    target.Add(link);
                }
            }
        }

        // Returns 'strUrl' unchanged when it already carries an http, https or ftp
        // scheme; otherwise builds a URL from the root URL with 'strUrl' as its path.
        private static string qualifyUrl(string strUrl, string strRootUrl)
        {
            // URI schemes are case-insensitive, and https links must not be treated
            // as relative paths.
            if (strUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                strUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
                strUrl.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase))
            {
                return strUrl;
            }

            try
            {
                // NOTE(review): assigning Path replaces any path already present in
                // the root URL, so links resolve against the host root; confirm that
                // this is intended.
                UriBuilder uriBuilder = new UriBuilder(strRootUrl);
                uriBuilder.Path = strUrl;
                return uriBuilder.Uri.ToString();
            }
            catch (Exception)
            {
                // Best effort: the root URL could not be used, so assume an http link.
                return "http://" + strUrl;
            }
        }