/// <summary>
/// Parses the fetched content and stores the cleaned-up translation in <c>Translation</c>.
/// </summary>
/// <remarks>
/// The translation is taken from the text between the <c>result_box</c> span's opening
/// tag and the next <c>&lt;/span&gt;</c>. If nothing is scraped, <c>Translation</c>
/// is left as an empty string.
/// </remarks>
protected override void parseContent()
{
    // Initialize the scraper
    this.Translation = string.Empty;
    string strContent = this.Content;
    StringParser parser = new StringParser(strContent);

    // Scrape the translation
    string strTranslation = string.Empty;
    if (parser.skipToEndOf("<span id=result_box"))
    {
        if (parser.skipToEndOf("onmouseout=\"this.style.backgroundColor='#fff'\">"))
        {
            if (parser.extractTo("</span>", ref strTranslation))
            {
                strTranslation = StringParser.removeHtml(strTranslation);
            }
        }
    }

    // Nothing was scraped: keep Translation empty. Without this guard the
    // Substring(0, 1) call below throws ArgumentOutOfRangeException on "".
    if (string.IsNullOrEmpty(strTranslation))
    {
        return;
    }

    #region Fix up the translation
    int startClean = 0;
    int endClean = 0;

    // Index of the first letter or digit (stays 0 when there is none).
    int i = 0;
    while (i < strTranslation.Length)
    {
        if (Char.IsLetterOrDigit(strTranslation[i]))
        {
            startClean = i;
            break;
        }
        i++;
    }

    // Index of the last letter, digit or punctuation mark other than a double
    // quote (stays 0 when there is none).
    i = strTranslation.Length - 1;
    while (i >= 0)
    {
        char ch = strTranslation[i];
        if (Char.IsLetterOrDigit(ch) || (Char.IsPunctuation(ch) && (ch != '\"')))
        {
            endClean = i;
            break;
        }
        i--;
    }

    // Keep the cleaned range and drop any double quotes left inside it.
    this.Translation = strTranslation.Substring(startClean, endClean - startClean + 1).Replace("\"", "");
    #endregion
}
/// <summary>
/// Returns <paramref name="strString"/> with every <c>&lt;!--</c> … <c>--&gt;</c> span left out.
/// </summary>
/// <param name="strString">Text to process.</param>
/// <returns>
/// The pieces extracted before each <c>&lt;!--</c> marker, joined together, followed by
/// whatever the parser yields after the last closed comment. If a <c>&lt;!--</c> has no
/// matching <c>--&gt;</c>, the original string is returned unchanged.
/// </returns>
public static string removeComments(string strString)
{
    StringParser parser = new StringParser(strString);
    string kept = string.Empty;
    string piece = string.Empty;

    for (;;)
    {
        // Pull out the text that precedes the next comment opener, if any.
        if (!parser.extractTo("<!--", ref piece))
        {
            break;
        }

        kept = kept + piece;

        // Unterminated comment: give up and hand back the input as it was.
        if (!parser.skipToEndOf("-->"))
        {
            return strString;
        }
    }

    // Append the remainder after the last comment.
    parser.extractToEnd(ref piece);
    return kept + piece;
}
/// <summary>
/// Parses the fetched content and assigns the tidied translation to <c>Translation</c>.
/// </summary>
/// <remarks>
/// The raw translation is the text following the <c>result_box</c> span's opening tag
/// up to the next <c>&lt;/span&gt;</c>. When that text cannot be found,
/// <c>Translation</c> remains an empty string.
/// </remarks>
protected override void parseContent()
{
    // Initialize the scraper
    this.Translation = string.Empty;
    string strContent = this.Content;
    StringParser parser = new StringParser(strContent);

    // Scrape the translation
    string strTranslation = string.Empty;
    if (parser.skipToEndOf("<span id=result_box"))
    {
        if (parser.skipToEndOf("onmouseout=\"this.style.backgroundColor='#fff'\">"))
        {
            if (parser.extractTo("</span>", ref strTranslation))
            {
                strTranslation = StringParser.removeHtml(strTranslation);
            }
        }
    }

    // An empty result would make Substring(0, 1) below throw
    // ArgumentOutOfRangeException, so stop here with Translation left empty.
    if (string.IsNullOrEmpty(strTranslation))
    {
        return;
    }

    #region Fix up the translation
    // First letter or digit; defaults to 0 when the text has none.
    int startClean = 0;
    for (int i = 0; i < strTranslation.Length; i++)
    {
        if (Char.IsLetterOrDigit(strTranslation[i]))
        {
            startClean = i;
            break;
        }
    }

    // Last letter, digit or non-quote punctuation; defaults to 0 when there is none.
    int endClean = 0;
    for (int i = strTranslation.Length - 1; i >= 0; i--)
    {
        char ch = strTranslation[i];
        if (Char.IsLetterOrDigit(ch) || (Char.IsPunctuation(ch) && (ch != '\"')))
        {
            endClean = i;
            break;
        }
    }

    // Cut to the cleaned range, then strip double quotes remaining inside it.
    this.Translation = strTranslation.Substring(startClean, endClean - startClean + 1).Replace("\"", "");
    #endregion
}
/// <summary>
/// Collects the values of <c>href="…"</c> and <c>src="…"</c> attributes found in a piece of markup.
/// </summary>
/// <param name="strString">
/// Markup to scan. It is first passed through <c>StringParser.removeComments</c> and
/// <c>StringParser.removeScripts</c>, and single quotes are replaced with double quotes.
/// </param>
/// <param name="strRootUrl">
/// Root URL used to qualify links that do not start with <c>http://</c>, <c>https://</c>
/// or <c>ftp://</c>. May be null. It is trimmed and given a trailing slash when non-empty.
/// </param>
/// <param name="documents">Receives each distinct <c>href</c> value; values containing <c>mailto:</c> are skipped.</param>
/// <param name="images">Receives each distinct <c>src</c> value.</param>
public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
{
    strString = StringParser.removeComments(strString);
    strString = StringParser.removeScripts(strString);
    StringParser stringParser = new StringParser(strString);

    // Normalise attribute quoting so only the double-quoted form has to be matched.
    stringParser.replaceEvery("'", "\"");

    string rootUrl = "";
    if (strRootUrl != null)
    {
        rootUrl = strRootUrl.Trim();
    }
    if (rootUrl.Length > 0 && !rootUrl.EndsWith("/"))
    {
        rootUrl += "/";
    }

    string link = "";

    // Pass 1: href targets go to the documents list.
    stringParser.resetPosition();
    while (stringParser.skipToEndOfNoCase("href=\""))
    {
        if (stringParser.extractTo("\"", ref link))
        {
            link = link.Trim();
            if (link.Length > 0 && link.IndexOf("mailto:") == -1)
            {
                // https:// and upper-case schemes are absolute too; treating them as
                // relative would put the whole URL into the root's path.
                bool isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                    || link.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsolute)
                {
                    try
                    {
                        UriBuilder uriBuilder = new UriBuilder(rootUrl);
                        uriBuilder.Path = link;
                        link = uriBuilder.Uri.ToString();
                    }
                    catch (Exception)
                    {
                        // Best effort: the root could not be used, so assume an http link.
                        link = "http://" + link;
                    }
                }
                if (!documents.Contains(link))
                {
                    documents.Add(link);
                }
            }
        }
    }

    // Pass 2: src targets go to the images list.
    stringParser.resetPosition();
    while (stringParser.skipToEndOfNoCase("src=\""))
    {
        if (stringParser.extractTo("\"", ref link))
        {
            link = link.Trim();
            if (link.Length > 0)
            {
                bool isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                    || link.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsolute)
                {
                    try
                    {
                        UriBuilder uriBuilder2 = new UriBuilder(rootUrl);
                        uriBuilder2.Path = link;
                        link = uriBuilder2.Uri.ToString();
                    }
                    catch (Exception)
                    {
                        // Best effort: the root could not be used, so assume an http link.
                        link = "http://" + link;
                    }
                }
                if (!images.Contains(link))
                {
                    images.Add(link);
                }
            }
        }
    }
}
/// <summary>
/// Produces a copy of <paramref name="strString"/> without its <c>&lt;!--</c> … <c>--&gt;</c> sections.
/// </summary>
/// <param name="strString">Input text.</param>
/// <returns>
/// The text outside the comment markers, in order. The input is returned untouched
/// when a comment opener is found without a closing <c>--&gt;</c>.
/// </returns>
public static string removeComments(string strString)
{
    string result = "";
    string segment = "";
    StringParser scanner = new StringParser(strString);

    bool foundOpener = scanner.extractTo("<!--", ref segment);
    while (foundOpener)
    {
        result += segment;

        // A comment that never closes means the input is returned as-is.
        bool closed = scanner.skipToEndOf("-->");
        if (!closed)
        {
            return strString;
        }

        foundOpener = scanner.extractTo("<!--", ref segment);
    }

    // Whatever follows the final comment is kept verbatim.
    scanner.extractToEnd(ref segment);
    return result + segment;
}
/// <summary>
/// Gathers the <c>href="…"</c> and <c>src="…"</c> attribute values that appear in a block of markup.
/// </summary>
/// <param name="strString">
/// Markup to inspect. <c>StringParser.removeComments</c> and <c>StringParser.removeScripts</c>
/// are applied first, and every single quote is turned into a double quote.
/// </param>
/// <param name="strRootUrl">
/// Root URL for links that do not begin with <c>http://</c>, <c>https://</c> or
/// <c>ftp://</c>. May be null. It is trimmed and terminated with a slash when non-empty.
/// </param>
/// <param name="documents">List extended with each new <c>href</c> value, except those containing <c>mailto:</c>.</param>
/// <param name="images">List extended with each new <c>src</c> value.</param>
public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
{
    strString = StringParser.removeComments(strString);
    strString = StringParser.removeScripts(strString);
    StringParser stringParser = new StringParser(strString);

    // After this only the double-quoted attribute form needs to be searched for.
    stringParser.replaceEvery("'", "\"");

    string baseUrl = "";
    if (strRootUrl != null)
    {
        baseUrl = strRootUrl.Trim();
    }
    if (baseUrl.Length > 0 && !baseUrl.EndsWith("/"))
    {
        baseUrl += "/";
    }

    string target = "";

    // Document links (href).
    stringParser.resetPosition();
    while (stringParser.skipToEndOfNoCase("href=\""))
    {
        if (stringParser.extractTo("\"", ref target))
        {
            target = target.Trim();
            if (target.Length > 0 && target.IndexOf("mailto:") == -1)
            {
                // Recognise https:// and any letter case as absolute; otherwise the
                // full URL would be written into the root's path and come out mangled.
                bool hasScheme = target.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || target.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                    || target.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
                if (!hasScheme)
                {
                    try
                    {
                        UriBuilder uriBuilder = new UriBuilder(baseUrl);
                        uriBuilder.Path = target;
                        target = uriBuilder.Uri.ToString();
                    }
                    catch (Exception)
                    {
                        // Deliberate fallback when the root cannot be used.
                        target = "http://" + target;
                    }
                }
                if (!documents.Contains(target))
                {
                    documents.Add(target);
                }
            }
        }
    }

    // Image links (src).
    stringParser.resetPosition();
    while (stringParser.skipToEndOfNoCase("src=\""))
    {
        if (stringParser.extractTo("\"", ref target))
        {
            target = target.Trim();
            if (target.Length > 0)
            {
                bool hasScheme = target.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || target.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                    || target.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
                if (!hasScheme)
                {
                    try
                    {
                        UriBuilder uriBuilder2 = new UriBuilder(baseUrl);
                        uriBuilder2.Path = target;
                        target = uriBuilder2.Uri.ToString();
                    }
                    catch (Exception)
                    {
                        // Deliberate fallback when the root cannot be used.
                        target = "http://" + target;
                    }
                }
                if (!images.Contains(target))
                {
                    images.Add(target);
                }
            }
        }
    }
}