/// <summary>
/// Extracts the first run of numeral characters from <paramref name="text"/> and converts it to an integer.
/// </summary>
/// <remarks>
/// A run made up only of ASCII digits is parsed directly. Any other run is split into
/// "digit + unit" tokens (十, 百, 千, 万) and bare digit groups whose values are summed, so
/// "二十一" gives 21 and "一百零五" gives 105. A 十 without a leading digit counts as 10.
/// The extraction pattern also accepts financial numerals (壹, 贰, ...) and the units 亿/兆,
/// but those characters are not converted and contribute nothing to the result.
/// </remarks>
/// <param name="text">Text that may contain a number; may be null or empty.</param>
/// <returns>The converted number, or 0 when nothing convertible is found.</returns>
public static int GetNumberFromText(this string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return 0;
    }

    Match numeralRun = Regex.Match(text, "(?<key>[一二三四五六七八九十百千万亿兆零壹贰叁肆伍陆柒捌玖0123456789]+)");
    if (!numeralRun.Success)
    {
        return 0;
    }

    string clearNumberStr = numeralRun.Groups["key"].Value;

    // A run of plain ASCII digits ("12") is already a number; the token pattern below would ignore it.
    bool asciiDigitsOnly = true;
    foreach (char c in clearNumberStr)
    {
        if (c < '0' || c > '9')
        {
            asciiDigitsOnly = false;
            break;
        }
    }

    if (asciiDigitsOnly)
    {
        int parsed;
        return int.TryParse(clearNumberStr, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out parsed)
            ? parsed
            : 0;
    }

    int result = 0;
    Match m = new Regex("[一二三四五六七八九]?十|[一二三四五六七八九]+百|[一二三四五六七八九]+千|[一二三四五六七八九]+万|[一二三四五六七八九]+", RegexOptions.None).Match(clearNumberStr);
    while (m.Success)
    {
        string token = m.Value;

        // "十" on its own means ten; without an explicit leading digit it would turn into "0".
        if (token[0] == '十')
        {
            token = "一" + token;
        }

        // Digits map to themselves and each unit to its trailing zeros, e.g. "二百" -> "200".
        string digits = token.Replace("一", "1")
                             .Replace("二", "2")
                             .Replace("三", "3")
                             .Replace("四", "4")
                             .Replace("五", "5")
                             .Replace("六", "6")
                             .Replace("七", "7")
                             .Replace("八", "8")
                             .Replace("九", "9")
                             .Replace("十", "0")
                             .Replace("百", "00")
                             .Replace("千", "000")
                             .Replace("万", "0000");

        int value;
        if (int.TryParse(digits, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out value))
        {
            result += value;
        }

        m = m.NextMatch();
    }

    return result;
}
/// <summary>
/// Converts an HTML fragment to plain text, keeping link targets visible.
/// </summary>
/// <remarks>
/// <c>&lt;br&gt;</c> becomes a newline, non-breaking spaces become ordinary spaces, every
/// <c>&lt;a&gt;</c> opening tag that carries a quoted <c>href</c> is replaced by " (url) ",
/// and all remaining tags are removed. Each URL is taken from the anchor tag it belongs to,
/// so anchors without an href and href attributes on other elements cannot shift the URLs
/// onto the wrong link.
/// </remarks>
/// <param name="htmlText">HTML to strip; may be null or empty.</param>
/// <returns>The plain-text rendering, or an empty string for null or empty input.</returns>
private static string StripHtml(string htmlText)
{
    if (string.IsNullOrEmpty(htmlText))
    {
        return string.Empty;
    }

    htmlText = htmlText.Replace("<br>", "\n");

    // Normalise non-breaking spaces, both the entity and the literal character.
    htmlText = htmlText.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    Regex hrefPattern = new Regex(@"href=([""']+)(?<Url>(([^""'])+))");

    // \b keeps <abbr>, <area> etc. from being mistaken for anchors; [^>]* also spans line breaks.
    htmlText = Regex.Replace(htmlText, @"<a\b[^>]*>", delegate(Match anchor)
    {
        Match href = hrefPattern.Match(anchor.Value);
        return href.Success
            ? string.Format(" ({0}) ", href.Groups["Url"].Value)
            : string.Empty;
    });

    return Regex.Replace(htmlText, @"<[^>]*>", string.Empty);
}
/// <summary>
/// Collects every hashtag-like token found in <paramref name="s"/>: a '#' followed by one or
/// more ASCII letters, digits or underscores.
/// </summary>
/// <param name="s">Text to scan.</param>
/// <returns>The matched tokens (including the leading '#') in order of appearance.</returns>
private List<string> GetTag(string s)
{
    List<string> tags = new List<string>();

    foreach (Match hit in Regex.Matches(s, @"#[a-zA-Z0-9_]+"))
    {
        tags.Add(hit.Value);
    }

    return tags;
}
/// <summary>
/// Decodes JavaScript-style <c>\uXXXX</c> escape sequences in a string into the characters
/// they denote (for example Chinese text that was escaped to ASCII).
/// </summary>
/// <remarks>
/// Only sequences with exactly four hexadecimal digits are decoded; anything else is left
/// untouched. Each escape becomes one UTF-16 code unit, so a surrogate pair written as two
/// escapes is preserved. The input is processed in a single pass, so text produced by
/// decoding is never decoded a second time.
/// </remarks>
/// <param name="AsciiString">Text that may contain <c>\uXXXX</c> escapes; may be null or empty.</param>
/// <returns>The decoded text; null or empty input is returned unchanged.</returns>
public static string AsciiToNative(this string AsciiString)
{
    if (string.IsNullOrEmpty(AsciiString))
    {
        return AsciiString;
    }

    return Regex.Replace(AsciiString, @"\\u([0-9a-fA-F]{4})", delegate(Match escape)
    {
        return ((char)Convert.ToInt32(escape.Groups[1].Value, 16)).ToString();
    });
}
/// <summary>
/// Builds a display string from a template passed as the converter parameter.
/// </summary>
/// <remarks>
/// The template may contain calls of the form <c>word(form1,form2,form3)</c>. Each call is
/// replaced by the word followed by the ending that <c>Engine.Class.FinText.get</c> returns
/// for the numeric value. Afterwards either the token <c>ValueNull</c> is replaced by the
/// format placeholder "{0:N0}", or the token <c>value</c> is replaced by the formatted number.
/// Templates without such calls only support the <c>ValueNull</c> replacement.
/// </remarks>
/// <param name="value">The bound value; it is converted to a number only when the template contains a call.</param>
/// <param name="targetType">Not used.</param>
/// <param name="parameter">The template string. Null or a non-string yields the "not implemented" text.</param>
/// <param name="culture">Not used; number formatting follows the current culture.</param>
/// <returns>The resulting string.</returns>
public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
{
    const string notImplementedText = "Не реализованно";
    const string callTemplate = @"(\w+)\((\w+,\w+,\w+)\)";

    string param = parameter as string;
    if (param == null)
    {
        // No template at all: behave like a template this converter does not recognise.
        return notImplementedText;
    }

    Match call = Regex.Match(param, callTemplate);
    if (!call.Success)
    {
        // Plain string replacement: value may be a string or a number, a string is always returned.
        return param.Contains("ValueNull")
            ? param.Replace("ValueNull", "{0:N0}")
            : notImplementedText;
    }

    // Word endings inside the text: value has to be a number here.
    double x = GetValueNumber(value);

    for (; call.Success; call = call.NextMatch())
    {
        string[] forms = call.Groups[2].Value.Split(',');
        string finText = call.Groups[1].Value + Engine.Class.FinText.get(forms[0], forms[1], forms[2], (long)x);

        // Replace exactly this call. Matching by function name alone would give every call with
        // the same name the ending computed for the first one.
        param = param.Replace(call.Value, finText);
    }

    // Either keep a format placeholder for a later string.Format, or insert the number right away.
    if (param.Contains("ValueNull"))
    {
        return param.Replace("ValueNull", "{0:N0}");
    }

    // Character 160 is the non-breaking space some cultures use as the group separator.
    return param.Replace("value", string.Format("{0:N0}", x).Replace((char)160, ','));
}
/// <summary>
/// Re-applies syntax colouring to the whole rich text box: every configured keyword gets its
/// colour and weight, and everything from "--" to the end of the line is coloured green.
/// </summary>
/// <remarks>
/// The control is hidden while the selection is moved around. The caret position from before
/// the call is restored afterwards, and the control is made visible again and
/// <c>running</c> cleared even if colouring throws.
/// </remarks>
public void colorAll()
{
    this.running = true;

    // Must be read before SelectAll(), which moves the selection start to 0.
    int selectionStart = this.rich.SelectionStart;

    try
    {
        this.rich.SelectAll();
        this.resetFont();
        string text = this.rich.Text;
        this.rich.Visible = false;

        // NOTE(review): keyword.Value is used as a raw regex fragment and matching is
        // case-sensitive; confirm whether keywords should be escaped or matched ignoring case.
        foreach (Keyword keyword in this.keywords)
        {
            for (Match match = new Regex(@"\b" + keyword.Value + @"\b").Match(text); match.Success; match = match.NextMatch())
            {
                this.rich.Select(match.Index, match.Length);
                this.rich.SelectionColor = Color.FromName(keyword.Color);
                this.rich.ClearUndo();
                try
                {
                    this.rich.SelectionFont = new Font(this.normalFont.FontFamily, this.normalFont.Size, keyword.Bold ? FontStyle.Bold : FontStyle.Regular);
                }
                catch (Exception exception)
                {
                    MessageBox.Show(exception.ToString());
                }
            }
        }

        for (Match match2 = new Regex(@"\-\-[^\n]*(?!=\n)").Match(this.rich.Text); match2.Success; match2 = match2.NextMatch())
        {
            this.rich.Select(match2.Index, match2.Length);
            this.rich.SelectionColor = Color.Green;
        }
    }
    finally
    {
        this.rich.Visible = true;
        this.rich.Focus();
        this.rich.Select(selectionStart, 0);
        this.resetFont();
        this.running = false;
    }
}
/// <summary>
/// Fills the result set with definitions for the current query, either from the cache or by
/// downloading and scraping a Google "define:" results page.
/// </summary>
/// <remarks>
/// Definitions without their own source link take the URL of the closest following
/// definition that has one. The full result set is cached before it is trimmed to
/// <c>mResultSetMaxSz</c> entries.
/// </remarks>
public override void Search()
{
    string langStr = TextMiningUtils.GetLanguageCode(mLanguage);
    mResultSet.Inner.Clear();

    bool servedFromCache = mCache != null
        && mCache.GetFromCache("GoogleDefine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet);
    if (servedFromCache)
    {
        return;
    }

    string requestUrl = string.Format("http://www.google.com/search?defl={0}&q=define%3A{1}", langStr, HttpUtility.UrlEncode(mQuery));
    string defHtml = WebUtils.GetWebPage(requestUrl); // throws WebException

    Regex definitionPattern = new Regex("<li>(?<def>[^<]*)(<br><a href=\"(?<href>[^\"]*))?", RegexOptions.Singleline);
    Regex targetPattern = new Regex("&q=(?<url>[^&]*)");

    int rank = 0;
    for (Match defMatch = definitionPattern.Match(defHtml); defMatch.Success; defMatch = defMatch.NextMatch())
    {
        string def = HttpUtility.HtmlDecode(defMatch.Groups["def"].Value.Trim());

        string url = null;
        Match matchUrl = targetPattern.Match(defMatch.Groups["href"].Value);
        if (matchUrl.Success)
        {
            url = HttpUtility.UrlDecode(matchUrl.Groups["url"].Value);
        }

        rank++;
        mResultSet.Inner.Add(new SearchEngineResultItem(mQuery, def, url, rank));
    }

    // Walk backwards so that items without a URL inherit the one of the next item that has it.
    string lastUrl = null;
    int j = mResultSet.Count;
    while (--j >= 0)
    {
        if (mResultSet[j].Url != null)
        {
            lastUrl = mResultSet[j].Url;
        }
        else
        {
            mResultSet[j].SetUrl(lastUrl);
        }
    }

    mTotalHits = mResultSet.Count;

    if (mCache != null)
    {
        mCache.PutIntoCache("GoogleDefine", mLanguage, mQuery, mTotalHits, mResultSet);
    }

    int surplus = mResultSet.Count - mResultSetMaxSz;
    if (surplus > 0)
    {
        mResultSet.Inner.RemoveRange(mResultSetMaxSz, surplus);
    }
}
/// <summary>
/// Extracts hashtags from <paramref name="tag"/> and appends the new ones to
/// <c>job.HashTag</c>.
/// </summary>
/// <remarks>
/// A hashtag is '#' followed by word characters. Tokens of three characters or fewer
/// (including the '#') are ignored. A token is skipped when it already exists, compared
/// case-insensitively and trimmed, either in <c>job.HashTag</c> or earlier in the same
/// input, so one call never adds the same tag twice.
/// </remarks>
/// <param name="job">Job whose tag list is extended; nothing happens when null.</param>
/// <param name="tag">Text to scan for hashtags; nothing happens when null.</param>
private void AddTag(Engine.InfoClass.job.hashTag job, string tag)
{
    if (job == null || tag == null)
    {
        return;
    }

    List<string> fresh = new List<string>();

    foreach (Match hit in Regex.Matches(tag, @"#[\w0-9_]+"))
    {
        string candidate = hit.Value;

        // Keep only tags with more than two characters after the '#'.
        if (candidate.Length <= 3)
        {
            continue;
        }

        string key = candidate.ToLower().Trim();
        bool alreadyKnown = job.HashTag.Exists(x => x.ToLower().Trim() == key)
            || fresh.Exists(x => x.ToLower().Trim() == key);

        if (!alreadyKnown)
        {
            fresh.Add(candidate);
        }
    }

    // Add the collected tags in one go.
    if (fresh.Count != 0)
    {
        job.HashTag.AddRange(fresh);
    }
}
/// <summary>
/// Worker loop: runs every configured test request once per iteration until
/// <c>StopRequested</c> is set or <c>MaxIterations</c> is reached, and records one
/// <c>TestRequestResult</c> per request in <c>testResults</c>.
/// </summary>
/// <remarks>
/// Each iteration uses a fresh <c>TestWebClient</c> and command container collection.
/// Around every request the optional "before" and "after" scripts are executed through the
/// command containers; an exception thrown by a script is stored as the script result text
/// instead of being propagated.
/// </remarks>
private void Work()
{
    IsRunning = true;
    StopRequested = false;
    Stopwatch swHttp = new Stopwatch(), swIteration = new Stopwatch();
    TestRequestResult result;
    DateTime start;
    string response, scriptBeforeResult, error;
    WebExceptionStatus errorCode;
    while (!StopRequested && Iterations < MaxIterations)
    {
        Iterations++;
        swIteration.Start();
        using (TestWebClient webClient = new TestWebClient() { Timeout = TestWorker.RequestsTimeout })
        {
            // Objects that scripts can address by name.
            TestCommander commander = new TestCommander(this, webClient);
            CommandContainerCollection ccc = new CommandContainerCollection() { new CommandContainer(commander, "Commander"), new CommandContainer(this, "Worker"), new CommandContainer(webClient, "WebClient"), new CommandContainer(webClient.CookieContainer, "Cookies"), new CommandContainer(webClient.Headers, "Headers"), };
            ccc.SelectContainer("Commander");
            foreach (TestRequestInfo request in this.testRequests)
            {
                if (StopRequested)
                {
                    break;
                }
                errorCode = WebExceptionStatus.Success;
                error = null;
                start = DateTime.Now;
                ccc.Add(new CommandContainer(request, "Request"));
                commander.Request = request;
                // Run the optional "before" script; a null script result is stored as "".
                try
                {
                    scriptBeforeResult = request.ScriptBefore == null ? null : (ccc.ExecuteCommand(request.ScriptBefore) ?? "").ToString();
                }
                catch (Exception e)
                {
                    scriptBeforeResult = e.Message;
                }
                IsWaiting = true;
                try
                {
                    // GET when there is no post data, form-encoded POST otherwise.
                    if (request.PostData == null)
                    {
                        webClient.Headers.Remove(HttpRequestHeader.ContentType);
                        if (commander.Burning)
                        {
                            // "Burning": start the request asynchronously and cancel it right away,
                            // so no response body is ever read.
                            response = null;
                            error = "Burning";
                            swHttp.Start();
                            webClient.DownloadStringAsync(new Uri(request.Url));
                            webClient.CancelAsync();
                        }
                        else
                        {
                            swHttp.Start();
                            response = webClient.DownloadString(request.Url);
                        }
                    }
                    else
                    {
                        webClient.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
                        if (commander.Burning)
                        {
                            response = null;
                            error = "Burning";
                            swHttp.Start();
                            webClient.UploadStringAsync(new Uri(request.Url), request.PostData);
                            webClient.CancelAsync();
                        }
                        else
                        {
                            swHttp.Start();
                            response = webClient.UploadString(request.Url, request.PostData);
                        }
                    }
                }
                catch (WebException we)
                {
                    response = null;
                    errorCode = we.Status;
                    error = we.Message;
                }
                catch(Exception e)
                {
                    response = null;
                    errorCode = WebExceptionStatus.UnknownError;
                    error = e.Message;
                }
                swHttp.Stop();
                IsWaiting = false;
                // Valid and Length stay null when there is no response (and Valid also when no
                // validation pattern is configured).
                result = new TestRequestResult(request) { Duration = swHttp.Elapsed, Start = start, WorkerId = Id, Status = errorCode, Valid = request.ResultValidation == null || response == null ? default(Nullable<bool>) : new Regex(request.ResultValidation, RegexOptions.Singleline).Match(response).Success, Length = response == null ? default(Nullable<int>) : response.Length, Iteration = Iterations, ScriptBeforeResult = scriptBeforeResult, Error = error, };
                if (request.ResultDataExtract != null && response != null)
                {
                    // Join all matches of the extraction pattern with the configured separator.
                    StringBuilder data = new StringBuilder();
                    Match match = new Regex(request.ResultDataExtract, RegexOptions.Singleline).Match(response);
                    while (match.Success)
                    {
                        data.Append(match.Value);
                        match = match.NextMatch();
                        if (match.Success)
                        {
                            data.Append(request.ResultDataSeparator);
                        }
                    }
                    result.DataExtracted = data.ToString();
                }
                // Expose the response and the result to the "after" script.
                ccc.StoredValues["Response"] = response;
                commander.ResponseString = response;
                ccc.Remove("Result");
                ccc.Add(new CommandContainer(result, "Result"));
                commander.Result = result;
                try
                {
                    result.ScriptAfterResult = request.ScriptAfter == null ? null : (ccc.ExecuteCommand(request.ScriptAfter) ?? "").ToString();
                }
                catch (Exception e)
                {
                    result.ScriptAfterResult = e.Message;
                }
                ccc.Remove("Request");
                swHttp.Reset();
                this.testResults.Add(result);
                Requests++;
            }
        }
        swIteration.Stop();
        LastIterationRunTime = swIteration.Elapsed;
        swIteration.Reset();
    }
    IsRunning = false;
    // Done is true only when the loop ended by reaching the iteration limit.
    Done = Iterations == MaxIterations;
}
/// <summary>
/// Expands the <c>%...%</c> placeholders of a SQL script template into text for the given database.
/// </summary>
/// <remarks>
/// Handled placeholders:
/// simple type names (%TIMESTAMP%, %CHAR%, %BOOLEAN%, %SINGLE%, %DOUBLE%, %SMALLINT%,
/// %INTEGER%, %BIGINT%, %GUID%, %BINARY%), resolved through <c>GetType</c>;
/// %STRING(N)% and %STRING_FIXED(N)%, resolved through the database's variable and fixed
/// length string type methods;
/// %CREATE_NEW_GUID%, replaced by a freshly generated GUID in "B" format, and
/// %GET_LAST_GUID%, replaced by the GUID generated for the closest preceding
/// %CREATE_NEW_GUID% (or removed when there is none).
/// </remarks>
/// <param name="origScript">The script template.</param>
/// <param name="database">Database that supplies the concrete type names.</param>
/// <returns>The script with all recognised placeholders replaced.</returns>
/// <exception cref="FormatException">A %STRING()% or %STRING_FIXED()% placeholder has no length.</exception>
protected static string Preprocess(string origScript, ISQLDatabase database)
{
    // Replace simple types
    StringBuilder result = new StringBuilder(origScript);
    result = result.Replace("%TIMESTAMP%", GetType(typeof(DateTime), database)).
        Replace("%CHAR%", GetType(typeof(Char), database)).
        Replace("%BOOLEAN%", GetType(typeof(Boolean), database)).
        Replace("%SINGLE%", GetType(typeof(Single), database)).
        Replace("%DOUBLE%", GetType(typeof(Double), database)).
        Replace("%SMALLINT%", GetType(typeof(Int16), database)).
        Replace("%INTEGER%", GetType(typeof(Int32), database)).
        Replace("%BIGINT%", GetType(typeof(Int64), database)).
        Replace("%GUID%", GetType(typeof(Guid), database)).
        Replace("%BINARY%", GetType(typeof(byte[]), database));

    // For extended replacements: first collect all patterns to be replaced...
    IDictionary<string, string> replacements = new Dictionary<string, string>();
    string interimStr = result.ToString();

    // %STRING([N])%
    Match match = new Regex(@"%STRING\((\d*)\)%").Match(interimStr);
    while (match.Success)
    {
        string pattern = match.Value;
        if (!replacements.ContainsKey(pattern))
        {
            uint length = uint.Parse(match.Groups[1].Value);
            replacements.Add(pattern, database.GetSQLVarLengthStringType(length));
        }
        match = match.NextMatch();
    }

    // %STRING_FIXED([N])%
    match = new Regex(@"%STRING_FIXED\((\d*)\)%").Match(interimStr);
    while (match.Success)
    {
        string pattern = match.Value;
        if (!replacements.ContainsKey(pattern))
        {
            uint length = uint.Parse(match.Groups[1].Value);
            replacements.Add(pattern, database.GetSQLFixedLengthStringType(length));
        }
        match = match.NextMatch();
    }

    // %CREATE_NEW_GUID% / %GET_LAST_GUID%
    // Match positions refer to interimStr, while result grows or shrinks with each replacement,
    // so "shift" tracks the accumulated difference in length.
    string lastGuid = null;
    int shift = 0;
    match = new Regex(@"(%CREATE_NEW_GUID%)|(%GET_LAST_GUID%)").Match(interimStr);
    while (match.Success)
    {
        if (match.Groups[1].Success)
        {
            // %CREATE_NEW_GUID% matched
            lastGuid = Guid.NewGuid().ToString("B");
        }

        // %GET_LAST_GUID% before any %CREATE_NEW_GUID% has nothing to refer to and is removed.
        string guidText = lastGuid ?? string.Empty;
        int position = match.Index + shift;
        result.Remove(position, match.Length).Insert(position, guidText);
        shift += guidText.Length - match.Length;

        match = match.NextMatch();
    }

    // ... then do the actual replacements
    foreach (KeyValuePair<string, string> replacement in replacements)
    {
        result.Replace(replacement.Key, replacement.Value);
    }

    return result.ToString();
}
/// <summary>
/// Parses the folder list page, records every mail folder in <c>boxList</c>, fetches each
/// folder's page and makes sure every folder name is present in the MailBoxList table.
/// </summary>
/// <remarks>
/// Folder names come from the remote page, so single quotes in them are doubled before the
/// name is placed in a SQL literal.
/// NOTE(review): <c>base.m_NO</c> is still concatenated as is, and the number of folders is
/// not checked against the capacity of <c>boxList</c> — confirm both are safe for the callers.
/// </remarks>
/// <param name="message">HTML of the folder management page.</param>
private void getBoxName(string message)
{
    string pattern = "<td class=\"ManageFoldersFolderNameCol\"><div.*?href=\"(?<BoxUrl>[^\"]+)\".*?>(?<BoxNamme>[^<]+)</a[\\s\\S]+?<\\/td>";
    Match match = new Regex(pattern).Match(message);
    if (!match.Success)
    {
        base.ShowMessage("取箱子失败!");
        return;
    }

    // Pass 1: collect name, url and id of every folder on the page.
    int boxCount = 0;
    while (match.Success)
    {
        string boxUrl = match.Groups["BoxUrl"].Value;
        this.boxList[boxCount].boxname = base.BoxName = match.Groups["BoxNamme"].Value;
        this.boxList[boxCount].boxUrl = boxUrl;
        this.boxList[boxCount].boxid = base.putstr(boxUrl, "FolderID=", "&", 0);
        match = match.NextMatch();
        boxCount++;
    }

    // Pass 2: request each folder page and hand it to getPages.
    for (int index = 0; index < boxCount; index++)
    {
        string url = base.Host + this.boxList[index].boxUrl;
        this.cookie = this.cookieTemp;
        base.MyStringBuilder.Remove(0, base.MyStringBuilder.Length);
        base.streamControl = true;
        base.MyStringBuilder = this.Request(url);
        this.getPages(base.MyStringBuilder.ToString(), index);
    }

    // Pass 3: insert folder names that are not yet stored for this account.
    lock (SelMailBoxL)
    {
        for (int index = 0; index < boxCount; index++)
        {
            // Double single quotes so a folder name cannot terminate the SQL string literal.
            string safeName = (Convert.ToString(this.boxList[index].boxname) ?? string.Empty).Replace("'", "''");

            string strSql = string.Concat(new object[] { "select count(*) from MailBoxList where 序号='", base.m_NO, "' and MailBoxName = '", safeName, "'" });
            if (Convert.ToInt32(GlobalValue.PopMainForm.ExecuteSQL(strSql)) == 0)
            {
                strSql = string.Concat(new object[] { "insert into MailBoxList (序号,MailBoxName)values('", base.m_NO, "','", safeName, "');" });
                GlobalValue.PopMainForm.ExecuteSQL(strSql);
            }
        }
    }
}
/// <summary>
/// Downloads <paramref name="url"/>, adds every "ip:port" pair found in the page to
/// <c>Ips</c> and every anchor href to <c>glob_visited</c>.
/// </summary>
/// <remarks>
/// Any exception is written to the console and otherwise ignored. The response, its stream
/// and the reader are disposed even when reading fails.
/// NOTE(review): the memory barriers around the Add calls are kept from the original; whether
/// <c>Ips</c> and <c>glob_visited</c> are safe for concurrent writers cannot be told from here.
/// </remarks>
/// <param name="url">Absolute URL of the page to fetch.</param>
public void fetch(string url)
{
    Console.WriteLine("{0}, Thread Id= {1}", url, Thread.CurrentThread.ManagedThreadId);
    try
    {
        var request = (HttpWebRequest)WebRequest.Create(new Uri(url));
        request.UserAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7";
        request.Referer = "http://www.google.com";

        string responseFromServer;
        using (WebResponse response = request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.UTF8))
        {
            responseFromServer = reader.ReadToEnd();
        }

        // Proxy candidates: dotted quad followed by a port.
        Match proxyMatch = new Regex(@"([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,5})", RegexOptions.IgnoreCase).Match(responseFromServer);
        while (proxyMatch.Success)
        {
            Group g = proxyMatch.Groups[0];
            Thread.MemoryBarrier();
            Ips.Add(g.ToString());
            Thread.MemoryBarrier();
            proxyMatch = proxyMatch.NextMatch();
        }

        // Link targets: quoted href values of <a> tags.
        Match httpMatch = new Regex(@"<(?<Tag_Name>(a))\b[^>]*?\b(?<URL_Type>(?(1)href))\s*=\s*(?:""(?<URL>(?:\\""|[^""])*)""|'(?<URL>(?:\\'|[^'])*)')", RegexOptions.IgnoreCase).Match(responseFromServer);
        while (httpMatch.Success)
        {
            Thread.MemoryBarrier();
            // "URL" is group number 4: unnamed groups are numbered first, then named ones.
            glob_visited.Add(httpMatch.Groups["URL"].ToString());
            Thread.MemoryBarrier();
            httpMatch = httpMatch.NextMatch();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}
/// <summary>
/// Extracts the chapter number from a title of the form "第…章"; the prologue marker "引子"
/// counts as chapter 0.
/// </summary>
/// <remarks>
/// The number may be written with ASCII digits ("第12章") or Chinese numerals ("第十五章").
/// Chinese numerals are split into "digit + unit" tokens (十, 百, 千, 万) and bare digit
/// groups whose values are summed; a 十 without a leading digit counts as 10. The characters
/// 〇 and 零 act only as separators.
/// </remarks>
/// <param name="title">Chapter title to inspect.</param>
/// <returns>The chapter number, 0 for "引子", or -1 when the title has no chapter marker.</returns>
public static int GetNumberFromTitle(this string title)
{
    Match mc = new Regex("第[一二三四五六七八九〇零十百千万1234567890]+章|引子", RegexOptions.None).Match(title);
    if (!mc.Success)
    {
        return -1;
    }

    string str_chineseNumber = mc.Groups[0].Value.Replace("第", "").Replace("章", "").Replace("引子", "0");

    // A number written with plain ASCII digits is parsed as is; the token pattern below would ignore it.
    bool asciiDigitsOnly = str_chineseNumber.Length > 0;
    foreach (char c in str_chineseNumber)
    {
        if (c < '0' || c > '9')
        {
            asciiDigitsOnly = false;
            break;
        }
    }

    if (asciiDigitsOnly)
    {
        int parsed;
        return int.TryParse(str_chineseNumber, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out parsed)
            ? parsed
            : 0;
    }

    int result = 0;
    Match m = new Regex("[一二三四五六七八九]?十|[一二三四五六七八九]+百|[一二三四五六七八九]+千|[一二三四五六七八九]+万|[一二三四五六七八九]+", RegexOptions.None).Match(str_chineseNumber);
    while (m.Success)
    {
        string token = m.Value;

        // "十" on its own means ten; without an explicit leading digit it would turn into "0".
        if (token[0] == '十')
        {
            token = "一" + token;
        }

        // Digits map to themselves and each unit to its trailing zeros, e.g. "二百" -> "200".
        string digits = token.Replace("一", "1")
                             .Replace("二", "2")
                             .Replace("三", "3")
                             .Replace("四", "4")
                             .Replace("五", "5")
                             .Replace("六", "6")
                             .Replace("七", "7")
                             .Replace("八", "8")
                             .Replace("九", "9")
                             .Replace("十", "0")
                             .Replace("百", "00")
                             .Replace("千", "000")
                             .Replace("万", "0000");

        int value;
        if (int.TryParse(digits, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out value))
        {
            result += value;
        }

        m = m.NextMatch();
    }

    return result;
}
/// <summary>
/// Replaces template tags of the form <c>[name]body[/name]</c> with the content produced by
/// <c>GetTagContent</c>.
/// </summary>
/// <remarks>
/// Only spans whose opening and closing names are equal are replaced. Matches are located
/// in the string as it was passed in; each replacement is applied to the progressively
/// updated text.
/// </remarks>
/// <param name="TmpString">Template text.</param>
/// <returns>The text with all well-formed tags replaced.</returns>
public string ReplaceTagContent(string TmpString)
{
    Regex tagPattern = new Regex("\\[(?<key>.*?)\\](?<key2>.*?)\\[/(?<key3>.*?)\\]", RegexOptions.None);

    for (Match hit = tagPattern.Match(TmpString); hit.Success; hit = hit.NextMatch())
    {
        string openName = hit.Groups["key"].Value;
        if (openName != hit.Groups["key3"].Value)
        {
            // Opening and closing names differ: not a tag pair, leave it alone.
            continue;
        }

        string wholeTag = hit.Groups[0].Value;
        string tagExpression = string.Format("[{0}]{1}[/{0}]", openName, hit.Groups["key2"].Value);
        TmpString = TmpString.Replace(wholeTag, GetTagContent(tagExpression));
    }

    return TmpString;
}
/// <summary>
/// Replaces system-setting placeholders of the form <c>[!--sys.KEY--]</c> with the value
/// returned by <c>GetSysSettingContent</c> for KEY.
/// </summary>
/// <remarks>
/// Placeholders are replaced as literal text. Neither the key nor the setting value is
/// interpreted as a regular expression, so characters such as '$', '.' or '(' in either of
/// them are inserted verbatim. A null setting value removes the placeholder.
/// </remarks>
/// <param name="TmpString">Template text; may be null or empty.</param>
/// <returns>The text with all placeholders replaced; an empty string for null or empty input.</returns>
public string ReplaceSystemSetting(string TmpString)
{
    if (string.IsNullOrEmpty(TmpString))
    {
        return "";
    }

    Match mc_sys = new Regex("\\[\\!--sys\\.(?<key>.*?)--\\]", RegexOptions.None).Match(TmpString);
    while (mc_sys.Success)
    {
        // mc_sys.Value is the exact placeholder text, e.g. "[!--sys.sitename--]".
        TmpString = TmpString.Replace(mc_sys.Value, GetSysSettingContent(mc_sys.Groups["key"].Value));
        mc_sys = mc_sys.NextMatch();
    }

    return TmpString;
}
/// <summary>
/// Replaces public-template placeholders of the form <c>[!--temp.KEY--]</c> with the
/// template text returned by <c>GetPublicTemplate</c> for KEY.
/// </summary>
/// <remarks>
/// Placeholders are replaced as literal text. Neither the key nor the template text is
/// interpreted as a regular expression, so characters such as '$', '.' or '(' in either of
/// them are inserted verbatim. A null template removes the placeholder.
/// </remarks>
/// <param name="TmpString">Template content; may be null or empty.</param>
/// <returns>The content with all placeholders replaced; an empty string for null or empty input.</returns>
public string ReplacePublicTemplate(string TmpString)
{
    if (TmpString.IsNullOrEmpty())
    {
        return "";
    }

    Match mc_pubic = new Regex("\\[\\!--temp\\.(?<key>.*?)--\\]", RegexOptions.None).Match(TmpString);
    while (mc_pubic.Success)
    {
        // mc_pubic.Value is the exact placeholder text, e.g. "[!--temp.header--]".
        TmpString = TmpString.Replace(mc_pubic.Value, GetPublicTemplate(mc_pubic.Groups["key"].Value));
        mc_pubic = mc_pubic.NextMatch();
    }

    return TmpString;
}
/// <summary>
/// Rebuilds the function list from the editor text: for every "function ..." occurrence the
/// token after the keyword is added to the list box and to <c>funcList</c>, and the line
/// number of the occurrence is added to <c>funcLine</c>.
/// </summary>
/// <remarks>
/// NOTE(review): "(?!=\()" is a negative lookahead for the two characters "=(", not a
/// lookahead for "(" — the captured token therefore runs to the next whitespace and may
/// include the parameter list. Confirm whether that is intended.
/// </remarks>
private void updateFuncList()
{
    string source = this.richTextBox1.Text;
    Regex declaration = new Regex(@"function \S*(?!=\()");

    this.listBox1.Items.Clear();
    this.funcList.Clear();
    this.funcLine.Clear();

    for (Match hit = declaration.Match(source); hit.Success; hit = hit.NextMatch())
    {
        // Second space-separated piece, i.e. whatever follows "function ".
        string[] pieces = hit.Value.Split(' ');
        string item = pieces[1];

        this.listBox1.Items.Add(item);
        this.funcList.Add(item);

        int lineNumber = this.richTextBox1.GetLineFromCharIndex(hit.Index);
        this.funcLine.Add(lineNumber.ToString());
    }
}
/// <summary>
/// Reads labelled sparse feature vectors, one per line, and registers each of them with
/// <c>SvmLightLib.NewFeatureVector</c>.
/// </summary>
/// <remarks>
/// Each data line starts with a label followed by "feature:weight" pairs. Lines starting
/// with '#' and blank lines are skipped. Numbers are parsed with the invariant culture.
/// </remarks>
/// <param name="reader">Source of the lines; read until the end.</param>
/// <returns>The ids returned by <c>SvmLightLib.NewFeatureVector</c>, in file order.</returns>
/// <exception cref="FormatException">A data line has no valid leading label, or the label is not an integer.</exception>
private static List<int> ReadFeatureVectors(StreamReader reader)
{
    // Built once instead of once per line.
    Regex labelPattern = new Regex(@"^(?<label>[+-]?\d+([.]\d+)?)(\s|$)");
    Regex featurePattern = new Regex(@"(?<feature>\d+):(?<weight>[-]?[\d\.]+)");
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    List<int> feature_vectors = new List<int>();
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // Skip comments and blank lines (for example a trailing newline at the end of the file).
        if (line.Trim().Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
        {
            continue;
        }

        Match label_match = labelPattern.Match(line);
        if (!label_match.Success)
        {
            throw new FormatException("Feature vector line does not start with a label: " + line);
        }
        int label = Convert.ToInt32(label_match.Groups["label"].Value, invariant);

        List<int> features = new List<int>();
        List<float> weights = new List<float>();
        for (Match match = featurePattern.Match(line); match.Success; match = match.NextMatch())
        {
            features.Add(Convert.ToInt32(match.Groups["feature"].Value, invariant));
            weights.Add(Convert.ToSingle(match.Groups["weight"].Value, invariant));
        }

        int vec_id = SvmLightLib.NewFeatureVector(features.Count, features.ToArray(), weights.ToArray(), label);
        feature_vectors.Add(vec_id);
    }

    return feature_vectors;
}
/// <summary>
/// Returns the value of the group named "key" for every match of a pattern in a string.
/// </summary>
/// <param name="input">Source string.</param>
/// <param name="pattern">Regular expression, matched case-insensitively; it is expected to define a group named "key".</param>
/// <returns>One entry per match, in order of appearance; the entry is empty when the group did not take part in the match.</returns>
public static List<string> GetMatch(this string input, string pattern)
{
    List<string> keys = new List<string>();

    foreach (Match hit in Regex.Matches(input, pattern, RegexOptions.IgnoreCase))
    {
        keys.Add(hit.Groups["key"].Value);
    }

    return keys;
}
/// <summary>
/// Processes one URI of the crawl: requests it, applies the content-type and size filters,
/// analyses and indexes the page, downloads the body and enqueues the links found in it.
/// </summary>
/// <remarks>
/// Progress is shown in the list view row whose index is the current thread's name parsed
/// as an integer. A redirect is handled by enqueueing the redirect target instead of
/// processing the response. Any exception in the main part is logged through
/// <c>LogError</c> and resets <paramref name="request"/> to null; the list view row is
/// always cleared through <c>EraseItem</c> at the end.
/// </remarks>
/// <param name="uri">The URI to process.</param>
/// <param name="request">
/// Request from the previous call, passed on to <c>MyWebRequest.Create</c>; on return it holds
/// the request that was created, or null after a redirect, a filter rejection or an error.
/// </param>
private void ParseUri(MyUri uri, ref MyWebRequest request)
{
    // str accumulates a textual log of this request; it is passed to LogError on failure.
    string str = "";
    if ((request != null) && request.response.KeepAlive)
    {
        str = str + "连接转至: " + uri.Host + "\r\n\r\n";
    }
    else
    {
        str = str + "连接: " + uri.Host + "\r\n\r\n";
    }
    ListViewItem item = null;
    // Update this thread's status row; failures here are deliberately ignored.
    Monitor.Enter(this.listViewThreads);
    try
    {
        item = this.listViewThreads.Items[int.Parse(Thread.CurrentThread.Name)];
        item.SubItems[1].Text = uri.Depth.ToString();
        item.ImageIndex = 1;
        item.BackColor = System.Drawing.Color.WhiteSmoke;
        item.SubItems[2].Text = "正在连接";
        item.ForeColor = System.Drawing.Color.Red;
        item.SubItems[3].Text = uri.AbsoluteUri;
        item.SubItems[4].Text = "";
        item.SubItems[5].Text = "";
    }
    catch (Exception)
    {
    }
    Monitor.Exit(this.listViewThreads);
    try
    {
        object obj2;
        request = MyWebRequest.Create(uri, request, this.KeepAlive);
        // 0x3e8 == 1000; presumably converts RequestTimeout from seconds to milliseconds — TODO confirm.
        request.Timeout = this.RequestTimeout * 0x3e8;
        MyWebResponse response = request.GetResponse();
        str = str + request.Header + response.Header;
        if (!response.ResponseUri.Equals(uri))
        {
            // Redirected: queue the target and drop this request.
            this.EnqueueUri(new MyUri(response.ResponseUri.AbsoluteUri), true);
            obj2 = str;
            str = string.Concat(new object[] { obj2, "重定向到: ", response.ResponseUri, "\r\n" });
            request = null;
        }
        else
        {
            if ((!this.AllMIMETypes && (response.ContentType != null)) && (this.MIMETypes.Length > 0))
            {
                // Content-Type without parameters (text after ';' is cut off).
                string str2 = response.ContentType.ToLower();
                int index = str2.IndexOf(';');
                if (index != -1)
                {
                    str2 = str2.Substring(0, index);
                }
                if ((str2.IndexOf('*') == -1) && ((index = this.MIMETypes.IndexOf(str2)) == -1))
                {
                    this.LogError(uri.AbsoluteUri, str + "\r\nUnlisted Content-Type (" + str2 + "), check settings.");
                    request = null;
                    return;
                }
                // The first two numbers found in MIMETypes from position index on are used as
                // the lower and upper size limit; 0x400 == 1024, so they are read as kilobytes.
                // NOTE(review): index can still be -1 here when the type contains '*' — confirm
                // that case cannot occur.
                Match match = new Regex(@"\d+").Match(this.MIMETypes, index);
                int num3 = int.Parse(match.Value) * 0x400;
                int num4 = int.Parse(match.NextMatch().Value) * 0x400;
                if ((num3 < num4) && ((response.ContentLength < num3) || (response.ContentLength > num4)))
                {
                    this.LogError(uri.AbsoluteUri, string.Concat(new object[] { str, "\r\nContentLength limit error (", response.ContentLength, ")" }));
                    request = null;
                    return;
                }
            }
            // flag == true means the body is kept as text and later scanned for links.
            string[] strArray = new string[] { ".gif", ".jpg", ".css", ".zip", ".exe" };
            bool flag = true;
            foreach (string str3 in strArray)
            {
                if (uri.AbsoluteUri.ToLower().EndsWith(str3))
                {
                    flag = false;
                    break;
                }
            }
            foreach (string str3 in this.ExcludeFiles)
            {
                if ((str3.Trim().Length > 0) && uri.AbsoluteUri.ToLower().EndsWith(str3))
                {
                    flag = false;
                    break;
                }
            }
            string strBody = uri.ToString();
            // Compared() is given the lower-cased file extension; URIs ending in "/" are never rejected here.
            if (this.Compared(uri.LocalPath.Substring(uri.LocalPath.LastIndexOf('.') + 1).ToLower()) && (uri.ToString().Substring(uri.ToString().Length - 1, 1) != "/"))
            {
                this.LogError("丢弃--非网页文件", strBody);
            }
            else
            {
                int num5;
                UriKind absolute = UriKind.Absolute;
                if (!string.IsNullOrEmpty(strBody) && Uri.IsWellFormedUriString(strBody, absolute))
                {
                    // Fetch the page separately through GetPage and run the content analyser on it.
                    string page = GetPage(strBody);
                    Stopwatch stopwatch = new Stopwatch();
                    stopwatch.Start();
                    Html html = new Html { Web = page, Url = strBody };
                    CommonAnalyze analyze = new CommonAnalyze();
                    analyze.LoadHtml(html);
                    Net.LikeShow.ContentAnalyze.Document result = analyze.GetResult();
                    stopwatch.Stop();
                    // Title with the "[(title)]" marker removed; when empty, the document text
                    // from character 20 onwards is used instead.
                    string bt = result.Title.Replace("[(title)]", "");
                    switch (bt)
                    {
                        case null:
                        case "":
                            bt = result.Doc.Substring(20).ToString();
                            break;
                    }
                    if ((result.Doc == null) || (result.Doc == ""))
                    {
                        this.LogError("丢弃--空内容或非内空页", strBody);
                    }
                    else
                    {
                        Lucene.Net.Documents.Document document3;
                        string str7 = result.Doc + bt;
                        if (this.cgcount >= 10)
                        {
                            // From the tenth indexed page on, look the content's MD5 up in the
                            // index first and skip the page when the first hit carries the same hash.
                            string keywords = this.MD5string(result.Doc.ToString());
                            string keyWordsSplitBySpace = "";
                            IndexSearcher searcher = new IndexSearcher(this.path);
                            keyWordsSplitBySpace = GetKeyWordsSplitBySpace(keywords, new KTDictSegTokenizer());
                            Query query = new QueryParser("J_md5_bai", new KTDictSegAnalyzer(true)).Parse(keyWordsSplitBySpace);
                            if (searcher.Search(query).Doc(0).Get("J_md5_bai") == keywords)
                            {
                                this.LogError("排除--重复", strBody);
                            }
                            else
                            {
                                this.cgcount++;
                                this.LogUri(bt, "引索完成");
                                document3 = new Lucene.Net.Documents.Document();
                                document3.Add(new Field("分类", this.page_py, Field.Store.YES, Field.Index.TOKENIZED));
                                document3.Add(new Field("J_title_bai", bt, Field.Store.YES, Field.Index.TOKENIZED));
                                document3.Add(new Field("J_msgContent_bai", str7, Field.Store.YES, Field.Index.TOKENIZED));
                                document3.Add(new Field("J_SiteType_bai", result.SiteType.ToString(), Field.Store.YES, Field.Index.NO));
                                document3.Add(new Field("J_URL_bai", strBody, Field.Store.YES, Field.Index.NO));
                                document3.Add(new Field("J_addtime_bai", DateTime.Now.ToShortDateString(), Field.Store.YES, Field.Index.NO));
                                document3.Add(new Field("J_md5_bai", this.MD5string(result.Doc.ToString()), Field.Store.YES, Field.Index.TOKENIZED));
                                this.writer.AddDocument(document3);
                            }
                        }
                        else
                        {
                            // Fewer than ten pages indexed so far: index without the duplicate check.
                            this.cgcount++;
                            this.LogUri(bt, "引索完成");
                            document3 = new Lucene.Net.Documents.Document();
                            document3.Add(new Field("分类", this.page_py, Field.Store.YES, Field.Index.TOKENIZED));
                            document3.Add(new Field("J_title_bai", bt, Field.Store.YES, Field.Index.TOKENIZED));
                            document3.Add(new Field("J_msgContent_bai", str7, Field.Store.YES, Field.Index.TOKENIZED));
                            document3.Add(new Field("J_SiteType_bai", result.SiteType.ToString(), Field.Store.YES, Field.Index.NO));
                            document3.Add(new Field("J_URL_bai", strBody, Field.Store.YES, Field.Index.NO));
                            document3.Add(new Field("J_addtime_bai", DateTime.Now.ToShortDateString(), Field.Store.YES, Field.Index.NO));
                            document3.Add(new Field("J_md5_bai", this.MD5string(result.Doc.ToString()), Field.Store.YES, Field.Index.TOKENIZED));
                            this.writer.AddDocument(document3);
                        }
                    }
                }
                item.SubItems[2].Text = "正在下载";
                item.ForeColor = System.Drawing.Color.Black;
                // Read the response body from the socket in chunks of 0x2800 (10240) bytes.
                string input = "";
                byte[] buffer = new byte[0x2800];
                int nNum = 0;
                while ((num5 = response.socket.Receive(buffer, 0, 0x2800, SocketFlags.None)) > 0)
                {
                    nNum += num5;
                    if (flag)
                    {
                        // The text is only needed for link extraction, so bytes are decoded as ASCII.
                        input = input + Encoding.ASCII.GetString(buffer, 0, num5);
                    }
                    item.SubItems[4].Text = this.Commas(nNum);
                    if (response.ContentLength > 0)
                    {
                        item.SubItems[5].Text = '%' + ((100 - (((response.ContentLength - nNum) * 100) / response.ContentLength))).ToString();
                    }
                    // On a kept-alive connection, stop once the announced content length has been read.
                    if ((response.KeepAlive && (nNum >= response.ContentLength)) && (response.ContentLength > 0))
                    {
                        break;
                    }
                }
                if (response.KeepAlive)
                {
                    str = str + "Connection kept alive to be used in subpages.\r\n";
                }
                else
                {
                    response.Close();
                    str = str + "Connection closed.\r\n";
                }
                this.FileCount++;
                this.ByteCount += nNum;
                if ((this.ThreadsRunning && flag) && (uri.Depth < this.WebDepth))
                {
                    str = str + "\r\nParsing page ...\r\n";
                    // Collect quoted href/src attribute values.
                    string pattern = "(href|HREF|src|SRC)[ ]*=[ ]*[\"'][^\"'#>]+[\"']";
                    MatchCollection matchs = new Regex(pattern).Matches(input);
                    obj2 = str;
                    str = string.Concat(new object[] { obj2, "Found: ", matchs.Count, " ref(s)\r\n" });
                    this.URLCount += matchs.Count;
                    foreach (Match match in matchs)
                    {
                        // From here on the variable pattern holds the extracted reference, not a regex.
                        pattern = match.Value.Substring(match.Value.IndexOf('=') + 1).Trim(new char[] { '"', '\'', '#', ' ', '>' });
                        try
                        {
                            // Anything that is not a plain "http://" reference is resolved against the current URI.
                            if (!(((pattern.IndexOf("..") == -1) && !pattern.StartsWith("/")) && pattern.StartsWith("http://")))
                            {
                                pattern = new Uri(uri, pattern).AbsoluteUri;
                            }
                            this.Normalize(ref pattern);
                            MyUri uri2 = new MyUri(pattern);
                            // Skip non-HTTP(S) links, links whose second host label differs from
                            // urllhost[1] when KeepSameServer is set, and links rejected by Compared_jpg.
                            if ((((uri2.Scheme != Uri.UriSchemeHttp) && (uri2.Scheme != Uri.UriSchemeHttps)) || ((uri2.Host.Split(new char[] { '.' })[1] != this.urllhost[1]) && this.KeepSameServer)) || !this.Compared_jpg(uri2.LocalPath.Substring(uri2.LocalPath.LastIndexOf('.') + 1).ToLower()))
                            {
                                continue;
                            }
                            Global.URL = uri2.ToString();
                            if ((Global.BXBH != "") && (Redspider_link.bxbh() == 2))
                            {
                                continue;
                            }
                            uri2.Depth = uri.Depth + 1;
                            if (this.EnqueueUri(uri2, true))
                            {
                                str = str + uri2.AbsoluteUri + "\r\n";
                            }
                        }
                        catch (Exception)
                        {
                            // A reference that cannot be turned into a URI is skipped.
                        }
                    }
                }
            }
        }
    }
    catch (Exception exception)
    {
        this.LogError(uri.AbsoluteUri, str + exception.Message);
        request = null;
    }
    finally
    {
        this.EraseItem(item);
    }
}
/// <summary>
/// Serializes the fields of one HTML form into name/value pairs.
/// </summary>
/// <remarks>
/// The form is selected by <paramref name="id"/>: "#x" matches <c>id="x"</c>, ".x" matches
/// <c>class="x"</c>, "@x" matches <c>name="x"</c>, and a value without prefix is treated as
/// an id. The selector value is matched literally. Only <c>&lt;input&gt;</c> and
/// <c>&lt;textarea&gt;</c> elements are collected; check boxes and radio buttons are
/// included only when their <c>checked</c> attribute equals "checked".
/// </remarks>
/// <param name="html">HTML that contains the form.</param>
/// <param name="id">Form selector, see remarks.</param>
/// <returns>
/// The collected pairs; an empty collection when <paramref name="id"/> is null or empty or
/// when no matching form is found.
/// </returns>
public static NameValueCollection SerializeForm(this string html, string id)
{
    NameValueCollection nv = new NameValueCollection();
    if (string.IsNullOrEmpty(id))
    {
        return nv;
    }

    // Translate the selector prefix into the attribute to look for.
    string attribute;
    string selectorValue;
    switch (id[0])
    {
        case '#':
            attribute = "id";
            selectorValue = id.Substring(1);
            break;
        case '.':
            attribute = "class";
            selectorValue = id.Substring(1);
            break;
        case '@':
            attribute = "name";
            selectorValue = id.Substring(1);
            break;
        default:
            attribute = "id";
            selectorValue = id;
            break;
    }

    // Escape the value so that characters like '.' or '(' in it are matched literally.
    string formPattern = string.Format(
        "<form[\\s\\S]*?{0}=\"{1}\"[\\s\\S]*?>(?<key>[\\s\\S]*?)</form>",
        attribute,
        Regex.Escape(selectorValue));

    var forms = html.GetMatch(formPattern);
    if (forms.Count == 0)
    {
        return nv;
    }
    string strForm = forms[0];

    for (Match match_Input = new Regex("<input .*?>|<textarea[\\w\\W]*?</textarea>").Match(strForm);
         match_Input.Success;
         match_Input = match_Input.NextMatch())
    {
        string ele = match_Input.Groups[0].Value.ToS();
        string type = ele.GetHtmlElementAttribute("type");

        // Unchecked check boxes and radio buttons are not submitted.
        bool isToggle = type == "checkbox" || type == "radio";
        if (isToggle && ele.GetHtmlElementAttribute("checked") != "checked")
        {
            continue;
        }

        nv.Add(ele.GetHtmlElementAttribute("name"), ele.GetHtmlElementAttribute("value"));
    }

    return nv;
}
/// <summary>
/// Parses the folder list returned for a mail.163.com account, stores each folder's name and
/// id in <c>boxList</c>, and then calls <c>getNewMailId</c> for every folder.
/// </summary>
/// <remarks>
/// A failure message is shown when <c>urls</c> does not contain "mail.163.com", when no
/// folder is found, or when any exception occurs.
/// </remarks>
/// <param name="strfolder">Folder list document to parse.</param>
private void getNewBoxName(string strfolder)
{
    try
    {
        if (this.urls.IndexOf("mail.163.com") == -1)
        {
            base.ShowMessage("取箱子失败!");
            return;
        }

        Regex boxPattern = new Regex("<object>[\\s\\S]+?name=\"id\">(?<BoxID>[\\d]+)?<\\/int>[\\s\\S]+?name=\"name\">(?<BoxName>[^<]+)?<\\/str[\\s\\S]+?<\\/object>");
        Match match = boxPattern.Match(strfolder);
        if (match.Length < 1)
        {
            base.ShowMessage("取箱子失败!");
            return;
        }

        // First record all folders, then process them.
        int boxCount = 0;
        for (; match.Success; match = match.NextMatch(), boxCount++)
        {
            this.boxList[boxCount].boxname = match.Groups["BoxName"].Value;
            this.boxList[boxCount].boxid = match.Groups["BoxID"].Value;
        }

        int i = 0;
        while (i < boxCount)
        {
            this.getNewMailId(this.urls, this.boxList[i].boxname, this.boxList[i].boxid);
            i++;
        }
    }
    catch (Exception exception)
    {
        base.ShowMessage("取箱子失败!" + exception.Message);
    }
}