Exemple #1
0
 /// <summary>
 /// Extracts the first run of Chinese or Arabic numeral characters from
 /// <paramref name="text"/> and converts it to an integer.
 /// </summary>
 /// <param name="text">Text that may contain a number such as "二十五", "一百零三" or "42".</param>
 /// <returns>
 /// The sum of the recognised numeral groups (tens, hundreds, thousands, ten-thousands
 /// and plain digit runs); 0 when nothing recognisable is found.
 /// </returns>
 /// <remarks>
 /// Extraction relies on the project helpers <c>GetMatchGroup</c>, <c>IsNull</c> and
 /// <c>ToInt32</c>, which are defined elsewhere. NOTE(review): <c>IsNull("0")</c> presumably
 /// substitutes "0" when no numeral run was found - confirm against the helper.
 /// Characters accepted by the extraction pattern but not by the conversion pattern
 /// (亿, 兆, 零, 壹..玖) contribute nothing to the result.
 /// </remarks>
 public static int GetNumberFromText(this string text)
 {
     string clearNumberStr = text.GetMatchGroup("(?<key>[一二三四五六七八九十百千万亿兆零壹贰叁肆伍陆柒捌玖0123456789]+)").Groups["key"].Value.IsNull("0");

     // A run made only of ASCII digits is already the number. The conversion pattern
     // below knows nothing about Arabic digits, so such input used to yield 0.
     // TryParse fails on mixed or overflowing input, which then takes the old path.
     int arabic;
     if (int.TryParse(clearNumberStr, out arabic))
     {
         return arabic;
     }

     int result = 0;
     Match m = new Regex("[一二三四五六七八九]?十|[一二三四五六七八九]+百|[一二三四五六七八九]+千|[一二三四五六七八九]+万|[一二三四五六七八九]+", RegexOptions.None).Match(clearNumberStr);
     while (m.Success)
     {
         // Each group becomes a decimal literal: digits map 1:1, unit characters become zeros.
         string digits = m.Groups[0].Value.Replace("一", "1")
             .Replace("二", "2")
             .Replace("三", "3")
             .Replace("四", "4")
             .Replace("五", "5")
             .Replace("六", "6")
             .Replace("七", "7")
             .Replace("八", "8")
             .Replace("九", "9")
             .Replace("十", "0")
             .Replace("百", "00")
             .Replace("千", "000")
             .Replace("万", "0000");

         // A 十 without a leading digit means ten; it is the only group that maps to a lone "0".
         if (digits == "0")
         {
             digits = "10";
         }

         result += digits.ToInt32();
         m = m.NextMatch();
     }
     return result;
 }
        /// <summary>
        /// Converts an HTML fragment to plain text, keeping the target of every
        /// hyperlink as " (url) " in front of the link text.
        /// </summary>
        /// <param name="htmlText">The HTML to convert; may be null or empty.</param>
        /// <returns>
        /// The text with <c>&lt;br&gt;</c> turned into line breaks, <c>&amp;nbsp;</c> into
        /// spaces and every other tag removed; an empty string for null or empty input.
        /// </returns>
        private static string StripHtml(string htmlText)
        {
            if (string.IsNullOrEmpty(htmlText))
            {
                return string.Empty;
            }

            htmlText = htmlText.Replace("<br>", "\n");
            htmlText = htmlText.Replace("&nbsp;", " ");

            // Resolve each anchor from its own href. Pairing a document-wide href list with
            // anchor placeholders by position gave anchors the URL of unrelated tags
            // (e.g. <link href>), left "-url-" behind for anchors without href, and treated
            // <abbr>/<article> as anchors.
            htmlText = Regex.Replace(htmlText, @"<a\b[^>]*>", anchor =>
            {
                Match href = Regex.Match(anchor.Value, @"href=([""']+)(?<Url>(([^""'])+))");
                return href.Success
                    ? string.Format(" ({0}) ", href.Groups["Url"].Value)
                    : string.Empty;
            });

            // Drop every remaining tag, including the closing </a>.
            return Regex.Replace(htmlText, @"<.*?>", string.Empty);
        }
Exemple #3
0
 /// <summary>
 /// Collects every hashtag found in <paramref name="s"/>, in order of appearance.
 /// A hashtag is '#' followed by one or more ASCII letters, digits or underscores.
 /// </summary>
 /// <param name="s">The text to scan.</param>
 /// <returns>The matched hashtags including the leading '#'; empty when there are none.</returns>
 private List<string> GetTag(string s)
 {
     List<string> tags = new List<string>();
     foreach (Match hit in Regex.Matches(s, @"#[a-zA-Z0-9_]+"))
     {
         tags.Add(hit.Value);
     }
     return tags;
 }
Exemple #4
0
        /// <summary>
        /// Decodes JavaScript-style <c>\uXXXX</c> escape sequences into the characters
        /// they represent (for example turning escaped Chinese text back into Chinese).
        /// </summary>
        /// <param name="AsciiString">Text that may contain <c>\uXXXX</c> escapes.</param>
        /// <returns>
        /// The text with every well-formed escape (exactly four hexadecimal digits) decoded.
        /// Malformed escapes are left untouched; null or empty input is returned as is.
        /// </returns>
        public static string AsciiToNative(this string AsciiString)
        {
            if (string.IsNullOrEmpty(AsciiString))
            {
                return AsciiString;
            }

            // Decode in a single pass so text produced by one replacement is never rescanned.
            // Casting the code unit directly (rather than round-tripping two bytes through
            // Encoding.Unicode) keeps surrogate halves intact, so an escaped surrogate pair
            // decodes to the real character instead of two U+FFFD replacement characters.
            return Regex.Replace(
                AsciiString,
                @"\\u([0-9a-fA-F]{4})",
                m => ((char)Convert.ToInt32(m.Groups[1].Value, 16)).ToString());
        }
        /// <summary>
        /// Builds display text from the converter parameter. The parameter may contain
        /// templates of the form <c>word(a,b,c)</c>, whose ending is chosen by
        /// <c>Engine.Class.FinText.get</c> from the numeric <paramref name="value"/>,
        /// and the markers <c>ValueNull</c> / <c>value</c>.
        /// </summary>
        /// <param name="value">The bound value; read through <c>GetValueNumber</c> when a template is present.</param>
        /// <param name="targetType">Not used.</param>
        /// <param name="parameter">The text template; read as a string.</param>
        /// <param name="culture">Not used.</param>
        /// <returns>
        /// The parameter with <c>ValueNull</c> replaced by the format string <c>{0:N0}</c>, or,
        /// when templates are present and <c>ValueNull</c> is absent, with <c>value</c> replaced by
        /// the formatted number. Without templates and without <c>ValueNull</c> a fixed
        /// "not implemented" message is returned.
        /// </returns>
        public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
        {
            const string templatePattern = @"(\w+)\((\w+,\w+,\w+)\)";
            string param = parameter as string;

            if (!Regex.IsMatch(param, templatePattern))
            {
                // Plain string substitution: value may be a string or a number, the result is always a string.
                return Regex.IsMatch(param, "ValueNull")
                    ? Regex.Replace(param, "ValueNull", "{0:N0}")
                    : "Не реализованно";
            }

            // Word endings inside the text: value has to be numeric here.
            double number = GetValueNumber(value);

            // The matches are enumerated over the original parameter text; param itself is
            // rewritten on every pass.
            for (Match template = Regex.Match(param, templatePattern); template.Success; template = template.NextMatch())
            {
                GroupCollection groups = template.Groups;
                string[] forms = groups[2].Value.Split(',');
                string withEnding = groups[1] + Engine.Class.FinText.get(forms[0], forms[1], forms[2], (long)number);
                param = Regex.Replace(param, groups[1] + @"\((\w+,\w+,\w+)\)", withEnding);
            }

            // Either hand back a "{0:N0}" format string or embed the formatted number directly.
            if (Regex.IsMatch(param, "ValueNull"))
            {
                return Regex.Replace(param, "ValueNull", "{0:N0}");
            }

            string formattedNumber = string.Format("{0:N0}", number).Replace(((char)160), ',');
            return Regex.Replace(param, "value", formattedNumber);
        }
 /// <summary>
 /// Re-applies syntax colouring to the whole rich text box: every configured keyword
 /// gets its colour and bold/regular style, and every <c>--</c> comment is painted green.
 /// </summary>
 /// <remarks>
 /// The control is hidden while selections are changed and shown again afterwards.
 /// The <c>running</c> flag is set for the duration of the call.
 /// Keyword matching is case-sensitive.
 /// </remarks>
 public void colorAll()
 {
     // Remember the caret before SelectAll() moves the selection start to 0;
     // reading it afterwards always restored the caret to the top of the document.
     int selectionStart = this.rich.SelectionStart;

     this.running = true;
     try
     {
         this.rich.SelectAll();
         this.resetFont();
         // The text is used as is: the former text.ToLower() call discarded its result,
         // so matching has always been case-sensitive.
         string text = this.rich.Text;
         this.rich.Visible = false;

         foreach (Keyword keyword in this.keywords)
         {
             Regex keywordRegex = new Regex(@"\b" + keyword.Value + @"\b");
             Color keywordColor = Color.FromName(keyword.Color);
             FontStyle keywordStyle = keyword.Bold ? FontStyle.Bold : FontStyle.Regular;

             for (Match match = keywordRegex.Match(text); match.Success; match = match.NextMatch())
             {
                 this.rich.Select(match.Index, match.Length);
                 this.rich.SelectionColor = keywordColor;
                 this.rich.ClearUndo();
                 try
                 {
                     this.rich.SelectionFont = new Font(this.normalFont.FontFamily, this.normalFont.Size, keywordStyle);
                 }
                 catch (Exception exception)
                 {
                     MessageBox.Show(exception.ToString());
                 }
             }
         }

         // Comments: "--" up to the end of the line.
         for (Match match2 = new Regex(@"\-\-[^\n]*(?!=\n)").Match(this.rich.Text); match2.Success; match2 = match2.NextMatch())
         {
             this.rich.Select(match2.Index, match2.Length);
             this.rich.SelectionColor = Color.Green;
         }

         // The control must be visible again before it can take focus.
         this.rich.Visible = true;
         this.rich.Focus();
         this.rich.Select(selectionStart, 0);
         this.resetFont();
     }
     finally
     {
         // Never leave the editor hidden or flagged as busy if colouring fails.
         this.rich.Visible = true;
         this.running = false;
     }
 }
Exemple #7
0
 /// <summary>
 /// Fills the result set with the definitions Google returns for a "define:" query,
 /// unless the cache already holds the results for this language and query.
 /// </summary>
 /// <remarks>
 /// Definitions that carry no link of their own take the link of the nearest
 /// following definition that has one. Fresh results are cached in full before
 /// the result set is cut down to <c>mResultSetMaxSz</c> entries.
 /// </remarks>
 public override void Search()
 {
     string langStr = TextMiningUtils.GetLanguageCode(mLanguage);
     mResultSet.Inner.Clear();

     bool servedFromCache = mCache != null
         && mCache.GetFromCache("GoogleDefine", mLanguage, mQuery, mResultSetMaxSz, ref mTotalHits, ref mResultSet);
     if (servedFromCache)
     {
         return;
     }

     string requestUrl = string.Format("http://www.google.com/search?defl={0}&q=define%3A{1}", langStr, HttpUtility.UrlEncode(mQuery));
     string defHtml = WebUtils.GetWebPage(requestUrl); // throws WebException

     Regex definitionRegex = new Regex("<li>(?<def>[^<]*)(<br><a href=\"(?<href>[^\"]*))?", RegexOptions.Singleline);
     Regex targetRegex = new Regex("&q=(?<url>[^&]*)");

     int rank = 0;
     for (Match defMatch = definitionRegex.Match(defHtml); defMatch.Success; defMatch = defMatch.NextMatch())
     {
         string def = HttpUtility.HtmlDecode(defMatch.Result("${def}").Trim());
         Match matchUrl = targetRegex.Match(defMatch.Result("${href}"));
         string url = matchUrl.Success ? HttpUtility.UrlDecode(matchUrl.Result("${url}")) : null;
         rank++;
         mResultSet.Inner.Add(new SearchEngineResultItem(mQuery, def, url, rank));
     }

     // Walk backwards so items without a link inherit the link of the next item that has one.
     string inheritedUrl = null;
     int position = mResultSet.Count;
     while (--position >= 0)
     {
         if (mResultSet[position].Url != null)
         {
             inheritedUrl = mResultSet[position].Url;
         }
         else
         {
             mResultSet[position].SetUrl(inheritedUrl);
         }
     }

     mTotalHits = mResultSet.Count;
     if (mCache != null)
     {
         mCache.PutIntoCache("GoogleDefine", mLanguage, mQuery, mTotalHits, mResultSet);
     }

     int surplus = mResultSet.Count - mResultSetMaxSz;
     if (surplus > 0)
     {
         mResultSet.Inner.RemoveRange(mResultSetMaxSz, surplus);
     }
 }
Exemple #8
0
        /// <summary>
        /// Extracts hashtags from <paramref name="tag"/> and appends the ones that are
        /// not yet present to <c>job.HashTag</c>.
        /// </summary>
        /// <param name="job">The job whose hashtag list is extended.</param>
        /// <param name="tag">Text containing zero or more hashtags; null is ignored.</param>
        /// <remarks>
        /// A hashtag is kept only when it has more than two characters after the '#'.
        /// Duplicates are detected ignoring case and surrounding whitespace, both against
        /// the job's existing tags and against tags found earlier in the same text.
        /// </remarks>
        private void AddTag(Engine.InfoClass.job.hashTag job, string tag)
        {
            if (tag == null)
            {
                return;
            }

            List<string> fresh = new List<string>();
            foreach (Match hit in Regex.Matches(tag, @"#[\w0-9_]+"))
            {
                string candidate = hit.Value;

                // '#' plus at least three characters.
                if (candidate.Length <= 3)
                {
                    continue;
                }

                string normalized = candidate.ToLower().Trim();

                // Check the pending batch as well: "#foo #foo" used to be added twice
                // because only the job's existing tags were consulted.
                bool alreadyKnown = job.HashTag.Exists(x => x.ToLower().Trim() == normalized)
                    || fresh.Exists(x => x.ToLower().Trim() == normalized);
                if (!alreadyKnown)
                {
                    fresh.Add(candidate);
                }
            }

            // Append the new tags in one go.
            if (fresh.Count != 0)
            {
                job.HashTag.AddRange(fresh);
            }
        }
Exemple #9
0
 /// <summary>
 /// Worker loop: repeats the configured list of test requests until a stop is
 /// requested or <c>MaxIterations</c> iterations have been started, recording one
 /// <c>TestRequestResult</c> per executed request in <c>testResults</c>.
 /// </summary>
 /// <remarks>
 /// Each iteration uses a fresh <c>TestWebClient</c>. Script errors and request errors
 /// are captured into the result (message text / status) instead of being propagated.
 /// </remarks>
 private void Work()
 {
     IsRunning = true;
     StopRequested = false;
     // swHttp times a single HTTP call; swIteration times a whole pass over testRequests.
     Stopwatch swHttp = new Stopwatch(), swIteration = new Stopwatch();
     TestRequestResult result;
     DateTime start;
     string response, scriptBeforeResult, error;
     WebExceptionStatus errorCode;
     while (!StopRequested && Iterations < MaxIterations)
     {
         Iterations++;
         swIteration.Start();
         using (TestWebClient webClient = new TestWebClient() { Timeout = TestWorker.RequestsTimeout })
         {
             // Objects made available to the before/after scripts under the given names.
             TestCommander commander = new TestCommander(this, webClient);
             CommandContainerCollection ccc = new CommandContainerCollection()
             {
                 new CommandContainer(commander, "Commander"),
                 new CommandContainer(this, "Worker"),
                 new CommandContainer(webClient, "WebClient"),
                 new CommandContainer(webClient.CookieContainer, "Cookies"),
                 new CommandContainer(webClient.Headers, "Headers"),
             };
             ccc.SelectContainer("Commander");
             foreach (TestRequestInfo request in this.testRequests)
             {
                 if (StopRequested) { break; }
                 errorCode = WebExceptionStatus.Success;
                 error = null;
                 start = DateTime.Now;
                 // The current request is registered as "Request" for the scripts and removed again below.
                 ccc.Add(new CommandContainer(request, "Request")); commander.Request = request;
                 // Run the optional pre-request script; a script failure is recorded as its message text.
                 try { scriptBeforeResult = request.ScriptBefore == null ? null : (ccc.ExecuteCommand(request.ScriptBefore) ?? "").ToString(); }
                 catch (Exception e) { scriptBeforeResult = e.Message; }
                 IsWaiting = true;
                 try
                 {
                     // No post data: download the URL. Otherwise upload the post data as a form.
                     if (request.PostData == null)
                     {
                         webClient.Headers.Remove(HttpRequestHeader.ContentType);
                         if (commander.Burning)
                         {
                             // "Burning" mode: start the request asynchronously and cancel it right away;
                             // no response is kept and the result is marked with the error text "Burning".
                             response = null;
                             error = "Burning";
                             swHttp.Start();
                             webClient.DownloadStringAsync(new Uri(request.Url));
                             webClient.CancelAsync();
                         }
                         else
                         {
                             swHttp.Start();
                             response = webClient.DownloadString(request.Url);
                         }
                     }
                     else
                     {
                         webClient.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
                         if (commander.Burning)
                         {
                             response = null;
                             error = "Burning";
                             swHttp.Start();
                             webClient.UploadStringAsync(new Uri(request.Url), request.PostData);
                             webClient.CancelAsync();
                         }
                         else
                         {
                             swHttp.Start();
                             response = webClient.UploadString(request.Url, request.PostData);
                         }
                     }
                 }
                 catch (WebException we)
                 {
                     // Web failures keep their own status code.
                     response = null;
                     errorCode = we.Status;
                     error = we.Message;
                 }
                 catch(Exception e)
                 {
                     // Anything else is reported as an unknown error.
                     response = null;
                     errorCode = WebExceptionStatus.UnknownError;
                     error = e.Message;
                 }
                 swHttp.Stop();
                 IsWaiting = false;
                 result = new TestRequestResult(request)
                 {
                     Duration = swHttp.Elapsed,
                     Start = start,
                     WorkerId = Id,
                     Status = errorCode,
                     // Valid stays null when there is no validation pattern or no response to check.
                     Valid = request.ResultValidation == null || response == null ?
                         default(Nullable<bool>) : new Regex(request.ResultValidation, RegexOptions.Singleline).Match(response).Success,
                     Length = response == null ? default(Nullable<int>) : response.Length,
                     Iteration = Iterations,
                     ScriptBeforeResult = scriptBeforeResult,
                     Error = error,
                 };
                 if (request.ResultDataExtract != null && response != null)
                 {
                     // Concatenate every match of the extraction pattern, with the separator between matches only.
                     StringBuilder data = new StringBuilder();
                     Match match = new Regex(request.ResultDataExtract, RegexOptions.Singleline).Match(response);
                     while (match.Success)
                     {
                         data.Append(match.Value);
                         match = match.NextMatch();
                         if (match.Success) { data.Append(request.ResultDataSeparator); }
                     }
                     result.DataExtracted = data.ToString();
                 }
                 // Expose the response and the result to the post-request script.
                 ccc.StoredValues["Response"] = response; commander.ResponseString = response;
                 ccc.Remove("Result"); ccc.Add(new CommandContainer(result, "Result")); commander.Result = result;
                 // Run the optional post-request script; a script failure is recorded as its message text.
                 try { result.ScriptAfterResult = request.ScriptAfter == null ? null : (ccc.ExecuteCommand(request.ScriptAfter) ?? "").ToString(); }
                 catch (Exception e) { result.ScriptAfterResult = e.Message; }
                 ccc.Remove("Request");
                 swHttp.Reset();
                 this.testResults.Add(result);
                 Requests++;
             }
         }
         swIteration.Stop();
         LastIterationRunTime = swIteration.Elapsed;
         swIteration.Reset();
     }
     IsRunning = false;
     // Done reports whether the iteration counter has reached MaxIterations.
     Done = Iterations == MaxIterations;
 }
    /// <summary>
    /// Expands the type and GUID placeholders of a SQL script for the given database.
    /// </summary>
    /// <param name="origScript">The script text containing <c>%...%</c> placeholders.</param>
    /// <param name="database">The database that supplies the concrete SQL type names.</param>
    /// <returns>The script with all known placeholders replaced.</returns>
    /// <remarks>
    /// Supported placeholders: the simple types (<c>%TIMESTAMP%</c>, <c>%CHAR%</c>, <c>%BOOLEAN%</c>,
    /// <c>%SINGLE%</c>, <c>%DOUBLE%</c>, <c>%SMALLINT%</c>, <c>%INTEGER%</c>, <c>%BIGINT%</c>,
    /// <c>%GUID%</c>, <c>%BINARY%</c>), <c>%STRING(N)%</c>, <c>%STRING_FIXED(N)%</c>,
    /// <c>%CREATE_NEW_GUID%</c> (a fresh GUID per occurrence) and <c>%GET_LAST_GUID%</c>
    /// (the GUID created by the closest preceding <c>%CREATE_NEW_GUID%</c>, or nothing
    /// when there is none).
    /// </remarks>
    /// <exception cref="FormatException">
    /// A <c>%STRING()%</c> or <c>%STRING_FIXED()%</c> placeholder has no length between the parentheses.
    /// </exception>
    protected static string Preprocess(string origScript, ISQLDatabase database)
    {
      // Replace simple types
      StringBuilder result = new StringBuilder(origScript);
      result = result.Replace("%TIMESTAMP%", GetType(typeof(DateTime), database)).
          Replace("%CHAR%", GetType(typeof(Char), database)).
          Replace("%BOOLEAN%", GetType(typeof(Boolean), database)).
          Replace("%SINGLE%", GetType(typeof(Single), database)).
          Replace("%DOUBLE%", GetType(typeof(Double), database)).
          Replace("%SMALLINT%", GetType(typeof(Int16), database)).
          Replace("%INTEGER%", GetType(typeof(Int32), database)).
          Replace("%BIGINT%", GetType(typeof(Int64), database)).
          Replace("%GUID%", GetType(typeof(Guid), database)).
          Replace("%BINARY%", GetType(typeof(byte[]), database));

      // For extended replacements: first collect all patterns to be replaced...
      IDictionary<string, string> replacements = new Dictionary<string, string>();

      string interimStr = result.ToString();

      // %STRING([N])%
      foreach (Match match in Regex.Matches(interimStr, @"%STRING\((\d*)\)%"))
      {
        string pattern = match.Value;
        if (!replacements.ContainsKey(pattern))
        {
          uint length = uint.Parse(match.Groups[1].Value);
          replacements.Add(pattern, database.GetSQLVarLengthStringType(length));
        }
      }

      // %STRING_FIXED([N])%
      foreach (Match match in Regex.Matches(interimStr, @"%STRING_FIXED\((\d*)\)%"))
      {
        string pattern = match.Value;
        if (!replacements.ContainsKey(pattern))
        {
          uint length = uint.Parse(match.Groups[1].Value);
          replacements.Add(pattern, database.GetSQLFixedLengthStringType(length));
        }
      }

      // %CREATE_NEW_GUID% / %GET_LAST_GUID%
      // Replaced in one left-to-right pass over the string. Editing the StringBuilder in
      // place with match positions taken from the unmodified text only worked for the
      // first placeholder: a GUID is longer than its placeholder, so every later position
      // pointed at the wrong characters and those placeholders were left in the script.
      string lastGuid = null;
      interimStr = Regex.Replace(interimStr, @"(%CREATE_NEW_GUID%)|(%GET_LAST_GUID%)", match =>
      {
        if (match.Groups[1].Success) // %CREATE_NEW_GUID% matched
          return lastGuid = Guid.NewGuid().ToString("B");
        // %GET_LAST_GUID% matched; without a preceding GUID the placeholder is simply removed
        return lastGuid ?? string.Empty;
      });

      // ... then do the actual replacements
      result = new StringBuilder(interimStr);
      result = replacements.Aggregate(result, (current, replacement) => current.Replace(replacement.Key, replacement.Value));
      return result.ToString();
    }
Exemple #11
0
 /// <summary>
 /// Parses the folder-management page for mailbox folders, fetches the pages of every
 /// folder found and makes sure each folder name is present in the <c>MailBoxList</c> table.
 /// </summary>
 /// <param name="message">HTML of the folder-management page.</param>
 /// <remarks>
 /// When no folder row is found a failure message is shown and nothing else happens.
 /// Folder names come from the downloaded page, so they are escaped before being
 /// embedded in SQL text.
 /// </remarks>
 private void getBoxName(string message)
 {
     string pattern = "<td class=\"ManageFoldersFolderNameCol\"><div.*?href=\"(?<BoxUrl>[^\"]+)\".*?>(?<BoxNamme>[^<]+)</a[\\s\\S]+?<\\/td>";
     Match match = new Regex(pattern).Match(message);
     if (!match.Success)
     {
         base.ShowMessage("取箱子失败!");
         return;
     }

     // Record name, URL and id of every folder row.
     int boxCount = 0;
     while (match.Success)
     {
         string boxUrl = match.Groups["BoxUrl"].Value;
         this.boxList[boxCount].boxname = base.BoxName = match.Groups["BoxNamme"].Value;
         this.boxList[boxCount].boxUrl = boxUrl;
         this.boxList[boxCount].boxid = base.putstr(boxUrl, "FolderID=", "&", 0);
         match = match.NextMatch();
         boxCount++;
     }

     // Download each folder page and hand it to getPages.
     for (int index = 0; index < boxCount; index++)
     {
         string url = base.Host + this.boxList[index].boxUrl;
         this.cookie = this.cookieTemp;
         base.MyStringBuilder.Remove(0, base.MyStringBuilder.Length);
         base.streamControl = true;
         base.MyStringBuilder = this.Request(url);
         this.getPages(base.MyStringBuilder.ToString(), index);
     }

     // Insert the folders that are not yet known for this account.
     lock (SelMailBoxL)
     {
         for (int index = 0; index < boxCount; index++)
         {
             string no = EscapeSqlLiteral(base.m_NO);
             string boxName = EscapeSqlLiteral(this.boxList[index].boxname);

             string strSql = string.Concat("select count(*) from MailBoxList where 序号='", no, "' and MailBoxName = '", boxName, "'");
             if (Convert.ToInt32(GlobalValue.PopMainForm.ExecuteSQL(strSql)) == 0)
             {
                 strSql = string.Concat("insert into MailBoxList (序号,MailBoxName)values('", no, "','", boxName, "');");
                 GlobalValue.PopMainForm.ExecuteSQL(strSql);
             }
         }
     }
 }

 /// <summary>
 /// Renders a value as text that can be placed between single quotes in a SQL
 /// statement: null becomes an empty string and every single quote is doubled.
 /// </summary>
 private static string EscapeSqlLiteral(object value)
 {
     return string.Concat(value).Replace("'", "''");
 }
Exemple #12
0
        /// <summary>
        /// Downloads <paramref name="url"/> and harvests two things from the page text:
        /// every <c>ip:port</c> pair is added to <c>Ips</c>, and the target of every
        /// quoted <c>&lt;a href&gt;</c> is added to <c>glob_visited</c>.
        /// </summary>
        /// <param name="url">Absolute address of the page to download.</param>
        /// <remarks>
        /// Failures are written to the console and otherwise ignored, so a page that
        /// cannot be fetched does not stop the caller.
        /// </remarks>
        public void fetch(string url)
        {
            Console.WriteLine("{0}, Thread Id= {1}", url, Thread.CurrentThread.ManagedThreadId);

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(new Uri(url));
                request.UserAgent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7";
                request.Referer = "http://www.google.com";

                // Dispose response, stream and reader even when reading fails;
                // previously the connection was only closed on the success path.
                string responseFromServer;
                using (WebResponse response = request.GetResponse())
                using (Stream dataStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(dataStream, Encoding.UTF8))
                {
                    responseFromServer = reader.ReadToEnd();
                }

                Match proxyMatch = new Regex(@"([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,5})", RegexOptions.IgnoreCase).Match(responseFromServer);
                while (proxyMatch.Success)
                {
                    Group g = proxyMatch.Groups[0];
                    Thread.MemoryBarrier();
                    Ips.Add(g.ToString());
                    Thread.MemoryBarrier();

                    proxyMatch = proxyMatch.NextMatch();
                }

                Match httpMatch = new Regex(@"<(?<Tag_Name>(a))\b[^>]*?\b(?<URL_Type>(?(1)href))\s*=\s*(?:""(?<URL>(?:\\""|[^""])*)""|'(?<URL>(?:\\'|[^'])*)')", RegexOptions.IgnoreCase).Match(responseFromServer);
                while (httpMatch.Success)
                {
                    // Group 4 is the named group "URL" (named groups are numbered after the unnamed one).
                    Thread.MemoryBarrier();
                    glob_visited.Add(httpMatch.Groups[4].ToString());
                    Thread.MemoryBarrier();

                    httpMatch = httpMatch.NextMatch();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }
Exemple #13
0
        /// <summary>
        /// Reads the chapter number out of a title of the form "第…章", where the number
        /// is written with Chinese or Arabic numerals. A prologue title ("引子") counts as chapter 0.
        /// </summary>
        /// <param name="title">The chapter title.</param>
        /// <returns>
        /// The chapter number, 0 for a prologue, or -1 when the title contains no chapter marker.
        /// </returns>
        /// <remarks>
        /// Chinese numerals are converted group by group (tens, hundreds, thousands,
        /// ten-thousands, plain digit runs) and summed; 〇 and 零 act as separators only.
        /// A group too large for an <c>int</c> contributes nothing.
        /// </remarks>
        public static int GetNumberFromTitle(this string title)
        {
            Match mc = new Regex("第[一二三四五六七八九〇零十百千万1234567890]+章|引子", RegexOptions.None).Match(title);
            if (!mc.Success)
            {
                return -1;
            }

            string str_chineseNumber = mc.Groups[0].Value.Replace("第", "").Replace("章", "").Replace("引子", "0");

            // A purely Arabic chapter number ("第12章") is already the answer. The conversion
            // pattern below only knows Chinese numerals, so such titles used to yield 0.
            int arabic;
            if (int.TryParse(str_chineseNumber, out arabic))
            {
                return arabic;
            }

            int result = 0;
            Match m = new Regex("[一二三四五六七八九]?十|[一二三四五六七八九]+百|[一二三四五六七八九]+千|[一二三四五六七八九]+万|[一二三四五六七八九]+", RegexOptions.None).Match(str_chineseNumber);
            while (m.Success)
            {
                // Each group becomes a decimal literal: digits map 1:1, unit characters become zeros.
                string digits = m.Groups[0].Value.Replace("一", "1")
                    .Replace("二", "2")
                    .Replace("三", "3")
                    .Replace("四", "4")
                    .Replace("五", "5")
                    .Replace("六", "6")
                    .Replace("七", "7")
                    .Replace("八", "8")
                    .Replace("九", "9")
                    .Replace("十", "0")
                    .Replace("百", "00")
                    .Replace("千", "000")
                    .Replace("万", "0000");

                // A 十 without a leading digit means ten; it is the only group that maps to a lone "0".
                if (digits == "0")
                {
                    digits = "10";
                }

                int groupValue;
                if (int.TryParse(digits, out groupValue))
                {
                    result += groupValue;
                }
                m = m.NextMatch();
            }
            return result;
        }
Exemple #14
0
        /// <summary>
        /// Expands every <c>[name]...[/name]</c> tag pair in the text with the content
        /// produced by <c>GetTagContent</c>.
        /// </summary>
        /// <param name="TmpString">The template text.</param>
        /// <returns>The text with all matching tag pairs replaced.</returns>
        /// <remarks>
        /// Pairs whose opening and closing names differ are left as they are.
        /// Tag pairs are located in the text as it was passed in; every occurrence
        /// of a located pair is replaced.
        /// </remarks>
        public string ReplaceTagContent(string TmpString)
        {
            Regex tagPair = new Regex("\\[(?<key>.*?)\\](?<key2>.*?)\\[/(?<key3>.*?)\\]", RegexOptions.None);

            for (Match mc = tagPair.Match(TmpString); mc.Success; mc = mc.NextMatch())
            {
                string openingName = mc.Groups["key"].Value;
                if (openingName != mc.Groups["key3"].Value)
                {
                    continue;
                }

                string wholeTag = string.Format("[{0}]{1}[/{0}]", openingName, mc.Groups["key2"].Value);
                TmpString = TmpString.Replace(mc.Groups[0].Value, GetTagContent(wholeTag));
            }

            return TmpString;
        }
Exemple #15
0
        /// <summary>
        /// Replaces every system-setting placeholder <c>[!--sys.KEY--]</c> in the text
        /// with the value returned by <c>GetSysSettingContent</c> for KEY.
        /// </summary>
        /// <param name="TmpString">The template text; may be null or empty.</param>
        /// <returns>
        /// The text with the placeholders replaced; an empty string for null or empty input.
        /// </returns>
        public string ReplaceSystemSetting(string TmpString)
        {
            if (string.IsNullOrEmpty(TmpString))
            {
                return "";
            }

            Match mc_sys = new Regex("\\[\\!--sys.(?<key>.*?)--\\]", RegexOptions.None).Match(TmpString);
            while (mc_sys.Success)
            {
                string key = mc_sys.Groups["key"].Value;

                // Plain text replacement: the key is not a regular expression and the setting
                // value is not a replacement pattern. Building a regex from the key broke on
                // keys containing characters such as '(' or '+', and a '$' in the value was
                // interpreted as a group reference.
                TmpString = TmpString.Replace("[!--sys." + key + "--]", GetSysSettingContent(key));
                mc_sys = mc_sys.NextMatch();
            }

            return TmpString;
        }
Exemple #16
0
        /// <summary>
        /// Replaces every public-template placeholder <c>[!--temp.KEY--]</c> in the text
        /// with the template returned by <c>GetPublicTemplate</c> for KEY.
        /// </summary>
        /// <param name="TmpString">The template content.</param>
        /// <returns>
        /// The text with the placeholders replaced; an empty string when the
        /// <c>IsNullOrEmpty</c> check reports the input as empty.
        /// </returns>
        public string ReplacePublicTemplate(string TmpString)
        {
            if (TmpString.IsNullOrEmpty())
            {
                return "";
            }

            Match mc_pubic = new Regex("\\[\\!--temp.(?<key>.*?)--\\]", RegexOptions.None).Match(TmpString);
            while (mc_pubic.Success)
            {
                string key = mc_pubic.Groups["key"].Value;

                // Plain text replacement: the key is not a regular expression and the template
                // is not a replacement pattern. Building a regex from the key broke on keys
                // containing characters such as '(' or '+', and a '$' in the template was
                // interpreted as a group reference.
                TmpString = TmpString.Replace("[!--temp." + key + "--]", GetPublicTemplate(key));
                mc_pubic = mc_pubic.NextMatch();
            }

            return TmpString;
        }
Exemple #17
0
 /// <summary>
 /// Rebuilds the function overview from the editor text: the list box, the
 /// function-name list and the list of line numbers are cleared and refilled
 /// with one entry per "function ..." occurrence.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <c>(?!=\()</c> is a negative lookahead for the text "=(" and looks like a
 /// typo for <c>(?=\()</c>. As written, an entry is everything up to the next whitespace,
 /// e.g. "foo(a," for "function foo(a, b)" - confirm whether that is intended.
 /// </remarks>
 private void updateFuncList()
 {
     Regex functionHeader = new Regex(@"function \S*(?!=\()");
     Match hit = functionHeader.Match(this.richTextBox1.Text);

     this.listBox1.Items.Clear();
     this.funcList.Clear();
     this.funcLine.Clear();

     for (; hit.Success; hit = hit.NextMatch())
     {
         // The entry is the token that follows the "function" keyword.
         string[] tokens = hit.Value.Split(' ');
         string entry = tokens[1];

         this.listBox1.Items.Add(entry);
         this.funcList.Add(entry);
         this.funcLine.Add(this.richTextBox1.GetLineFromCharIndex(hit.Index).ToString());
     }
 }
Exemple #18
0
 /// <summary>
 /// Reads labelled feature vectors, one per line, in the form
 /// <c>label feature:weight feature:weight ...</c> and registers each one through
 /// <c>SvmLightLib.NewFeatureVector</c>.
 /// </summary>
 /// <param name="reader">The source of the lines; read until the end.</param>
 /// <returns>The ids returned by <c>SvmLightLib.NewFeatureVector</c>, in input order.</returns>
 /// <remarks>Lines starting with '#' and blank lines are skipped.</remarks>
 /// <exception cref="FormatException">
 /// A data line does not start with a numeric label, or the label is not an integer.
 /// </exception>
 private static List<int> ReadFeatureVectors(StreamReader reader)
 {
     // Built once; constructing both patterns for every line was wasted work.
     Regex labelRegex = new Regex(@"^(?<label>[+-]?\d+([.]\d+)?)(\s|$)");
     Regex featureRegex = new Regex(@"(?<feature>\d+):(?<weight>[-]?[\d\.]+)");

     List<int> feature_vectors = new List<int>();
     string line;
     while ((line = reader.ReadLine()) != null)
     {
         if (line.Trim().Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
         {
             continue;
         }

         // Fail with a clear message in every build configuration. Debug.Assert vanishes
         // in release builds, where a missing label surfaced as an unrelated exception
         // from Match.Result.
         Match label_match = labelRegex.Match(line);
         if (!label_match.Success)
         {
             throw new FormatException("Feature vector line does not start with a numeric label: " + line);
         }
         int label = Convert.ToInt32(label_match.Groups["label"].Value, System.Globalization.CultureInfo.InvariantCulture);

         List<int> features = new List<int>();
         List<float> weights = new List<float>();
         for (Match match = featureRegex.Match(line); match.Success; match = match.NextMatch())
         {
             features.Add(Convert.ToInt32(match.Groups["feature"].Value, System.Globalization.CultureInfo.InvariantCulture));
             weights.Add(Convert.ToSingle(match.Groups["weight"].Value, System.Globalization.CultureInfo.InvariantCulture));
         }

         int vec_id = SvmLightLib.NewFeatureVector(features.Count, features.ToArray(), weights.ToArray(), label);
         feature_vectors.Add(vec_id);
     }
     return feature_vectors;
 }
Exemple #19
0
        /// <summary>
        /// Returns the value of the named group <c>key</c> for every match of
        /// <paramref name="pattern"/> in <paramref name="input"/>.
        /// </summary>
        /// <param name="input">The source string.</param>
        /// <param name="pattern">
        /// The regular expression, matched case-insensitively. Matches in which the
        /// group <c>key</c> did not take part contribute an empty string.
        /// </param>
        /// <returns>One entry per match, in order of appearance.</returns>
        public static List<string> GetMatch(this string input, string pattern)
        {
            Regex matcher = new Regex(pattern, RegexOptions.IgnoreCase);
            List<string> keys = new List<string>();

            foreach (Match hit in matcher.Matches(input))
            {
                keys.Add(hit.Groups["key"].Value);
            }

            return keys;
        }
 private void ParseUri(MyUri uri, ref MyWebRequest request)
 {
     string str = "";
     if ((request != null) && request.response.KeepAlive)
     {
         str = str + "连接转至: " + uri.Host + "\r\n\r\n";
     }
     else
     {
         str = str + "连接: " + uri.Host + "\r\n\r\n";
     }
     ListViewItem item = null;
     Monitor.Enter(this.listViewThreads);
     try
     {
         item = this.listViewThreads.Items[int.Parse(Thread.CurrentThread.Name)];
         item.SubItems[1].Text = uri.Depth.ToString();
         item.ImageIndex = 1;
         item.BackColor = System.Drawing.Color.WhiteSmoke;
         item.SubItems[2].Text = "正在连接";
         item.ForeColor = System.Drawing.Color.Red;
         item.SubItems[3].Text = uri.AbsoluteUri;
         item.SubItems[4].Text = "";
         item.SubItems[5].Text = "";
     }
     catch (Exception)
     {
     }
     Monitor.Exit(this.listViewThreads);
     try
     {
         object obj2;
         request = MyWebRequest.Create(uri, request, this.KeepAlive);
         request.Timeout = this.RequestTimeout * 0x3e8;
         MyWebResponse response = request.GetResponse();
         str = str + request.Header + response.Header;
         if (!response.ResponseUri.Equals(uri))
         {
             this.EnqueueUri(new MyUri(response.ResponseUri.AbsoluteUri), true);
             obj2 = str;
             str = string.Concat(new object[] { obj2, "重定向到: ", response.ResponseUri, "\r\n" });
             request = null;
         }
         else
         {
             if ((!this.AllMIMETypes && (response.ContentType != null)) && (this.MIMETypes.Length > 0))
             {
                 string str2 = response.ContentType.ToLower();
                 int index = str2.IndexOf(';');
                 if (index != -1)
                 {
                     str2 = str2.Substring(0, index);
                 }
                 if ((str2.IndexOf('*') == -1) && ((index = this.MIMETypes.IndexOf(str2)) == -1))
                 {
                     this.LogError(uri.AbsoluteUri, str + "\r\nUnlisted Content-Type (" + str2 + "), check settings.");
                     request = null;
                     return;
                 }
                 Match match = new Regex(@"\d+").Match(this.MIMETypes, index);
                 int num3 = int.Parse(match.Value) * 0x400;
                 int num4 = int.Parse(match.NextMatch().Value) * 0x400;
                 if ((num3 < num4) && ((response.ContentLength < num3) || (response.ContentLength > num4)))
                 {
                     this.LogError(uri.AbsoluteUri, string.Concat(new object[] { str, "\r\nContentLength limit error (", response.ContentLength, ")" }));
                     request = null;
                     return;
                 }
             }
             string[] strArray = new string[] { ".gif", ".jpg", ".css", ".zip", ".exe" };
             bool flag = true;
             foreach (string str3 in strArray)
             {
                 if (uri.AbsoluteUri.ToLower().EndsWith(str3))
                 {
                     flag = false;
                     break;
                 }
             }
             foreach (string str3 in this.ExcludeFiles)
             {
                 if ((str3.Trim().Length > 0) && uri.AbsoluteUri.ToLower().EndsWith(str3))
                 {
                     flag = false;
                     break;
                 }
             }
             string strBody = uri.ToString();
             if (this.Compared(uri.LocalPath.Substring(uri.LocalPath.LastIndexOf('.') + 1).ToLower()) && (uri.ToString().Substring(uri.ToString().Length - 1, 1) != "/"))
             {
                 this.LogError("丢弃--非网页文件", strBody);
             }
             else
             {
                 int num5;
                 UriKind absolute = UriKind.Absolute;
                 if (!string.IsNullOrEmpty(strBody) && Uri.IsWellFormedUriString(strBody, absolute))
                 {
                     string page = GetPage(strBody);
                     Stopwatch stopwatch = new Stopwatch();
                     stopwatch.Start();
                     Html html = new Html {
                         Web = page,
                         Url = strBody
                     };
                     CommonAnalyze analyze = new CommonAnalyze();
                     analyze.LoadHtml(html);
                     Net.LikeShow.ContentAnalyze.Document result = analyze.GetResult();
                     stopwatch.Stop();
                     string bt = result.Title.Replace("[(title)]", "");
                     switch (bt)
                     {
                         case null:
                         case "":
                             bt = result.Doc.Substring(20).ToString();
                             break;
                     }
                     if ((result.Doc == null) || (result.Doc == ""))
                     {
                         this.LogError("丢弃--空内容或非内空页", strBody);
                     }
                     else
                     {
                         Lucene.Net.Documents.Document document3;
                         string str7 = result.Doc + bt;
                         if (this.cgcount >= 10)
                         {
                             string keywords = this.MD5string(result.Doc.ToString());
                             string keyWordsSplitBySpace = "";
                             IndexSearcher searcher = new IndexSearcher(this.path);
                             keyWordsSplitBySpace = GetKeyWordsSplitBySpace(keywords, new KTDictSegTokenizer());
                             Query query = new QueryParser("J_md5_bai", new KTDictSegAnalyzer(true)).Parse(keyWordsSplitBySpace);
                             if (searcher.Search(query).Doc(0).Get("J_md5_bai") == keywords)
                             {
                                 this.LogError("排除--重复", strBody);
                             }
                             else
                             {
                                 this.cgcount++;
                                 this.LogUri(bt, "引索完成");
                                 document3 = new Lucene.Net.Documents.Document();
                                 document3.Add(new Field("分类", this.page_py, Field.Store.YES, Field.Index.TOKENIZED));
                                 document3.Add(new Field("J_title_bai", bt, Field.Store.YES, Field.Index.TOKENIZED));
                                 document3.Add(new Field("J_msgContent_bai", str7, Field.Store.YES, Field.Index.TOKENIZED));
                                 document3.Add(new Field("J_SiteType_bai", result.SiteType.ToString(), Field.Store.YES, Field.Index.NO));
                                 document3.Add(new Field("J_URL_bai", strBody, Field.Store.YES, Field.Index.NO));
                                 document3.Add(new Field("J_addtime_bai", DateTime.Now.ToShortDateString(), Field.Store.YES, Field.Index.NO));
                                 document3.Add(new Field("J_md5_bai", this.MD5string(result.Doc.ToString()), Field.Store.YES, Field.Index.TOKENIZED));
                                 this.writer.AddDocument(document3);
                             }
                         }
                         else
                         {
                             this.cgcount++;
                             this.LogUri(bt, "引索完成");
                             document3 = new Lucene.Net.Documents.Document();
                             document3.Add(new Field("分类", this.page_py, Field.Store.YES, Field.Index.TOKENIZED));
                             document3.Add(new Field("J_title_bai", bt, Field.Store.YES, Field.Index.TOKENIZED));
                             document3.Add(new Field("J_msgContent_bai", str7, Field.Store.YES, Field.Index.TOKENIZED));
                             document3.Add(new Field("J_SiteType_bai", result.SiteType.ToString(), Field.Store.YES, Field.Index.NO));
                             document3.Add(new Field("J_URL_bai", strBody, Field.Store.YES, Field.Index.NO));
                             document3.Add(new Field("J_addtime_bai", DateTime.Now.ToShortDateString(), Field.Store.YES, Field.Index.NO));
                             document3.Add(new Field("J_md5_bai", this.MD5string(result.Doc.ToString()), Field.Store.YES, Field.Index.TOKENIZED));
                             this.writer.AddDocument(document3);
                         }
                     }
                 }
                 item.SubItems[2].Text = "正在下载";
                 item.ForeColor = System.Drawing.Color.Black;
                 string input = "";
                 byte[] buffer = new byte[0x2800];
                 int nNum = 0;
                 while ((num5 = response.socket.Receive(buffer, 0, 0x2800, SocketFlags.None)) > 0)
                 {
                     nNum += num5;
                     if (flag)
                     {
                         input = input + Encoding.ASCII.GetString(buffer, 0, num5);
                     }
                     item.SubItems[4].Text = this.Commas(nNum);
                     if (response.ContentLength > 0)
                     {
                         item.SubItems[5].Text = '%' + ((100 - (((response.ContentLength - nNum) * 100) / response.ContentLength))).ToString();
                     }
                     if ((response.KeepAlive && (nNum >= response.ContentLength)) && (response.ContentLength > 0))
                     {
                         break;
                     }
                 }
                 if (response.KeepAlive)
                 {
                     str = str + "Connection kept alive to be used in subpages.\r\n";
                 }
                 else
                 {
                     response.Close();
                     str = str + "Connection closed.\r\n";
                 }
                 this.FileCount++;
                 this.ByteCount += nNum;
                 if ((this.ThreadsRunning && flag) && (uri.Depth < this.WebDepth))
                 {
                     str = str + "\r\nParsing page ...\r\n";
                     string pattern = "(href|HREF|src|SRC)[ ]*=[ ]*[\"'][^\"'#>]+[\"']";
                     MatchCollection matchs = new Regex(pattern).Matches(input);
                     obj2 = str;
                     str = string.Concat(new object[] { obj2, "Found: ", matchs.Count, " ref(s)\r\n" });
                     this.URLCount += matchs.Count;
                     foreach (Match match in matchs)
                     {
                         pattern = match.Value.Substring(match.Value.IndexOf('=') + 1).Trim(new char[] { '"', '\'', '#', ' ', '>' });
                         try
                         {
                             if (!(((pattern.IndexOf("..") == -1) && !pattern.StartsWith("/")) && pattern.StartsWith("http://")))
                             {
                                 pattern = new Uri(uri, pattern).AbsoluteUri;
                             }
                             this.Normalize(ref pattern);
                             MyUri uri2 = new MyUri(pattern);
                             if ((((uri2.Scheme != Uri.UriSchemeHttp) && (uri2.Scheme != Uri.UriSchemeHttps)) || ((uri2.Host.Split(new char[] { '.' })[1] != this.urllhost[1]) && this.KeepSameServer)) || !this.Compared_jpg(uri2.LocalPath.Substring(uri2.LocalPath.LastIndexOf('.') + 1).ToLower()))
                             {
                                 continue;
                             }
                             Global.URL = uri2.ToString();
                             if ((Global.BXBH != "") && (Redspider_link.bxbh() == 2))
                             {
                                 continue;
                             }
                             uri2.Depth = uri.Depth + 1;
                             if (this.EnqueueUri(uri2, true))
                             {
                                 str = str + uri2.AbsoluteUri + "\r\n";
                             }
                         }
                         catch (Exception)
                         {
                         }
                     }
                 }
             }
         }
     }
     catch (Exception exception)
     {
         this.LogError(uri.AbsoluteUri, str + exception.Message);
         request = null;
     }
     finally
     {
         this.EraseItem(item);
     }
 }
Exemple #21
0
        /// <summary>
        /// Serializes the fields of an HTML form into name/value pairs.
        /// </summary>
        /// <param name="html">HTML text that contains the form.</param>
        /// <param name="id">
        /// Form selector. A leading <c>#</c> selects by the <c>id</c> attribute, a leading
        /// <c>.</c> by <c>class</c>, a leading <c>@</c> by <c>name</c>; any other value is
        /// treated as an <c>id</c>. Every occurrence of the marker character is removed from
        /// the selector before matching.
        /// </param>
        /// <returns>
        /// One entry per <c>&lt;input&gt;</c> / <c>&lt;textarea&gt;</c> element found in the form,
        /// keyed by its <c>name</c> attribute with its <c>value</c> attribute as the value.
        /// Checkboxes and radio buttons whose <c>checked</c> attribute is not reported as
        /// <c>"checked"</c> are skipped.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="id"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="id"/> is empty.</exception>
        public static NameValueCollection SerializeForm(this string html, string id)
        {
            if (id == null)
            {
                throw new ArgumentNullException("id");
            }
            if (id.Length == 0)
            {
                throw new ArgumentException("Form selector must not be empty.", "id");
            }

            // Pick the attribute to match on and the marker character to strip.
            string attribute;
            string marker;
            switch (id[0])
            {
                case '.':
                    attribute = "class";
                    marker = ".";
                    break;
                case '@':
                    attribute = "name";
                    marker = "@";
                    break;
                default:
                    // Both "#foo" and a bare "foo" select by id.
                    attribute = "id";
                    marker = "#";
                    break;
            }

            // Escape the selector so characters such as '[', '(' or '+' are matched literally
            // instead of being interpreted as regex syntax.
            string selector = Regex.Escape(id.Replace(marker, ""));
            string formPattern = string.Format(
                "<form[\\s\\S]*?{0}=\"{1}\"[\\s\\S]*?>(?<key>[\\s\\S]*?)</form>",
                attribute,
                selector);
            string strForm = html.GetMatch(formPattern)[0];

            NameValueCollection nv = new NameValueCollection();
            Regex fieldRegex = new Regex("<input .*?>|<textarea[\\w\\W]*?</textarea>");
            for (Match field = fieldRegex.Match(strForm); field.Success; field = field.NextMatch())
            {
                string ele = field.Groups[0].Value.ToS();
                string type = ele.GetHtmlElementAttribute("type");
                bool isToggle = type == "checkbox" || type == "radio";

                // Unchecked checkboxes / radio buttons are not part of a submitted form.
                if (isToggle && ele.GetHtmlElementAttribute("checked") != "checked")
                {
                    continue;
                }

                nv.Add(ele.GetHtmlElementAttribute("name"), ele.GetHtmlElementAttribute("value"));
            }

            return nv;
        }
Exemple #22
0
 /// <summary>
 /// Extracts mailbox ids and names from a folder listing and requests the mail ids of each box.
 /// </summary>
 /// <param name="strfolder">Folder-listing text that is scanned for <c>&lt;object&gt;</c> entries.</param>
 /// <remarks>
 /// Only runs when <c>this.urls</c> contains "mail.163.com". Every failure path, including any
 /// exception, is reported through <c>base.ShowMessage</c>.
 /// </remarks>
 private void getNewBoxName(string strfolder)
 {
     const string failureMessage = "取箱子失败!";
     try
     {
         // Only the 163 mail host is handled.
         if (this.urls.IndexOf("mail.163.com") < 0)
         {
             base.ShowMessage(failureMessage);
             return;
         }

         Regex boxRegex = new Regex("<object>[\\s\\S]+?name=\"id\">(?<BoxID>[\\d]+)?<\\/int>[\\s\\S]+?name=\"name\">(?<BoxName>[^<]+)?<\\/str[\\s\\S]+?<\\/object>");
         Match box = boxRegex.Match(strfolder);
         if (box.Length < 1)
         {
             base.ShowMessage(failureMessage);
             return;
         }

         // First pass: copy every matched box into boxList, counting as we go.
         int boxCount = 0;
         for (; box.Success; box = box.NextMatch())
         {
             this.boxList[boxCount].boxname = box.Groups["BoxName"].Value;
             this.boxList[boxCount].boxid = box.Groups["BoxID"].Value;
             boxCount++;
         }

         // Second pass: request the mail ids for each collected box.
         for (int slot = 0; slot < boxCount; slot++)
         {
             this.getNewMailId(this.urls, this.boxList[slot].boxname, this.boxList[slot].boxid);
         }
     }
     catch (Exception exception)
     {
         base.ShowMessage(failureMessage + exception.Message);
     }
 }