Example #1
0
        /// <summary>
        /// Queues a download of the anonymity-checker page through <paramref name="proxy"/>;
        /// <see cref="EndCheckAnonimous"/> is registered as the completion callback.
        /// </summary>
        /// <param name="proxy">Proxy handed to the download object.</param>
        void BeginCheckAnonymous(RatedProxy proxy)
        {
            DownloaderObj checkRequest = new DownloaderObj(
                new Uri("http://checker.samair.ru/"),
                EndCheckAnonimous,
                true,
                proxy,
                CookieOptions.NoCookies,
                3);

            Downloader.Queue(checkRequest);
        }
Example #2
0
        /// <summary>
        /// Stores the result of the Yandex page check on the proxy attached to <paramref name="obj"/>:
        /// rate 1 when the downloaded page passes <c>PageIsOk</c>, otherwise 0, and bumps the check counter.
        /// </summary>
        /// <param name="obj">Finished download whose <c>DataStr</c> is inspected.</param>
        void CheckYaPage(DownloaderObj obj)
        {
            if (PageIsOk(obj.DataStr, GlobalResourceCache.YaPagePattern))
            {
                obj.Proxy.YaRate = 1;
            }
            else
            {
                obj.Proxy.YaRate = 0;
            }

            obj.Proxy.YaChecked++;
        }
Example #3
0
        /// <summary>
        /// Callback of the anonymity check: classifies the proxy by matching the downloaded page
        /// against the cached anonymity regexes. When no page was downloaded the proxy is left
        /// untouched and the failure is logged (only if logging is enabled).
        /// </summary>
        /// <param name="obj">Finished download carrying the page text and the proxy.</param>
        void EndCheckAnonimous(DownloaderObj obj)
        {
            AnonymousRegexes patterns = GlobalResourceCache.AnonymCheck;

            if (obj.DataStr == null)
            {
                if (_logging)
                {
                    GlobalLog.Err("Cant download page from anon checking site, p:" + obj.Proxy.Address.Host);
                }
                return;
            }

            // The "anonymous" pattern is tried first, then "high anonymous"; anything else is not anonymous.
            AnonymousLevel detected;
            if (patterns.Anonymous.IsMatch(obj.DataStr))
            {
                detected = AnonymousLevel.Anonymous;
            }
            else if (patterns.HightAnonymous.IsMatch(obj.DataStr))
            {
                detected = AnonymousLevel.HightAnonymous;
            }
            else
            {
                detected = AnonymousLevel.NotAnonymous;
            }

            obj.Proxy.AnonymousLevel = detected;
        }
Example #4
0
 /// <summary>
 /// Requests <paramref name="obj"/>'s target synchronously and returns the cookies of the response.
 /// Each try consumes one of <c>obj.Attempts</c>; the loop stops when a response with cookies
 /// arrives or the attempts run out.
 /// </summary>
 /// <param name="obj">Download descriptor; its Request, Response and Attempts members are updated.</param>
 /// <returns>The response cookies, or <c>null</c> when no attempt produced them.</returns>
 public CookieCollection GetCookies(DownloaderObj obj)
 {
     while (obj.Attempts > 0)
     {
         obj.Attempts--;
         try
         {
             obj.Request = CreateRequest(obj);
             obj.Response = obj.Request.GetResponse() as HttpWebResponse;

             // The "as" cast yields null for a non-HTTP response, so check before dereferencing.
             if (obj.Request.HaveResponse && obj.Response != null && obj.Response.Cookies != null)
             {
                 return obj.Response.Cookies;
             }
         }
         catch (WebException e)
         {
             GlobalLog.Err(e, "Host: " + obj.Uri.Host);
             HandleWebState(e, obj);
         }
         // Any other exception propagates unchanged. The former "catch (Exception e) { throw e; }"
         // only reset the stack trace, so it was removed.
         finally
         {
             if (obj.Request != null)
             {
                 obj.Request.Abort();
             }
             if (obj.Response != null)
             {
                 obj.Response.Close();
             }
         }
     }
     return null;
 }
Example #5
0
        /// <summary>
        /// Callback for one multi-download check. Adds this download's share to the rate accumulated
        /// on the shared waiter and, when the last of the expected callbacks arrives, stores the
        /// average on the proxy and finishes the test.
        /// </summary>
        /// <param name="obj">
        /// Finished download; <c>obj.Arg</c> must be the array { PatternsContainer, SyncWaitObj, int total }.
        /// </param>
        void EndDownload(DownloaderObj obj)
        {
            object[]          args     = obj.Arg as object[];
            PatternsContainer sitePatt = args[0] as PatternsContainer;
            SyncWaitObj       waiter   = args[1] as SyncWaitObj;
            int siteChecksCount        = (int)args[2];

            if (obj.DataStr != null && sitePatt.Validation.Validate(obj.DataStr))
            {
                // NOTE(review): the divisor is zero when DownloadsAttempts == 1 — confirm that cannot happen.
                double share = (double)obj.Attempts / (DownloadsAttempts - 1);

                // The waiter is shared by every callback of the same batch (Count below is updated
                // with Interlocked), so the read-modify-write of the rate has to be serialised too.
                lock (waiter)
                {
                    waiter.MultidownloadRate += share;
                }
            }

            // Interlocked.Increment is a full fence: the callback that observes the final count also
            // sees every rate update made before the other callbacks incremented the counter.
            if (Interlocked.Increment(ref waiter.Count) == siteChecksCount)
            {
                obj.Proxy.MultidownloadRate = waiter.MultidownloadRate / siteChecksCount;
                EndTest(obj.Proxy);
            }
        }
Example #6
0
        /// <summary>
        /// Times <paramref name="attempts"/> calls of <c>Downloader.HaveResponce</c> against
        /// <paramref name="uri"/> through <paramref name="proxy"/> and averages the elapsed
        /// milliseconds of the calls that reported a response.
        /// </summary>
        /// <returns>Average latency in milliseconds (integer division), or <c>null</c> when no call succeeded.</returns>
        int? GetAvgLatency(Uri uri, RatedProxy proxy, int attempts)
        {
            Stopwatch stopwatch  = new Stopwatch();
            int       latencySum = 0;
            int       answered   = 0;

            int remaining = attempts;
            while (remaining > 0)
            {
                remaining--;

                DownloaderObj probe = new DownloaderObj(uri, null, false, proxy, CookieOptions.Empty, 1);
                stopwatch.Restart();
                bool gotResponse = Downloader.HaveResponce(probe);
                stopwatch.Stop();

                if (!gotResponse)
                {
                    continue;
                }

                latencySum += (int)stopwatch.ElapsedMilliseconds;
                answered++;
            }

            if (answered > 0)
            {
                return latencySum / answered;
            }
            return null;
        }
Example #7
0
        /// <summary>
        /// Builds a request for <paramref name="obj"/> and blocks until its response arrives.
        /// </summary>
        /// <returns>The response as <see cref="HttpWebResponse"/>, or <c>null</c> if it is of another type.</returns>
        private HttpWebResponse GetResponse(DownloaderObj obj)
        {
            //ADD POST DATA HANDLING ??
            HttpWebRequest webRequest  = CreateRequest(obj);
            WebResponse    rawResponse = webRequest.GetResponse();

            return rawResponse as HttpWebResponse;
        }
Example #8
0
        /// <summary>
        /// Builds a multipart/form-data POST containing the image and queues it for upload;
        /// <see cref="EndPostImage"/> is registered as the completion callback.
        /// </summary>
        /// <param name="image">Raw image bytes; must be non-null and non-empty.</param>
        /// <param name="type">Image extension type, forwarded to the form builder.</param>
        /// <param name="param">Optional recognition parameters appended to the form fields.</param>
        /// <exception cref="ArgumentException">The image is null or empty.</exception>
        public void PostImage(byte[] image, ImgType type, RecognizeParams param = null)
        {
            bool hasImage = image != null && image.Length != 0;
            if (!hasImage)
            {
                throw new ArgumentException("Bad image data");
            }

            List <KeyValuePair <string, string> > formFields = new List <KeyValuePair <string, string> >
            {
                new KeyValuePair <string, string>("method", "post"),
                new KeyValuePair <string, string>("key", ACCOUNT_ID)
            };

            if (param != null)
            {
                foreach (var extraField in param.Params)
                {
                    formFields.Add(extraField);
                }
            }

            // NOTE(review): the image bytes are round-tripped through StreamEncoding as text;
            // that is only lossless for single-byte encodings — confirm StreamEncoding is one.
            string imageAsText = StreamEncoding.GetString(image);
            byte[] formBody    = BuldFormbasedData(formFields, imageAsText, type);

            RequestParams requestSettings = new RequestParams(
                null,
                null,
                "multipart/form-data; boundary=" + _boundary,
                null,
                true,
                "POST",
                StreamEncoding,
                false);

            DownloaderObj upload = new DownloaderObj(
                _postUri, EndPostImage, true, null, CookieOptions.Empty, 4,
                null, null, false, 1000, null, requestSettings);

            upload.PostData = formBody;
            Downloader.Queue(upload);
        }
Example #9
0
        /// <summary>
        /// Expands <paramref name="startKey"/> into a list of search keys: each key already in the
        /// list is searched on Google, and the query strings captured from the result page are appended
        /// (duplicates removed), until at least <paramref name="count"/> keys are collected or every
        /// key has been visited.
        /// </summary>
        /// <param name="count">Number of keys after which the loop stops.</param>
        /// <param name="startKey">Seed key; always the first element of the result.</param>
        /// <returns>The collected keys, or <c>null</c> as soon as one page fails to download.</returns>
        public static List<string> ParseGoogleKeys(int count, string startKey)
        {
            List<string> keys = new List<string>();
            keys.Add(startKey);

            string pattern = "<p[^>]*><a href=\".*?q=([^&]*)&[^>]*?>.*?</a></p>";
            Regex rx = new Regex(pattern, RegexOptions.Compiled);

            int i = 0;
            // Short-circuit '&&' replaces the original non-short-circuit '&'.
            while (count > keys.Count && i < keys.Count)
            {
                string key = Uri.EscapeUriString(keys[i].Replace(' ', '+'));

                Uri uri = new Uri("http://www.google.ru/search?q=" + key + "&sourceid=opera&num=0&ie=utf-8&oe=utf-8&start=0");

                DownloaderObj obj = new DownloaderObj(uri, null, true, null, CookieOptions.NoCookies, 3);
                Downloader.DownloadSync(obj);
                if (obj.DataStr == null)
                {
                    return null;
                }

                foreach (Match m in rx.Matches(obj.DataStr))
                {
                    keys.Add(m.Groups[1].Value);
                }
                keys = keys.Distinct().ToList<string>();

                // Copy the delegate first so that an unsubscribe between the null check and the
                // call cannot cause a NullReferenceException.
                var progressHandler = OnKeyPageParsed;
                if (progressHandler != null)
                {
                    progressHandler(null, new KeyEventArgs(keys.Count, i, count));
                }
                i++;
            }
            return keys;
        }
Example #10
0
        /// <summary>
        /// Stores the result of the Google page check on the proxy attached to <paramref name="obj"/>:
        /// rate 1 when the downloaded page passes <c>PageIsOk</c>, otherwise 0, and bumps the check counter.
        /// </summary>
        /// <param name="obj">Finished download whose <c>DataStr</c> is inspected.</param>
        void CheckGooglePage(DownloaderObj obj)
        {
            if (PageIsOk(obj.DataStr, GlobalResourceCache.GooglePagePattern))
            {
                obj.Proxy.GoogleRate = 1;
            }
            else
            {
                obj.Proxy.GoogleRate = 0;
            }

            obj.Proxy.GoogleChecked++;
        }
Example #11
0
        /// <summary>
        /// Starts one asynchronous attempt for <paramref name="obj"/>: consumes an attempt, builds the
        /// request, writes the POST body if there is one and begins waiting for the response
        /// (completion is handled by <see cref="EndReceive"/>). Does nothing when no attempts are left.
        /// Any exception is logged and handed to <c>RetryOrCallback</c>.
        /// </summary>
        internal void BeginReceive(DownloaderObj obj)
        {
            if (obj.Attempts <= 0)
            {
                return;
            }

            obj.Attempts--;
            try
            {
                obj.Request = CreateRequest(obj);

                bool hasBody = obj.PostData != null;
                if (hasBody)
                {
                    PostRequestData(obj.Request, obj.PostData);
                }

                obj.Request.BeginGetResponse(EndReceive, obj);
            }
            catch (WebException webError)
            {
                GlobalLog.Err(webError, "Host: " + obj.Uri.Host);
                // Web failures additionally update the download state before the retry decision.
                HandleWebState(webError, obj);
                RetryOrCallback(obj);
            }
            catch (Exception error)
            {
                GlobalLog.Err(error, "Host: " + obj.Uri.Host);
                RetryOrCallback(obj);
            }
        }
Example #12
0
        //public void QueueGooglePages(int numPage, string key, Regex rx, WaitObj waiter)
        //{
        //    Uri uri = new Uri("http://www.google.ru/search?q=" + key + "&sourceid=opera&num=0&ie=utf-8&oe=utf-8&start=" + numPage);
        //    DownloaderObj obj = new DownloaderObj(uri, EndGetPage, true, null, false, 10, rx);
        //    Downloader.Queue(obj);
        //}

        /// <summary>
        /// Callback for a downloaded result page. On success it extracts the first capture group of
        /// every regex match, raises <c>OnParsed</c> with the list and raises <c>OnCompleted</c> when
        /// the shared counter reaches zero. On failure it resets the attempts to 10 and switches the
        /// callback to <c>EndGetPageWithProxy</c>.
        /// </summary>
        /// <param name="obj">Finished download; <c>obj.Arg</c> is expected to be { Regex, WaitObj, ... }.</param>
        void EndGetPage(DownloaderObj obj)
        {
            object[] callbackArgs = obj.Arg as object[];

            if (obj.DataStr == null)
            {
                // NOTE(review): nothing here re-queues obj — presumably the caller does; confirm.
                obj.Attempts = 10;
                obj.CallBack = EndGetPageWithProxy;
                return;
            }

            List<string> urls    = new List<string>();
            Regex        linkRx  = callbackArgs[0] as Regex;
            WaitObj      pending = callbackArgs[1] as WaitObj;

            foreach (Match hit in linkRx.Matches(obj.DataStr))
            {
                urls.Add(hit.Groups[1].Value);
            }

            if (OnParsed != null)
            {
                OnParsed(urls);
            }

            bool lastPage = Interlocked.Decrement(ref pending.Count) == 0;
            if (lastPage && OnCompleted != null)
            {
                OnCompleted(this, EventArgs.Empty);
            }
        }
Example #13
0
        /// <summary>
        /// Decides who is to blame for a failed download and updates the proxy pool and target
        /// queues accordingly.
        /// </summary>
        /// <param name="obj">Failed download; <c>obj.Arg</c> carries the <c>DevourTarget</c>.</param>
        private void HandleBadDownload(DownloaderObj obj)
        {
            DevourTarget target = obj.Arg as DevourTarget;

            bool documentAtFault = obj.State == HttpDownloadResult.BadAddress ||
                                   obj.State == HttpDownloadResult.DocumentUnavailable;

            if (documentAtFault)
            {
                // The document itself is bad: give up on it, the proxy is released without a success mark.
                AddFaultTarget(target);
                _proxies.Release(obj.PrxContainer as ProxyContainer, false);
            }
            else if (obj.State == HttpDownloadResult.ProxyError)
            {
                // The proxy is bad: drop it and put the document back into the queue.
                _proxies.Fire(obj.PrxContainer as ProxyContainer);
                _targets.Enqueue(target);
            }
            else
            {
                // Unclear failure: the target loses a life and is given up once none are left.
                if (--target.Lifes <= 0)
                {
                    AddFaultTarget(target);
                }
                _proxies.Release(obj.PrxContainer as ProxyContainer, false);
            }
        }
Example #14
0
        /// <summary>
        /// Download callback: a download counts as successful when data arrived and, if a validator
        /// is configured, the validator accepts it. Dispatches to the success or failure handler and
        /// then signals completion of the object.
        /// </summary>
        private void DevourCallback(DownloaderObj obj)
        {
            bool accepted = obj.DataStr != null &&
                            (_validator == null || _validator.Validate(obj.DataStr));

            if (accepted)
            {
                HandleSuccessDownload(obj);
            }
            else
            {
                HandleBadDownload(obj);
            }

            MoveObjComplete();
        }
Example #15
0
 /// <summary>
 /// Downloads <paramref name="uri"/> synchronously and parses proxies out of the page text.
 /// </summary>
 /// <returns>The result of <c>ParseProxy</c>, or <c>null</c> when the page could not be downloaded.</returns>
 public static List<RatedProxy> ParseProxyFromPage(Uri uri)
 {
     DownloaderObj page = new DownloaderObj(uri, null, true);
     Downloader.DownloadSync(page);

     return page.DataStr == null ? null : ParseProxy(page.DataStr);
 }
Example #16
0
        /// <summary>
        /// Extracts proxies from a listing page where the port is rendered as an image. Each port
        /// image is downloaded and hashed with MD5; the hash is looked up in the known port hashes.
        /// Images with an unknown hash are collected and passed to <c>AddUnknownPortImage</c>.
        /// </summary>
        /// <param name="data">Page markup; may be <c>null</c>.</param>
        /// <returns>The proxies with a valid IP and port, or <c>null</c> when <paramref name="data"/> is null.</returns>
        public List <RatedProxy> ParsePage(string data)
        {
            if (data == null)
            {
                return(null);
            }
            List <RatedProxy>           proxies           = new List <RatedProxy>();
            Dictionary <string, string> imageLinksAndHash = new Dictionary <string, string>();

            // The dot before "gif" is escaped so that it only matches a literal '.'.
            string ipPattern = @"<td>(?<ip>[^<]*)</td><td><img src=""(?<image>/images/proxylist_port_\d*\.gif)""></td>";
            Regex  ipRx      = new Regex(ipPattern);

            MatchCollection ipMatches = ipRx.Matches(data);

            Hashtable portHashes = LoadPortHashes();

            foreach (Match ipMatch in ipMatches)
            {
                string imagePath = "http://hideme.ru" + ipMatch.Groups["image"].Value;

                DownloaderObj obj = new DownloaderObj(new Uri(imagePath), null, false, null);
                Downloader.DownloadSync(obj);
                if (obj.Data == null)
                {
                    continue;
                }

                string imageHash = GetMd5HashString(obj.Data);
                if (imageHash == null)
                {
                    continue;
                }

                if (portHashes.Contains(imageHash))
                {
                    string port = portHashes[imageHash] as string;
                    string ip   = ipMatch.Groups["ip"].Value;

                    // "as string" yields null for a non-string table entry; skip it instead of
                    // passing null on to the validation helper.
                    if (port != null && ip.IsValidIP() && port.IsValidPort())
                    {
                        proxies.Add(new RatedProxy(ip + ":" + port));
                    }
                }
                else if (!imageLinksAndHash.ContainsKey(imageHash))
                {
                    // Remember one link per unknown hash so the port image can be identified later.
                    imageLinksAndHash.Add(imageHash, imagePath);
                }
            }
            AddUnknownPortImage(imageLinksAndHash);
            return(proxies);
        }
Example #17
0
        /// <summary>
        /// Queues the download of one target through the given proxy container, with
        /// <see cref="DevourCallback"/> as the completion callback, and then signals that an object
        /// was queued.
        /// </summary>
        private void DevourOne(ProxyContainer proxyCont, DevourTarget target)
        {
            DownloaderObj download = new DownloaderObj(
                target.Uri,
                DevourCallback,
                true,
                proxyCont,
                this.CookieOptions,
                target.Attempts,
                target,
                null,
                false,
                1000,
                this.TimingParams,
                this.RequestParams);

            Downloader.Queue(download);
#if DEBUG
            // Debug builds keep a running count of the queued objects.
            Interlocked.Increment(ref __queuedObjects);
            GlobalLog.Write("__queuedObjects: {0}", __queuedObjects);
#endif
            MoveObjQueued();
        }
Example #18
0
 /// <summary>
 /// Maps a <see cref="WebException"/> to a <c>ResponseState</c>: by HTTP status code when the
 /// exception carries an HTTP response, otherwise by the exception status.
 /// </summary>
 /// <param name="e">The web exception to classify.</param>
 /// <param name="obj">Download the exception belongs to (not used by this method).</param>
 public ResponseState GetWebState(WebException e, DownloaderObj obj)
 {
     // e.Response may be null or a non-HTTP response. In both cases the "as" cast yields null,
     // and the exception status is used instead of dereferencing null.
     HttpWebResponse httpResponse = e.Response as HttpWebResponse;
     if (httpResponse == null)
     {
         return(HandleWebExcStatus(e.Status));
     }

     return(HandleHttpCode(httpResponse.StatusCode));
 }
Example #19
0
 /// <summary>
 /// Fetches cookies for <paramref name="obj"/> through a fresh <c>MagicClient</c>, unless the
 /// object's cookie options contain <c>NoCookies</c>.
 /// </summary>
 /// <returns>The client's result, or <c>null</c> when cookies are disabled for the object.</returns>
 public static CookieCollection GetCookies(DownloaderObj obj)
 {
     if (obj.CookieOptions.HasFlag(CookieOptions.NoCookies))
     {
         return(null);
     }

     MagicClient client = new MagicClient();
     return(client.GetCookies(obj));
 }
Example #20
0
 /// <summary>
 /// Downloads <paramref name="address"/> synchronously and returns the first capture group of
 /// <paramref name="pattern"/> in the page text.
 /// </summary>
 /// <returns>
 /// The captured text (an empty string when the pattern does not match), or <c>null</c> when the
 /// page could not be downloaded.
 /// </returns>
 private static string Parse(string address, string pattern)
 {
     Uri uri = UriHandler.CreateUri(address);

     // Flags are combined with '|'. The original '&' of two different flags keeps only their
     // common bits, which for distinct flags is none.
     // NOTE(review): assumes UseShared and SaveShared are distinct bit flags — confirm in CookieOptions.
     DownloaderObj obj = new DownloaderObj(uri, null, true, null, CookieOptions.UseShared | CookieOptions.SaveShared, 5);
     Downloader.DownloadSync(obj);

     if (obj.DataStr == null)
     {
         return null;
     }
     return (new Regex(pattern)).Match(obj.DataStr).Groups[1].ToString();
 }
Example #21
0
        /// <summary>
        /// Downloads <paramref name="uri"/> synchronously and parses proxies out of the page text.
        /// </summary>
        /// <returns>The result of <c>ParseProxy</c>, or <c>null</c> when the page could not be downloaded.</returns>
        public static List <RatedProxy> ParseProxyFromPage(Uri uri)
        {
            DownloaderObj page = new DownloaderObj(uri, null, true);
            Downloader.DownloadSync(page);

            if (page.DataStr != null)
            {
                return ParseProxy(page.DataStr);
            }
            return null;
        }
Example #22
0
 /// <summary>
 /// Download callback: parses proxies from the page, appends them to <c>SerpProxy</c> under its
 /// lock and reports the number found together with the page address.
 /// Does nothing when the parser returns <c>null</c>.
 /// </summary>
 private void ParseSerpData(DownloaderObj obj)
 {
     List<RatedProxy> parsed = ProxyParser.ParseProxy(obj.DataStr);
     if (parsed == null)
     {
         return;
     }

     lock (SerpProxy)
     {
         SerpProxy.AddRange(parsed);
     }

     if (OnUrlsPrsProgrChanged != null)
     {
         OnUrlsPrsProgrChanged(parsed.Count, obj.Uri.OriginalString);
     }
 }
Example #23
0
 /// <summary>
 /// Retries the download after a randomised pause while attempts remain; otherwise hands the
 /// object to <c>CallbackAndContinue</c>.
 /// </summary>
 void RetryOrCallback(DownloaderObj obj)
 {
     if (obj.Attempts <= 0)
     {
         CallbackAndContinue(obj);
         return;
     }

     // Pause somewhere between 0.5x and 1.5x of the configured attempt pause.
     int shortestPause = obj.AttemptPause / 2;
     int longestPause  = (int)(obj.AttemptPause * 1.5);

     Thread.Sleep(Rnd.Next(shortestPause, longestPause));
     BeginReceive(obj);
 }
Example #24
0
        /// <summary>
        /// Download callback: lets the site provider parse the page text and forwards the result,
        /// together with the shared waiter, to <c>NotifyAboutProgress</c>.
        /// </summary>
        /// <param name="obj">Finished download; <c>obj.Arg</c> must be { IProxySiteProvider, WaitObj }.</param>
        void EndDownloadAndParse(DownloaderObj obj)
        {
            object[] callbackArgs = obj.Arg as object[];

            IProxySiteProvider provider = callbackArgs[0] as IProxySiteProvider;
            WaitObj pending = callbackArgs[1] as WaitObj;

            List<RatedProxy> parsed = provider.ParsePage(obj.DataStr);
            NotifyAboutProgress(pending, parsed);
        }
Example #25
0
        /// <summary>
        /// Download callback: lets the site provider parse the page text and forwards the result,
        /// together with the shared waiter, to <c>NotifyAboutProgress</c>.
        /// </summary>
        /// <param name="obj">Finished download; <c>obj.Arg</c> must be { IProxySiteProvider, WaitObj }.</param>
        void EndDownloadAndParse(DownloaderObj obj)
        {
            object[] callbackArgs = obj.Arg as object[];

            IProxySiteProvider provider = callbackArgs[0] as IProxySiteProvider;
            WaitObj            pending  = callbackArgs[1] as WaitObj;

            List <RatedProxy> parsed = provider.ParsePage(obj.DataStr);
            NotifyAboutProgress(pending, parsed);
        }
Example #26
0
 /// <summary>
 /// Download callback for a category page: extracts the product URLs, appends them to the shared
 /// list and saves them (both under the list's lock), then prints the running total.
 /// Does nothing when the page was not downloaded.
 /// </summary>
 /// <param name="obj">Finished download; <c>obj.Arg</c> is the boxed category index.</param>
 void GetProductCallback(DownloaderObj obj)
 {
     if (obj.DataStr == null)
     {
         return;
     }

     List <Uri> products = GetProductsUrls(obj.DataStr);
     int totalFound;
     lock (allProductsUrlsSync)
     {
         allProductsUrls.AddRange(products);
         SaveProducts(products);
         // Read the count while still holding the lock; reading it afterwards raced with
         // other callbacks that were appending to the same list.
         totalFound = allProductsUrls.Count;
     }
     Console.WriteLine("Found {0} products on categoryIndx {1}", totalFound, (int)obj.Arg);
 }
Example #27
0
        //ASYNC(NOT TESTED)
        #region Async RBL
        //void BeginRBLCheck(RatedProxy proxy)
        //{
        //    List<string[]> rblList = GlobalResourceCache.RBLList;
        //    int indx = 0;
        //    double rblBanRate = 0;
        //    int rblChecks = rblList.Count;

        //    BeginRBLCheck(indx, rblBanRate, rblChecks, proxy);
        //}
        //void EndRBLCheck(DownloaderObj obj)
        //{
        //    object[] args = obj.Arg as object[];
        //    int indx = (int)args[0];
        //    double rblBanRate = (double)args[1];
        //    int rblChecks = (int)args[2];

        //    List<string[]> rblList = GlobalResourceCache.RBLList;
        //    string[] rblSet = rblList[indx];
        //    Regex positiveRx = new Regex(rblSet[1]);
        //    Regex negativeRx = new Regex(rblSet[2]);

        //    double rate = GetRblBanRate(obj.DataStr, positiveRx, negativeRx);
        //    if (rate > -1)
        //    {
        //        rblBanRate += rate;
        //    }
        //    else
        //        rblChecks--;

        //    indx++;
        //    if (indx < rblList.Count)
        //    {
        //        BeginRBLCheck(indx, rblBanRate, rblChecks, obj.PrxContainer);
        //    }
        //    else if (rblChecks > 0)
        //    {
        //        obj.PrxContainer.RBLBanRate = rblBanRate / rblChecks;
        //    }
        //}
        //double GetRblBanRate(string data, Regex positiveRx, Regex negativeRx)
        //{
        //    if (data == null)
        //        return -1;
        //    double blocked = negativeRx.Matches(data).Count;
        //    double ok = positiveRx.Matches(data).Count;

        //    bool noResults = blocked == 0 && ok == 0;

        //    if (noResults)
        //        return -1;
        //    if (blocked == 0)
        //        return 0;
        //    if (ok == 0)
        //        return 1;

        //    return blocked / ok;
        //}
        //void BeginRBLCheck(int indx, double rblBanRate, int rblChecks, RatedProxy proxy)
        //{
        //    List<string[]> rblList = GlobalResourceCache.RBLList;

        //    if (indx < rblList.Count)
        //    {
        //        string[] rblSet = rblList[indx];
        //        DownloaderObj obj = new DownloaderObj(new Uri(rblSet[0]), EndRBLCheck, true, proxy, CookieOptions.NoCookies, 3, new object[] { indx, rblBanRate, rblChecks });
        //        Downloader.Queue(obj);
        //    }
        //}
        #endregion
        #endregion

        #region DownloadsCheck
        /// <summary>
        /// Starts the multi-download check for <paramref name="proxy"/>: loads the site patterns and
        /// queues one download per pattern, each with <see cref="EndDownload"/> as callback and
        /// { pattern, shared waiter, total pattern count } as argument.
        /// </summary>
        void BeginDownloads(RatedProxy proxy)
        {
            List <PatternsContainer> sitePatterns = PagePatternGrabber.LoadPatterns(PATH.TagClassPatterns);
            SyncWaitObj sharedWaiter = new SyncWaitObj(0);

            foreach (PatternsContainer pattern in sitePatterns)
            {
                object[] callbackArgs = new object[] { pattern, sharedWaiter, sitePatterns.Count };

                Downloader.Queue(new DownloaderObj(
                    pattern.Uri,
                    EndDownload,
                    true,
                    proxy,
                    CookieOptions.NoCookies,
                    DownloadsAttempts,
                    callbackArgs));
            }
        }
Example #28
0
        /// <summary>
        /// Queues the download of <paramref name="count"/> pages. The address of page <c>i</c>
        /// (0-based) is <paramref name="uriStr"/> with every occurrence of
        /// <paramref name="replaseSubstr"/> replaced by <c>i</c>. Each page is handled by
        /// <see cref="EndDownloadAndParse"/>.
        /// </summary>
        /// <param name="count">Number of pages; must be positive.</param>
        /// <param name="uriStr">Address template.</param>
        /// <param name="replaseSubstr">Placeholder inside the template.</param>
        /// <param name="proxySiteProvider">Parser passed on to the callback.</param>
        /// <exception cref="ArgumentException">An argument is missing or <paramref name="count"/> is not positive.</exception>
        public void BeginDownloadPages(int count, string uriStr, string replaseSubstr, IProxySiteProvider proxySiteProvider)
        {
            // A negative count used to pass validation and create a waiter that could never complete.
            if (count <= 0 || string.IsNullOrEmpty(uriStr) || string.IsNullOrEmpty(replaseSubstr) || proxySiteProvider == null)
            {
                throw new ArgumentException("Bad argumenst");
            }

            WaitObj waiter = new WaitObj(count);

            for (int i = 0; i < count; i++)
            {
                Uri uri = new Uri(uriStr.Replace(replaseSubstr, i.ToString()));
                DownloaderObj obj = new DownloaderObj(uri, EndDownloadAndParse, true, null, CookieOptions.NoCookies, 10, new object[] { proxySiteProvider, waiter });
                Downloader.Queue(obj);
            }
        }
Example #29
0
        /// <summary>
        /// Callback of the image upload: passes the server reply to <c>HandlePostState</c>, or
        /// reports a connection problem when no reply text was received.
        /// </summary>
        private void EndPostImage(DownloaderObj obj)
        {
            if (obj.DataStr == null)
            {
#if DEBUG
                GlobalLog.Err("null data in EndPostImage, POST_DATA:\n{0}", obj.PostData);
#endif
                Err(ErrorState.ConnectProblem);
                return;
            }

            HandlePostState(obj.DataStr);
        }
Example #30
0
        /// <summary>
        /// Downloads <paramref name="address"/> synchronously and returns the first capture group of
        /// <paramref name="pattern"/> in the page text.
        /// </summary>
        /// <returns>
        /// The captured text (an empty string when the pattern does not match), or <c>null</c> when
        /// the page could not be downloaded.
        /// </returns>
        private static string Parse(string address, string pattern)
        {
            Uri uri = UriHandler.CreateUri(address);

            // Flags are combined with '|'. The original '&' of two different flags keeps only
            // their common bits, which for distinct flags is none.
            // NOTE(review): assumes UseShared and SaveShared are distinct bit flags — confirm in CookieOptions.
            DownloaderObj obj = new DownloaderObj(uri, null, true, null, CookieOptions.UseShared | CookieOptions.SaveShared, 5);

            Downloader.DownloadSync(obj);
            if (obj.DataStr == null)
            {
                return(null);
            }
            return((new Regex(pattern)).Match(obj.DataStr).Groups[1].ToString());
        }
Example #31
0
 /// <summary>
 /// Runs the object's callback, if it has one, and then lets the downloader process the next
 /// item. An exception thrown by the callback is logged and does not prevent the continuation.
 /// </summary>
 void CallbackAndContinue(DownloaderObj obj)
 {
     bool hasCallback = obj.CallBack != null;
     if (hasCallback)
     {
         try
         {
             obj.CallBack(obj);
         }
         catch (Exception callbackError)
         {
             GlobalLog.Err(callbackError, "CallBack err");
         }
     }

     Downloader.ProcessNext(obj.Uri);
 }
Example #32
0
        /// <summary>
        /// Updates <c>obj.State</c> from the current request/response pair and reacts to it: states
        /// for which a retry is pointless zero the remaining attempts, and an unavailable service
        /// causes a randomised pause while attempts remain.
        /// </summary>
        /// <param name="e">The exception that triggered the call (not read by this method).</param>
        /// <param name="obj">Download whose State and Attempts are updated.</param>
        void HandleWebState(WebException e, DownloaderObj obj)
        {
            DownloadStateProvider stateProvider = new DownloadStateProvider();
            obj.State = stateProvider.GetWebState(obj.Request, obj.Response);

            bool retryIsPointless = obj.State == DownloadState.BadAddress ||
                                    obj.State == DownloadState.DocumentUnavailable ||
                                    obj.State == DownloadState.ProxyError;
            if (retryIsPointless)
            {
                obj.Attempts = 0;
            }

            if (obj.Attempts > 0 && obj.State == DownloadState.ServiceUnavailable)
            {
                // Pause somewhere between 0.5x and 1.5x of the configured attempt pause.
                int shortestPause = obj.AttemptPause / 2;
                int longestPause  = (int)(obj.AttemptPause * 1.5);
                Thread.Sleep(Rnd.Next(shortestPause, longestPause));
            }
        }
Example #33
0
        /// <summary>
        /// Download callback: parses proxies from the page, appends them to <c>SerpProxy</c> under
        /// its lock and reports the number found together with the page address.
        /// Does nothing when the parser returns <c>null</c>.
        /// </summary>
        private void ParseSerpData(DownloaderObj obj)
        {
            List <RatedProxy> parsed = ProxyParser.ParseProxy(obj.DataStr);
            if (parsed == null)
            {
                return;
            }

            lock (SerpProxy)
            {
                SerpProxy.AddRange(parsed);
            }

            if (OnUrlsPrsProgrChanged != null)
            {
                OnUrlsPrsProgrChanged(parsed.Count, obj.Uri.OriginalString);
            }
        }
Example #34
0
        /// <summary>
        /// Queues the download of <paramref name="count"/> pages. The address of page <c>i</c>
        /// (0-based) is <paramref name="uriStr"/> with every occurrence of
        /// <paramref name="replaseSubstr"/> replaced by <c>i</c>. Each page is handled by
        /// <see cref="EndDownloadAndParse"/>.
        /// </summary>
        /// <param name="count">Number of pages; must be positive.</param>
        /// <param name="uriStr">Address template.</param>
        /// <param name="replaseSubstr">Placeholder inside the template.</param>
        /// <param name="proxySiteProvider">Parser passed on to the callback.</param>
        /// <exception cref="ArgumentException">An argument is missing or <paramref name="count"/> is not positive.</exception>
        public void BeginDownloadPages(int count, string uriStr, string replaseSubstr, IProxySiteProvider proxySiteProvider)
        {
            // A negative count used to pass validation and create a waiter that could never complete.
            if (count <= 0 || string.IsNullOrEmpty(uriStr) || string.IsNullOrEmpty(replaseSubstr) || proxySiteProvider == null)
            {
                throw new ArgumentException("Bad argumenst");
            }

            WaitObj waiter = new WaitObj(count);

            for (int i = 0; i < count; i++)
            {
                Uri           uri = new Uri(uriStr.Replace(replaseSubstr, i.ToString()));
                DownloaderObj obj = new DownloaderObj(uri, EndDownloadAndParse, true, null, CookieOptions.NoCookies, 10, new object[] { proxySiteProvider, waiter });
                Downloader.Queue(obj);
            }
        }
Example #35
0
        /// <summary>
        /// Queues pages 1 to 5 of every category for download, with <see cref="GetProductCallback"/>
        /// as callback and the category's position in the list as argument.
        /// </summary>
        /// <param name="categories">Category base addresses; "?pg=N" is appended to each.</param>
        /// <returns>
        /// The shared product URL list.
        /// NOTE(review): the downloads are only queued here, so the list may still be filling
        /// when this method returns — confirm how callers wait for completion.
        /// </returns>
        public List <Uri> GetAllCategoriesProducts(List <string> categories)
        {
            Downloader.MaxParallelRequests = 20;

            int categoryIndex = 0;
            foreach (string category in categories)
            {
                int page = 1;
                while (page < 6)
                {
                    Uri pageUri = new Uri(category + "?pg=" + page);
                    Downloader.Queue(new DownloaderObj(pageUri, GetProductCallback, true, null, CookieOptions.NoCookies, 100, categoryIndex));
                    page++;
                }
                categoryIndex++;
            }

            return allProductsUrls;
        }
Example #36
0
        /// <summary>
        /// For every theme, downloads its Yandex Wordstat page, extracts the number shown next to
        /// the theme's own link and adds it to the weight stored in <c>theme[2]</c>. A theme whose
        /// page cannot be downloaded or parsed keeps its current weight. Sleeps 4 to 6 seconds
        /// between requests.
        /// </summary>
        private void SetYandexWordsWeight()
        {
            foreach (string[] theme in allThemes)
            {
                string escape = Uri.EscapeUriString(theme[0]);

                Uri           pageUri = new Uri("http://wordstat.yandex.ru/?cmd=words&page=1&t=" + escape + "&geo=&text_geo=");
                DownloaderObj obj     = new DownloaderObj(pageUri, null);
                Downloader.DownloadSync(obj);

                // A failed download leaves DataStr null; skip the theme instead of crashing in Regex.Match.
                if (obj.DataStr != null)
                {
                    // Two fixes to the pattern: the '?' after href=" is escaped (unescaped it made the
                    // quote optional instead of matching a literal '?'), and the theme text goes through
                    // Regex.Escape so that its characters are matched literally.
                    string pattern = @"<a href=""\?cmd=words&amp;page=1&amp;ts=[^;]*;key=[^;]*;t=" + Regex.Escape(escape) + @""">[^<]*</a>\s*</td>\s*" +
                                     @"<td><[^>]*></div>[^<]*</td>\s*" +
                                     "<td[^>]*>([^<]*)</td>";

                    Match weightMatch = Regex.Match(obj.DataStr, pattern);
                    int   pageWeight;
                    if (weightMatch.Success && Int32.TryParse(weightMatch.Groups[1].Value, out pageWeight))
                    {
                        theme[2] = (pageWeight + Int32.Parse(theme[2])).ToString();
                    }
                }

                Thread.Sleep(Rnd.Next(4000, 6000));
            }
        }
Example #37
0
        /// <summary>
        /// Completion callback passed to <c>BeginGetResponse</c>: finishes the asynchronous request,
        /// processes redirects/cookies and tries to read the body. On success the object's callback
        /// is run; on any failure the response is closed and the object goes to
        /// <c>RetryOrCallback</c>.
        /// </summary>
        /// <param name="ar">Async result whose <c>AsyncState</c> is the <c>DownloaderObj</c>.</param>
        internal void EndReceive(IAsyncResult ar)
        {
            // Suppress execution-context flow on this thread unless it is already suppressed.
            if (!ExecutionContext.IsFlowSuppressed())
            {
                ExecutionContext.SuppressFlow();
            }

            DownloaderObj obj = ar.AsyncState as DownloaderObj;

            try
            {
#if DEBUG
                GlobalLog.Write("Get responce from " + obj.Uri.Host);
#endif
                obj.Response = obj.Request.EndGetResponse(ar) as HttpWebResponse;

                HandleRedirectAndCookies(obj);

                if (TryReceiveData(obj))
                {
                    // Success path: the callback runs inside the try block, and the finally block
                    // below still closes the response before this method returns.
                    CallbackAndContinue(obj);
                    return;
                }
#if DEBUG
                GlobalLog.Write("CANT Downloaded data from " + obj.Uri.Host);
#endif
            }
            catch (WebException e)
            {
                // Web failures are logged and also update the download state / remaining attempts.
                GlobalLog.Err(e, "Host: " + obj.Uri.Host);
                HandleWebState(e, obj);
            }
            catch (Exception e)
            {
                // Anything else is only logged; the retry decision is made below.
                GlobalLog.Err(e, "Host: " + obj.Uri.Host);
            }
            finally
            {
                if (obj.Response != null)
                {
                    obj.Response.Close();
                }
            }
            // Reached only when the data was not received or an exception was caught.
            RetryOrCallback(obj);
        }
        /// <summary>
        /// Extracts proxies from a listing page where the port is rendered as an image. Each port
        /// image is downloaded and hashed with MD5; the hash is looked up in the known port hashes.
        /// Images with an unknown hash are collected and passed to <c>AddUnknownPortImage</c>.
        /// </summary>
        /// <param name="data">Page markup; may be <c>null</c>.</param>
        /// <returns>The proxies with a valid IP and port, or <c>null</c> when <paramref name="data"/> is null.</returns>
        public List<RatedProxy> ParsePage(string data)
        {
            if (data == null) return null;
            List<RatedProxy> proxies = new List<RatedProxy>();
            Dictionary<string, string> imageLinksAndHash = new Dictionary<string, string>();

            // The dot before "gif" is escaped so that it only matches a literal '.'.
            string ipPattern = @"<td>(?<ip>[^<]*)</td><td><img src=""(?<image>/images/proxylist_port_\d*\.gif)""></td>";
            Regex ipRx = new Regex(ipPattern);

            MatchCollection ipMatches = ipRx.Matches(data);

            Hashtable portHashes = LoadPortHashes();

            foreach (Match ipMatch in ipMatches)
            {
                string imagePath = "http://hideme.ru" + ipMatch.Groups["image"].Value;

                DownloaderObj obj = new DownloaderObj(new Uri(imagePath), null, false, null);
                Downloader.DownloadSync(obj);
                if (obj.Data == null)
                    continue;

                string imageHash = GetMd5HashString(obj.Data);
                if (imageHash == null)
                    continue;

                if (portHashes.Contains(imageHash))
                {
                    string port = portHashes[imageHash] as string;
                    string ip = ipMatch.Groups["ip"].Value;

                    // "as string" yields null for a non-string table entry; skip it instead of
                    // passing null on to the validation helper.
                    if (port != null && ip.IsValidIP() && port.IsValidPort())
                        proxies.Add(new RatedProxy(ip + ":" + port));
                }
                else if (!imageLinksAndHash.ContainsKey(imageHash))
                {
                    // Remember one link per unknown hash so the port image can be identified later.
                    imageLinksAndHash.Add(imageHash, imagePath);
                }
            }
            AddUnknownPortImage(imageLinksAndHash);
            return proxies;
        }
Example #39
0
 /// <summary>
 /// Checks whether the target of <paramref name="obj"/> answers: sends the request synchronously
 /// and reports whether a response was received. Each try consumes one of <c>obj.Attempts</c>;
 /// web failures are logged, classified by <c>HandleWebState</c> and retried while attempts remain.
 /// </summary>
 /// <param name="obj">Download descriptor; its Request, Response and Attempts members are updated.</param>
 /// <returns><c>true</c> when a response was received, otherwise <c>false</c>.</returns>
 public bool HaveResponce(DownloaderObj obj)
 {
     while (obj.Attempts > 0)
     {
         obj.Attempts--;
         try
         {
             obj.Request = CreateRequest(obj);

             // The request has to be sent before HaveResponse can become true. The original read
             // the flag on a freshly created request and therefore always got false.
             obj.Response = obj.Request.GetResponse() as HttpWebResponse;
             return obj.Request.HaveResponse;
         }
         catch (WebException e)
         {
             GlobalLog.Err(e, "Host: " + obj.Uri.Host);
             HandleWebState(e, obj);
         }
         // Any other exception propagates unchanged. The former "catch (Exception e) { throw e; }"
         // only reset the stack trace, so it was removed.
         finally
         {
             // Same clean-up as the cookie-fetching loop: abort the request, then close the response.
             if (obj.Request != null)
             {
                 obj.Request.Abort();
             }
             if (obj.Response != null)
             {
                 obj.Response.Close();
             }
         }
     }
     return false;
 }
Example #40
0
 /// <summary>
 /// Retries the download after a randomised pause while attempts remain; otherwise hands the
 /// object to <c>CallbackAndContinue</c>.
 /// </summary>
 void RetryOrCallback(DownloaderObj obj)
 {
     if (obj.Attempts <= 0)
     {
         CallbackAndContinue(obj);
         return;
     }

     // Pause somewhere between 0.5x and 1.5x of the configured attempt pause.
     int shortestPause = obj.AttemptPause / 2;
     int longestPause  = (int)(obj.AttemptPause * 1.5);

     Thread.Sleep(Rnd.Next(shortestPause, longestPause));
     BeginReceive(obj);
 }
Example #41
0
        //Method need some refactoring
        /// <summary>
        /// Reads the response body and refreshes <c>obj.State</c>. When a body was read it is stored
        /// either as text (decoded with the request's encoding) or as raw bytes, depending on
        /// <c>obj.NeedString</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a body was read; without a body, <c>true</c> only for the
        /// <c>Success_2xx</c> and <c>Info_1xx</c> states.
        /// </returns>
        private bool TryReceiveData(DownloaderObj obj)
        {
            byte[] payload = ReadResponseStream(obj.Response, obj.TimingParams);
            obj.State = new DownloadStateProvider().GetWebState(obj.Request, obj.Response);

            if (payload == null)
            {
                return obj.State == DownloadState.Success_2xx || obj.State == DownloadState.Info_1xx;
            }

            if (obj.NeedString)
            {
                obj.DataStr = obj.RequestParam.Encoding.GetString(payload);
            }
            else
            {
                obj.Data = payload;
            }
            return true;
        }
Example #42
0
        /// <summary>
        /// Handles a successful download: releases the proxy with a success mark, feeds the page
        /// text to the target's reader and records the address as done. A failure inside the reader
        /// is logged and does not stop the address from being recorded.
        /// </summary>
        /// <param name="obj">Finished download; <c>obj.Arg</c> carries the <c>DevourTarget</c>.</param>
        private void HandleSuccessDownload(DownloaderObj obj)
        {
            DevourTarget target = obj.Arg as DevourTarget;

            _proxies.Release(obj.PrxContainer as ProxyContainer, true);

            try
            {
                MoveReadQueue();
                target.Reader.ReadData(obj.DataStr, target);
                MoveReadComplete(target);
            }
            catch (Exception readError)
            {
                GlobalLog.Err(readError, "error while reading data in devourer");
            }

            AddSuccessTarget(obj.Uri);
        }
Example #43
0
 /// <summary>
 /// Downloads one Yandex search result page for <paramref name="key"/> synchronously.
 /// NOTE(review): <paramref name="rx"/> and <paramref name="waiter"/> are not used and the
 /// downloaded page is discarded — this looks unfinished; confirm.
 /// </summary>
 void GetYandexPage(int numPage, string key, Regex rx, WaitObj waiter)
 {
     string address = "http://yandex.ru/yandsearch?p=" + numPage + "&text=" + Uri.EscapeDataString(key);

     DownloaderObj page = new DownloaderObj(new Uri(address), null);
     Downloader.DownloadSync(page);
 }
Example #44
0
        /// <summary>
        /// Collects search phrases from wordstat.yandex.ru, starting with <paramref name="startKey"/>
        /// and then querying each collected phrase in turn for further phrases.
        /// </summary>
        /// <param name="count">Collection stops once the list holds at least this many keys.</param>
        /// <param name="startKey">Seed phrase; it is always the first entry of the result.</param>
        /// <returns>
        /// The distinct keys in the order they were found. The list can be longer than
        /// <paramref name="count"/> because the limit is only checked between pages.
        /// </returns>
        public static List<string> ParseYandexKeys(int count, string startKey)
        {
            // Shared budget for failed downloads and captcha pages, so the loop cannot spin forever.
            const int maxFailedTries = 5;

            List<string> keys = new List<string>();
            keys.Add(startKey);

            string pattern = @"<td>\s*<a href=""\?cmd=words&amp;page=1&amp;ts=[^&]*&amp;key=[^&]*&amp;t=([^""]*)"">[^<]*</a>\s*</td>";
            string splitPattern = "<tr class=\"thead\" valign=\"bottom\">";
            string capchaPattern = @"<input type=""hidden"" name=""captcha_id"" value=""([^""]*)""[^>]*>";

            Regex rx = new Regex(pattern, RegexOptions.Compiled);
            Regex splitRx = new Regex(splitPattern, RegexOptions.Compiled);
            Regex capchaRx = new Regex(capchaPattern, RegexOptions.Compiled);

            int failTryCount = 0;
            int i = 0;
            CookieCollection cookies = new CookieCollection();
            while (count > keys.Count && i <= keys.Count - 1)
            {
                // The phrase is a query-string value, so it must be escaped as data
                // (reserved characters such as '&' would otherwise break the query).
                string key = Uri.EscapeDataString(keys[i]);

                Uri keyUri = new Uri("http://wordstat.yandex.ru/?cmd=words&page=1&t=" + key + "&geo=&text_geo=");
                // Flags are combined with '|'; the previous '&' of two different flags
                // cannot express "use shared cookies and take new ones".
                // NOTE(review): assumes UseShared and Take are distinct bit flags - confirm against the enum.
                DownloaderObj obj = new DownloaderObj(keyUri, null, true, null, CookieOptions.UseShared | CookieOptions.Take, 5, null, cookies);
                Downloader.DownloadSync(obj);

                // Work on the downloaded page (previously an empty string was parsed instead).
                string content = obj.DataStr;
                if (content == null)
                {
                    if (failTryCount >= maxFailedTries) break;
                    failTryCount++;
                    continue;
                }

                if (capchaRx.IsMatch(content))
                {
                    // Captcha page: refresh the cookies and retry the same key, within the retry budget.
                    if (failTryCount >= maxFailedTries) break;
                    failTryCount++;

                    obj.Attempts = 3;
                    obj.Uri = new Uri("http://kiks.yandex.ru/su/");
                    Downloader.HaveResponce(obj);
                    cookies = obj.Cookie;
                    continue;
                }

                // Only the part after the table header is parsed; a page without the header
                // contributes no keys instead of throwing on a missing array element.
                string[] parts = splitRx.Split(content);
                if (parts.Length > 1)
                {
                    MatchCollection results = rx.Matches(parts[1]);

                    if (results.Count <= 14)
                    {
                        foreach (Match m in results)
                        {
                            keys.Add(Uri.UnescapeDataString(m.Groups[1].Value));
                        }
                    }
                    else
                    {
                        // NOTE(review): on long pages the first match is skipped (j starts at 1)
                        // while short pages keep it - kept as in the original, confirm the intent.
                        for (int j = 1; j < 14; j++)
                        {
                            keys.Add(Uri.UnescapeDataString(results[j].Groups[1].Value));
                        }
                    }

                    keys = keys.Distinct().ToList<string>();
                }

                i++;
                Console.WriteLine("ParseYandexKeys collect {0} keys and index is on {1} posotion...", keys.Count, i);
            }
            return keys;
        }
Example #45
0
 /// <summary>
 /// Queues downloads of pages 1 to 5 (the <c>pg</c> query parameter) for every category URL,
 /// with <c>GetProductCallback</c> as the callback and the category's position as its argument.
 /// </summary>
 /// <param name="categories">Category URLs; the page suffix is appended to each.</param>
 /// <returns>
 /// The <c>allProductsUrls</c> list. This method only queues the downloads and does not
 /// itself wait for them.
 /// </returns>
 public List<Uri> GetAllCategoriesProducts(List<string> categories)
 {
     Downloader.MaxParallelRequests = 20;

     int categoryIndex = 0;
     foreach (string categoryUrl in categories)
     {
         for (int page = 1; page <= 5; page++)
         {
             Uri pageUri = new Uri(categoryUrl + "?pg=" + page);
             Downloader.Queue(new DownloaderObj(pageUri, GetProductCallback, true, null, CookieOptions.NoCookies, 100, categoryIndex));
         }
         categoryIndex++;
     }

     return allProductsUrls;
 }
Example #46
0
 /// <summary>
 /// Downloads every product page synchronously and extracts its ASIN with <c>GetProductASIN</c>.
 /// </summary>
 /// <param name="productsUrls">Product page addresses to visit, in order.</param>
 /// <returns>The non-empty ASINs that were found; pages without data or without an ASIN are skipped.</returns>
 List<string> CollectAllASINs(List<Uri> productsUrls)
 {
     List<string> collected = new List<string>();

     foreach (Uri productUrl in productsUrls)
     {
         DownloaderObj page = new DownloaderObj(productUrl, null, true, null, CookieOptions.NoCookies, 100);
         Downloader.DownloadSync(page);
         if (page.DataStr == null)
         {
             continue;
         }

         string asin = GetProductASIN(page.DataStr);
         if (string.IsNullOrEmpty(asin))
         {
             continue;
         }

         collected.Add(asin);
     }

     return collected;
 }
Example #47
0
        /// <summary>
        /// Synchronously requests the resolve URL for the given id and passes the answer to
        /// <c>HandleRecognizeState</c>; reports <c>ErrorState.ConnectProblem</c> when no data came back.
        /// </summary>
        /// <param name="id">Identifier appended to <c>_resolveStr</c> to build the request address.</param>
        private void AskRecognize(string id)
        {
            DownloaderObj request = new DownloaderObj(new Uri(_resolveStr + id), null, true, null, CookieOptions.Empty, 5);
            Downloader.DownloadSync(request);

            if (request.DataStr == null)
            {
#if DEBUG
                GlobalLog.Err("can't ASK data, id:{0}", id);
#endif
                Err(ErrorState.ConnectProblem);
                return;
            }

            HandleRecognizeState(request.DataStr, id);
        }
Example #48
0
        /// <summary>
        /// Callback for the image POST: forwards the response text to <c>HandlePostState</c>,
        /// or reports <c>ErrorState.ConnectProblem</c> when the response carries no text.
        /// </summary>
        /// <param name="obj">Completed POST download object.</param>
        private void EndPostImage(DownloaderObj obj)
        {
            if (obj.DataStr == null)
            {
#if DEBUG
                GlobalLog.Err("null data in EndPostImage, POST_DATA:\n{0}", obj.PostData);
#endif
                Err(ErrorState.ConnectProblem);
                return;
            }

            HandlePostState(obj.DataStr);
        }
Example #49
0
        /// <summary>
        /// Builds a multipart form request containing the image and queues it for posting;
        /// the response is delivered to <c>EndPostImage</c>.
        /// </summary>
        /// <param name="image">Image bytes; must be non-null and non-empty.</param>
        /// <param name="type">Image extension type, forwarded to the form builder.</param>
        /// <param name="param">Optional recognize parameters; each of its pairs is added to the form.</param>
        /// <exception cref="ArgumentException">The image is null or has zero length.</exception>
        public void PostImage(byte[] image, ImgType type, RecognizeParams param = null)
        {
            bool hasImage = image != null && image.Length > 0;
            if (!hasImage)
                throw new ArgumentException("Bad image data");

            List<KeyValuePair<string, string>> formFields = new List<KeyValuePair<string, string>>
            {
                new KeyValuePair<string, string>("method", "post"),
                new KeyValuePair<string, string>("key", ACCOUNT_ID)
            };

            if (param != null)
            {
                foreach (var extraField in param.Params)
                {
                    formFields.Add(extraField);
                }
            }

            byte[] formBody = BuldFormbasedData(formFields, StreamEncoding.GetString(image), type);

            // _boundary is read only after the form body has been built, as in the original order.
            string contentType = "multipart/form-data; boundary=" + _boundary;
            RequestParams reqParams = new RequestParams(null, null, contentType, null, true, "POST", StreamEncoding, false);

            DownloaderObj postRequest = new DownloaderObj(_postUri, EndPostImage, true, null, CookieOptions.Empty, 4, null, null, false, 1000, null, reqParams)
            {
                PostData = formBody
            };
            Downloader.Queue(postRequest);
        }
Example #50
0
        /// <summary>
        /// Starts one asynchronous attempt for the download object: consumes an attempt, creates
        /// the request, writes the POST body if there is one and begins waiting for the response
        /// with <c>EndReceive</c> as the completion callback.
        /// </summary>
        /// <remarks>
        /// Does nothing when no attempts are left. Any exception is logged and followed by
        /// <c>RetryOrCallback</c>; a <c>WebException</c> is first passed to <c>HandleWebState</c>.
        /// </remarks>
        /// <param name="obj">Download descriptor to start.</param>
        internal void BeginReceive(DownloaderObj obj)
        {
            if (obj.Attempts <= 0)
            {
                return;
            }

            obj.Attempts--;
            try
            {
                obj.Request = CreateRequest(obj);
                if (obj.PostData != null)
                {
                    PostRequestData(obj.Request, obj.PostData);
                }

                obj.Request.BeginGetResponse(EndReceive, obj);
            }
            catch (WebException webError)
            {
                GlobalLog.Err(webError, "Host: " + obj.Uri.Host);
                HandleWebState(webError, obj);
                RetryOrCallback(obj);
            }
            catch (Exception error)
            {
                GlobalLog.Err(error, "Host: " + obj.Uri.Host);
                RetryOrCallback(obj);
            }
        }
Example #51
0
        //public void QueueGooglePages(int numPage, string key, Regex rx, WaitObj waiter)
        //{
        //    Uri uri = new Uri("http://www.google.ru/search?q=" + key + "&sourceid=opera&num=0&ie=utf-8&oe=utf-8&start=" + numPage);
        //    DownloaderObj obj = new DownloaderObj(uri, EndGetPage, true, null, false, 10, rx);
        //    Downloader.Queue(obj);
        //}
        /// <summary>
        /// Download callback for a search page. With page text available it extracts group 1 of
        /// every regex match, raises <c>OnParsed</c> with the list, decrements the waiter counter
        /// and raises <c>OnCompleted</c> when the counter reaches zero. Without page text it only
        /// resets the attempts to 10 and switches the callback to <c>EndGetPageWithProxy</c>.
        /// </summary>
        /// <param name="obj">
        /// Completed download; <c>obj.Arg</c> is read as an object array holding the
        /// <c>Regex</c> at index 0 and the <c>WaitObj</c> at index 1.
        /// </param>
        void EndGetPage(DownloaderObj obj)
        {
            object[] args = obj.Arg as object[];

            if (obj.DataStr == null)
            {
                // The proxy hand-over sketched for this branch is disabled in the source;
                // this method itself does not queue the object again.
                obj.Attempts = 10;
                obj.CallBack = EndGetPageWithProxy;
                return;
            }

            List<string> foundUrls = new List<string>();
            Regex urlRegex = args[0] as Regex;
            WaitObj waiter = args[1] as WaitObj;
            foreach (Match hit in urlRegex.Matches(obj.DataStr))
            {
                foundUrls.Add(hit.Groups[1].Value);
            }

            if (OnParsed != null)
            {
                OnParsed(foundUrls);
            }

            bool wasLastPage = Interlocked.Decrement(ref waiter.Count) == 0;
            if (wasLastPage && OnCompleted != null)
            {
                OnCompleted(this, EventArgs.Empty);
            }
        }
Example #52
0
 /// <summary>
 /// Fetches cookies for the download object through a new <c>MagicClient</c>, unless its
 /// cookie options contain the <c>NoCookies</c> flag.
 /// </summary>
 /// <param name="obj">Download descriptor whose <c>CookieOptions</c> are checked.</param>
 /// <returns>The cookies returned by the client, or <c>null</c> when <c>NoCookies</c> is set.</returns>
 public static CookieCollection GetCookies(DownloaderObj obj)
 {
     bool cookiesDisabled = obj.CookieOptions.HasFlag(CookieOptions.NoCookies);
     return cookiesDisabled ? null : new MagicClient().GetCookies(obj);
 }
Example #53
0
 /// <summary>
 /// Queues a thread-pool work item that downloads the category page stored at
 /// <c>tempList[curIndx]</c>, extracts its menu links and registers every category not seen
 /// before (adding it to <c>allCategories</c>, saving it and appending it to <c>tempList</c>).
 /// When done, the slot is cleared and the waiter counter is decremented; the wait event is
 /// set when the counter reaches zero.
 /// </summary>
 /// <remarks>
 /// All accesses to <c>tempList</c> made here happen under <c>tempListSync</c>: other work
 /// items append to the list under that lock, and <c>List&lt;T&gt;</c> does not support
 /// reading or writing an element while another thread is adding.
 /// </remarks>
 /// <param name="curIndx">Index into <c>tempList</c> of the page to process.</param>
 void AsyncDownloadCategoryPage(int curIndx)
 {
     ThreadPool.QueueUserWorkItem((object o) =>
         {
             // Take a stable copy of the address once, under the list lock.
             Uri pageUri;
             lock (tempListSync)
             {
                 pageUri = tempList[curIndx];
             }

             HashSet<string> curCategories = new HashSet<string>();
             DownloaderObj obj = new DownloaderObj(pageUri, null, true, null, CookieOptions.NoCookies, 1000);
             Downloader.DownloadSync(obj);
             if (obj.DataStr != null)
             {
                 string pageMenuData = GetMenuDataPiece(obj.DataStr, pageUri);
                 curCategories = GetAllMenuLinks(pageMenuData, pageUri);
             }
             foreach (var category in curCategories)
             {
                 string clearCategory = _linkRefRx.Replace(category, "");
                 // Lock order is always allCategoriesSync first, then tempListSync.
                 lock (allCategoriesSync)
                 {
                     lock (tempListSync)
                     {
                         if (!allCategories.Contains(clearCategory))
                         {
                             allCategories.Add(clearCategory);
                             SaveCategory(clearCategory);
                             Console.WriteLine("Total collected categoryes - {0}", allCategories.Count);
                             tempList.Add(new Uri(clearCategory));
                         }
                     }
                 }
             }

             // Clear the processed slot under the same lock that guards Add.
             lock (tempListSync)
             {
                 tempList[curIndx] = null;
             }

             Console.WriteLine("waiter.Count - " + waiter.Count);
             if (Interlocked.Decrement(ref waiter.Count) == 0)
             {
                 waiter.WaitEvent.Set();
             }
         });
 }
Example #54
0
        /// <summary>
        /// Timer-style callback: requests the resolve URL for an id, passes the answer to
        /// <c>HandleRecognizeState</c> (or reports <c>ErrorState.ConnectProblem</c> when no data
        /// came back) and disposes the timer that was handed in.
        /// </summary>
        /// <param name="arg">
        /// An object array holding the <c>Timer</c> at index 0 and the id string at index 1.
        /// </param>
        private void AskRecognize(object arg)
        {
            object[] args = arg as object[];
            string id = args[1] as string;
            Timer timer = args[0] as Timer;

            try
            {
                Uri resolveUri = new Uri(_resolveStr + id);
                DownloaderObj obj = new DownloaderObj(resolveUri, null, true, null, CookieOptions.Empty, 5);
                Downloader.DownloadSync(obj);
                if (obj.DataStr != null)
                {
                    HandleRecognizeState(obj.DataStr, id);
                }
                else
                {
                    Err(ErrorState.ConnectProblem);
                }
            }
            finally
            {
                // Dispose the timer even when the request or the handler throws,
                // and tolerate a first element that is not a Timer.
                if (timer != null)
                {
                    timer.Dispose();
                }
            }
        }
Example #55
0
 /// <summary>
 /// Download callback for a category page: extracts the product URLs, appends them to
 /// <c>allProductsUrls</c> and saves them (both under <c>allProductsUrlsSync</c>), then
 /// prints the running total. Pages without text are ignored.
 /// </summary>
 /// <param name="obj">Completed download; <c>obj.Arg</c> is unboxed as the category index.</param>
 void GetProductCallback(DownloaderObj obj)
 {
     if (obj.DataStr == null)
     {
         return;
     }

     List<Uri> pageProducts = GetProductsUrls(obj.DataStr);
     lock (allProductsUrlsSync)
     {
         allProductsUrls.AddRange(pageProducts);
         SaveProducts(pageProducts);
     }

     int categoryIndex = (int)obj.Arg;
     Console.WriteLine("Found {0} products on categoryIndx {1}", allProductsUrls.Count, categoryIndex);
 }
Example #56
0
        /// <summary>
        /// Download callback: routes the object to <c>HandleSuccessDownload</c> when it has page
        /// text that passes the optional validator, and to <c>HandleBadDownload</c> otherwise;
        /// <c>MoveObjComplete</c> is called afterwards in either case.
        /// </summary>
        /// <param name="obj">Completed download object.</param>
        private void DevourCallback(DownloaderObj obj)
        {
            // Text is required; the validator is consulted only when one is configured.
            bool accepted = obj.DataStr != null
                && (_validator == null || _validator.Validate(obj.DataStr));

            if (accepted)
            {
                HandleSuccessDownload(obj);
            }
            else
            {
                HandleBadDownload(obj);
            }

            MoveObjComplete();
        }
Example #57
0
        /// <summary>
        /// Refreshes <c>obj.State</c> from the request/response pair and adjusts the retry
        /// behaviour: attempts are zeroed for <c>BadAddress</c>, <c>DocumentUnavailable</c> and
        /// <c>ProxyError</c>; for <c>ServiceUnavailable</c> with attempts left the thread sleeps
        /// for a randomized pause.
        /// </summary>
        /// <param name="e">The caught exception; not read by this method.</param>
        /// <param name="obj">Download descriptor to update.</param>
        void HandleWebState(WebException e, DownloaderObj obj)
        {
            obj.State = new DownloadStateProvider().GetWebState(obj.Request, obj.Response);

            bool retryIsPointless = obj.State == DownloadState.BadAddress
                || obj.State == DownloadState.DocumentUnavailable
                || obj.State == DownloadState.ProxyError;
            if (retryIsPointless)
            {
                obj.Attempts = 0;
            }

            bool shouldBackOff = obj.State == DownloadState.ServiceUnavailable && obj.Attempts > 0;
            if (shouldBackOff)
            {
                int shortestPause = obj.AttemptPause / 2;
                int longestPause = (int)(obj.AttemptPause * 1.5);
                Thread.Sleep(Rnd.Next(shortestPause, longestPause));
            }
        }
Example #58
0
 /// <summary>
 /// Queues a download of the target's URI through the given proxy container, with
 /// <c>DevourCallback</c> as the callback and the target itself as the callback argument,
 /// then calls <c>MoveObjQueued</c>. In DEBUG builds the queued-object counter is
 /// incremented and logged.
 /// </summary>
 /// <param name="proxyCont">Proxy container passed to the download object.</param>
 /// <param name="target">Target supplying the URI and the attempt count.</param>
 private void DevourOne(ProxyContainer proxyCont, DevourTarget target)
 {
     DownloaderObj request = new DownloaderObj(
         target.Uri, DevourCallback, true, proxyCont, this.CookieOptions, target.Attempts,
         target, null, false, 1000, this.TimingParams, this.RequestParams);
     Downloader.Queue(request);
     #if DEBUG
     Interlocked.Increment(ref __queuedObjects);
     GlobalLog.Write("__queuedObjects: {0}", __queuedObjects);
     #endif
     MoveObjQueued();
 }
Example #59
0
 /// <summary>
 /// Download callback with an empty body: it accepts the completed object and does nothing.
 /// </summary>
 /// <param name="obj">Completed download object; not read.</param>
 void EndGetPageWithProxy(DownloaderObj obj)
 {
 }
Example #60
0
        /// <summary>
        /// Handles a failed download according to <c>obj.State</c>:
        /// <c>ProxyError</c> fires the proxy and re-enqueues the target;
        /// <c>BadAddress</c> / <c>DocumentUnavailable</c> mark the target as faulted;
        /// any other state costs the target one life and marks it as faulted when none remain.
        /// In the last two cases the proxy container is released with <c>false</c>.
        /// </summary>
        /// <param name="obj">
        /// Failed download; <c>obj.Arg</c> is read as the <c>DevourTarget</c> and
        /// <c>obj.PrxContainer</c> as the <c>ProxyContainer</c>.
        /// </param>
        private void HandleBadDownload(DownloaderObj obj)
        {
            DevourTarget target = obj.Arg as DevourTarget;
            ProxyContainer proxy = obj.PrxContainer as ProxyContainer;

            if (obj.State == HttpDownloadResult.ProxyError)
            {
                // The proxy is to blame; the document gets another chance.
                _proxies.Fire(proxy);
                _targets.Enqueue(target);
                return;
            }

            bool documentAtFault = obj.State == HttpDownloadResult.BadAddress
                || obj.State == HttpDownloadResult.DocumentUnavailable;

            // A life is only spent when the document itself is not clearly at fault.
            if (documentAtFault || --target.Lifes <= 0)
            {
                AddFaultTarget(target);
            }

            // In every non-proxy-error case the proxy is released with the flag set to false.
            _proxies.Release(proxy, false);
        }