/// <summary>
/// Looks the given address up on the melissadata.com "Address Verify" page
/// (fetched through <c>_htmlScraper</c> with <c>ProxyUseTypes.Mellisa</c>) and
/// reports whether the returned page carries the
/// "Address is served by FedEx, UPS and NOT the USPS" note.
/// </summary>
/// <param name="address">Address whose Final* fields, Zip and ZipAddon are placed into the lookup URL.</param>
/// <returns>
/// A result whose <c>IsNotServedByUSPSNote</c> is true when the fetched page text
/// contains the note; false otherwise (including when no page text was obtained).
/// </returns>
public PersonatorAddressCheckResult ScrappingCheckAddress(AddressDTO address)
        {
            // Encode each query-string value separately, in the order it appears in the URL.
            var encodedName   = HttpUtility.UrlEncode(address.FinalFullName);
            var encodedStreet = HttpUtility.UrlEncode(StringHelper.JoinTwo(" ", address.FinalAddress1, address.FinalAddress2));
            var encodedCity   = HttpUtility.UrlEncode(address.FinalCity);
            var encodedState  = HttpUtility.UrlEncode(address.FinalState);
            var encodedZip    = HttpUtility.UrlEncode(StringHelper.JoinTwo("-", address.Zip, address.ZipAddon));

            var url = String.Format(
                "http://www.melissadata.com/Lookups/AddressVerify.asp?name={0}&Company=&Address={1}&city={2}&state={3}&zip={4}",
                encodedName,
                encodedStreet,
                encodedCity,
                encodedState,
                encodedZip);

            _log.Info("Request url: " + url);

            // The callback tells the scraper whether a fetched page is acceptable.
            // NOTE(review): presumably a false return makes the scraper try again / another proxy - confirm in GetHtml.
            var response = _htmlScraper.GetHtml(url, ProxyUseTypes.Mellisa, (proxy, status, html) =>
            {
                var pageText = html ?? "";
                var statusOk = status == HttpStatusCode.OK;

                // A usable page mentions "Address Verify" and does not show the "Lookup Level Exceeded" text.
                var correctContent = !pageText.Contains("Lookup Level Exceeded") && pageText.Contains("Address Verify");
                var contentLabel   = correctContent ? " (Correct Content)" : " (Incorrect Content)";

                _log.Info(url + contentLabel
                          + ", Status=" + status
                          + ", Is Empty Html=" + String.IsNullOrEmpty(html)
                          + ", Proxy=" + proxy.IPAddress + ":" + proxy.Port);

                // Dump the body only for responses that came back OK but with unexpected content.
                if (statusOk && !correctContent)
                {
                    _log.Info("Content=" + html);
                }

                return statusOk && correctContent && !String.IsNullOrEmpty(html);
            });

            // A failed fetch is logged but does not abort; the result below is then built from empty text.
            if (response.IsFail)
            {
                _log.Fatal("ScrappingCheckAddress. No result: " + response.Message);
            }

            var responseText = response.Data ?? "";
            var checkResult  = new PersonatorAddressCheckResult();
            checkResult.IsNotServedByUSPSNote = responseText.Contains("Address is served by FedEx, UPS and NOT the USPS");

            return checkResult;
        }
Example #2
0
        /// <summary>
        /// Issues two hard-coded melissadata.com "Address Verify" lookups through
        /// <c>_htmlScraper</c>: first a US address via <c>ProxyUseTypes.Mellisa</c>
        /// (accepting any response), then a Canadian address via the single-argument
        /// <c>GetHtml</c> overload.
        /// </summary>
        /// <returns>
        /// True when the SECOND lookup succeeded and its page contains
        /// "Address is served by FedEx, UPS and NOT the USPS"; false otherwise.
        /// The first lookup is still performed, but its outcome does not influence
        /// the return value (this matches the previous behaviour, where the first
        /// result was always overwritten).
        /// </returns>
        public bool CallMellissaPage()
        {
            const string notServedByUspsNote = "Address is served by FedEx, UPS and NOT the USPS";

            // First lookup: the validator accepts whatever comes back. The response is
            // intentionally not inspected - the previous code computed a flag from it and
            // then unconditionally reset that flag before returning.
            var url = "http://www.melissadata.com/Lookups/AddressVerify.asp?name=linda%20airhart&Company=&Address=300%20garber%20st&city=plains&state=MT&zip=59859-0178";
            _htmlScraper.GetHtml(url, ProxyUseTypes.Mellisa, ((dto, code, arg3) => { return(true); }));

            // Second lookup: this one decides the return value.
            url = "http://www.melissadata.com/Lookups/AddressVerify.asp?name=EVELYN+JOSE&Company=&Address=7137+35+AVENUE+NW&city=CALGARY&state=AB&zip=T3B+1T1";
            var response = _htmlScraper.GetHtml(url);

            // Guard against a successful response that carries no data, so this returns
            // false instead of throwing NullReferenceException.
            return(response.IsSuccess && (response.Data ?? "").Contains(notServedByUspsNote));
        }
Example #3
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> through <c>_htmlScraper</c>
        /// (using <c>ProxyUseTypes.Amazon</c>) and hands the HTML to
        /// <paramref name="pageParser"/> to extract the large images.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="pageParser">Parser used both to validate the downloaded HTML and to extract the images.</param>
        /// <param name="downloadedSize">Set to the character length of the downloaded HTML; stays 0 when nothing was downloaded.</param>
        /// <returns>
        /// The parser's result on success. Otherwise a result with
        /// <c>CallStatus.Fail</c>, carrying the caught exception when one was thrown.
        /// </returns>
        public CallResult <IList <ImageInfo> > GetMainImageFromUrl(string url, IWebPageParser pageParser, out long downloadedSize)
        {
            downloadedSize = 0;
            var pageHtml = String.Empty;

            try
            {
                /*
                 * Sample of the markup recorded by the original author for the main image
                 * (abbreviated):
                 *
                 * <div id="imgTagWrapperId" class="imgTagWrapper">
                 *     <img alt="..."
                 *          src="http://ecx.images-amazon.com/images/I/81YDdI2Rk0L._UY879_.jpg"
                 *          data-old-hires="http://ecx.images-amazon.com/images/I/81YDdI2Rk0L._UL1500_.jpg"
                 *          class="a-dynamic-image  a-stretch-vertical" id="landingImage"
                 *          data-a-dynamic-image="{ &quot;<image url>&quot;:[width,height], ... }">
                 * </div>
                 */

                // A download counts as acceptable only with HTTP 200 and HTML the parser validates.
                var fetched = _htmlScraper.GetHtml(
                    url,
                    ProxyUseTypes.Amazon,
                    (proxy, status, content) => status == HttpStatusCode.OK && pageParser.ValidateHtml(content));

                if (!fetched.IsSuccess)
                {
                    // Presumably throws for a failed result, which lands in the catch below;
                    // if it does not throw, control falls through to the final failure return.
                    CallHelper.ThrowIfFail(fetched);
                }
                else
                {
                    pageHtml       = fetched.Data;
                    downloadedSize = pageHtml.Length;

                    return pageParser.GetLargeImages(pageHtml);
                }
            }
            catch (Exception error)
            {
                _log.Error("Parsing html page issue, url=" + url, error);
                _log.Info("HTML: " + pageHtml);

                var failure = new CallResult <IList <ImageInfo> >();
                failure.Status    = CallStatus.Fail;
                failure.Exception = error;
                return failure;
            }

            return new CallResult <IList <ImageInfo> >() { Status = CallStatus.Fail };
        }