Ejemplo n.º 1
0
        /// <summary>
        /// Scores the candidates that resemble <paramref name="targetElement"/> and returns them
        /// ordered by ascending total score (smallest score first).
        /// </summary>
        /// <param name="targetElement">The element to find look-alikes for.</param>
        /// <param name="candidateElements">The pool of elements to search; first narrowed by GetElementsWithSimilarTag.</param>
        /// <param name="maxDistance">The total against which the rectangle distance is expressed via GetPercentageOfTotal.</param>
        /// <param name="similarElements">
        /// One entry per surviving candidate: the candidate wrapped in an ElementMatch (as <c>This</c>)
        /// together with its total score. Empty when nothing qualifies.
        /// </param>
        /// <returns><c>true</c> when at least one candidate passed the text and attribute thresholds.</returns>
        public bool HasSimilarElements(ScrapedElement targetElement, IEnumerable<ScrapedElement> candidateElements, decimal maxDistance, out Tuple<ElementMatch<ScrapedElement>, decimal>[] similarElements)
        {
            var targetElementTextLength = targetElement.Text.Length;

            var candidates = GetElementsWithSimilarTag(targetElement.Tag, candidateElements);

            // Pass 1: drop candidates whose text or attribute difference exceeds its threshold,
            // and seed the score with the weighted text + attribute differences.
            var tuples = (from c in candidates
                          let maxLength = Math.Max(c.Text.Length, targetElementTextLength)
                          let textDiff = maxLength.GetPercentageOfTotal(TextDistance(c.Text, targetElement.Text))
                          let attrDiff = AttributesSimilarity(targetElement, c)
                          where textDiff <= _maxTextDiffThreshold && attrDiff <= _maxAttributeDiffThreshold
                          select new Tuple<ScrapedElement, decimal>(c, (textDiff * _textWeight) + (attrDiff * _attributeWeight))).ToArray();

            // Pass 2: add the weighted tag, position and area differences, then sort ascending.
            tuples = (from c in tuples
                      let distRect = maxDistance.GetPercentageOfTotal(RectangleUtil.DistanceBetweenRectangles(c.Item1.Location, targetElement.Location))
                      let areaRect = RectangleUtil.AreaChangeAsPercent(c.Item1.Location, targetElement.Location)
                      // Every term of the total is a difference, so an identical tag must add nothing and
                      // only a differing tag is penalised. (The previous code penalised identical tags,
                      // which ranked exact-tag candidates behind merely similar ones.)
                      let tagDist = c.Item1.Tag.Equals(targetElement.Tag) ? 0 : (100 * _tagWeight)
                      let total = c.Item2 + tagDist + (distRect * _distanceWeight) + (areaRect * _areaWeight)
                      orderby total
                      select new Tuple<ScrapedElement, decimal>(c.Item1, total)).ToArray();

            similarElements = tuples.Select(t => new Tuple<ElementMatch<ScrapedElement>, decimal>(new ElementMatch<ScrapedElement> { This = t.Item1 }, t.Item2)).ToArray();

            return tuples.Length > 0;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compares two scraped elements field by field.
        /// </summary>
        /// <param name="e1">The first element; may be null.</param>
        /// <param name="e2">The second element; may be null.</param>
        /// <returns>
        /// <c>true</c> when both are null, or when tag, location, CSS, attributes and text all agree;
        /// otherwise <c>false</c>.
        /// </returns>
        private static bool ElementsEqual(ScrapedElement e1, ScrapedElement e2)
        {
            // Null handling: two nulls are equal, a single null never is.
            if (ReferenceEquals(e1, null) || ReferenceEquals(e2, null))
            {
                return ReferenceEquals(e1, null) && ReferenceEquals(e2, null);
            }

            // Html is not compared here; tag, attributes, CSS and text are checked instead.
            // Checks run cheapest-first and stop at the first mismatch.
            if (e1.Tag != e2.Tag)
            {
                return false;
            }

            if (!e1.Location.Equals(e2.Location))
            {
                return false;
            }

            if (!e1.Css.DictionaryEqual(e2.Css))
            {
                return false;
            }

            if (!e1.Attributes.DictionaryEqual(e2.Attributes))
            {
                return false;
            }

            return e1.Text == e2.Text;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// HasExactMatch should report success for a fully populated "foo" div and hand back
        /// an element whose text is "foo".
        /// </summary>
        public void ExactMatch()
        {
            // Arrange
            ScrapedElement located;
            var probe = new ScrapedElement
            {
                Tag = "div",
                Text = "foo",
                Location = new Rectangle(10, 10, 15, 15),
                Css = new Dictionary<string, string> { { "foo", "bar" } },
                Attributes = new Dictionary<string, string> { { "id", "foo" } },
            };

            // Act
            var wasFound = _elementMapper.HasExactMatch(probe, _scrapedElements, out located);

            // Assert
            Assert.IsTrue(wasFound);
            Assert.AreEqual("foo", located.Text);
        }
        /// <summary>
        /// HasSimilarElements should find matches for a long-text div and return the first three
        /// results in the same order as the first three fixture elements.
        /// </summary>
        public void HasSimilarTextMatch()
        {
            // Arrange
            const int expectedLeadingMatches = 3;
            Tuple<ElementMatch<ScrapedElement>, decimal>[] ranked;
            var probe = new ScrapedElement
            {
                Tag = "div",
                Text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque justo lorem, lacinia ac accumsan ut, auctor vel ligula. Fusce ut arcu purus. Proin id metus sit amet est venenatis auctor. Curabitur nunc elit, pretium at mattis luctus, molestie quis enim. Mauris eu ipsum a ligula auctor facilisis ac sed arcu. Vestibulum tristique lobortis nibh id blandit. Suspendisse nulla elit, dictum eget lobortis et, luctus at felis. Duis aliquet, quam lobortis congue rutrum, mauris libero posuere est, eu faucibus leo risus eu arcu. Nam viverra lobortis sem egestas fringilla.",
                Attributes = new Dictionary<string, string> { { "foo", "bar" } },
                Location = new Rectangle(10, 10, 15, 15),
            };

            // Act
            var anySimilar = _elementMapper.HasSimilarElements(probe, _scrapedElements, 50M, out ranked);

            // Assert
            Assert.IsTrue(anySimilar);
            for (var index = 0; index < expectedLeadingMatches; index++)
            {
                // The i-th ranked result must be the very same instance as the i-th fixture element.
                Assert.IsTrue(ReferenceEquals(ranked[index].Item1.This, _scrapedElements[index]));
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// HasIdMatch should find a fixture element whose "id" attribute is "foo".
        /// </summary>
        public void IdMatch()
        {
            // Arrange
            var attributes = new Dictionary<string, string>();
            attributes.Add("id", "foo");
            var probe = new ScrapedElement { Attributes = attributes };
            ScrapedElement located;

            // Act
            var wasFound = _elementMapper.HasIdMatch(probe, _scrapedElements, out located);

            // Assert
            Assert.IsTrue(wasFound);
            var locatedId = located.Attributes["id"];
            Assert.IsTrue(locatedId == "foo");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// HasIdMatch should report failure and yield null when no fixture element has the id "foobar".
        /// </summary>
        public void NoIdMatch()
        {
            // Arrange
            var attributes = new Dictionary<string, string>();
            attributes.Add("id", "foobar");
            var probe = new ScrapedElement { Attributes = attributes };
            ScrapedElement located;

            // Act
            var wasFound = _elementMapper.HasIdMatch(probe, _scrapedElements, out located);

            // Assert
            Assert.IsFalse(wasFound);
            Assert.IsNull(located);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// HasExactMatch should report failure and yield null for a "foo" span probed against the fixture elements.
        /// </summary>
        public void NoExactMatch()
        {
            // Arrange
            ScrapedElement located;
            var probe = new ScrapedElement
            {
                Tag = "span",
                Text = "foo",
                Css = new Dictionary<string, string> { { "foo", "bar" } },
                Attributes = new Dictionary<string, string> { { "id", "foo" } },
                Location = new Rectangle(10, 10, 15, 15),
            };

            // Act
            var wasFound = _elementMapper.HasExactMatch(probe, _scrapedElements, out located);

            // Assert
            Assert.IsFalse(wasFound);
            Assert.IsNull(located);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// HasExactMatch should treat two elements as equal when their attribute dictionaries hold
        /// the same pairs inserted in a different order, and should return the matching candidate.
        /// </summary>
        public void MatchUnorderedHtmlAttributes()
        {
            // Arrange
            var scrapedElement = new ScrapedElement
                {
                    Html = @"<div id=""foo"" style=""bar"" class=""baz"">foo</div>",
                    Text = "foo",
                    Tag = "div",
                    Location = new Rectangle(10, 10, 15, 15),
                    // Insertion order deliberately differs from the candidate below.
                    Attributes = new Dictionary<string, string> { { "style", "bar" }, { "class", "baz" }, { "id", "foo" } },
                    Css = new Dictionary<string, string> { { "foo", "bar" } },
                };
            var scrapedElements = new[]
                {
                    new ScrapedElement
                        {
                            Html = @"<div id=""foo"" style=""bar"" class=""baz"">foo</div>",
                            Text = "foo",
                            Tag = "div",
                            Location = new Rectangle(10, 10, 15, 15),
                            Attributes = new Dictionary<string, string> { { "id", "foo" }, { "style", "bar" }, { "class", "baz" } },
                            Css = new Dictionary<string, string> { { "foo", "bar" } },
                        }
                };
            ScrapedElement match;

            // Act
            var found = _elementMapper.HasExactMatch(scrapedElement, scrapedElements, out match);

            // Assert
            Assert.IsTrue(found);
            // Also pin the out value: it must be the single candidate, not merely "something".
            Assert.IsTrue(ReferenceEquals(match, scrapedElements[0]));
        }
Ejemplo n.º 9
0
        // TODO: high value testing area
        /// <summary>
        /// Compares an element with its corresponding scraped element and records every difference found
        /// (location, CSS, HTML, text and pixels) together with a percentage for each category.
        /// </summary>
        /// <param name="element">The element to inspect; its CorrespondingScrapedElement is the comparison partner.</param>
        /// <param name="pageScreenshotA">Screenshot passed to the pixel comparison alongside <paramref name="element"/>.</param>
        /// <param name="pageScreenshotB">Screenshot passed to the pixel comparison alongside the corresponding element.</param>
        /// <param name="pageA">Page data passed to the pixel comparison alongside <paramref name="element"/>.</param>
        /// <param name="pageB">Page data passed to the pixel comparison alongside the corresponding element.</param>
        /// <param name="changes">Receives the collected differences; always assigned a new instance.</param>
        /// <returns><c>true</c> when any category differs.</returns>
        private bool HasChanges(ScrapedElement element, Image pageScreenshotA, Image pageScreenshotB, Scrape pageA, Scrape pageB, out ElementChangeResult changes)
        {
            var counterpart = element.CorrespondingScrapedElement;
            changes = new ElementChangeResult();

            // Location: the percentage stays 0 unless the rectangles differ.
            decimal locationPercent = 0;
            var locationDiffers = !counterpart.Location.Equals(element.Location);
            if (locationDiffers)
            {
                changes.LocationChanges = GetLocationChanges(counterpart.Location, element.Location, out locationPercent);
            }

            changes.LocationPercentageChange = locationPercent;

            // Css
            decimal cssPercent = 0;
            var cssDiffers = !counterpart.Css.DictionaryEqual(element.Css);
            if (cssDiffers)
            {
                changes.CssChanges = GetCssChanges(counterpart.Css, element.Css, out cssPercent);
            }

            changes.CssPercentageChange = cssPercent;

            // Html
            decimal htmlPercent = 0;
            var htmlDiffers = counterpart.Html != element.Html;
            if (htmlDiffers)
            {
                changes.HtmlChanges = GetStringChanges(counterpart.Html, element.Html, out htmlPercent);
            }

            changes.HtmlPercentageChange = htmlPercent;

            // Text
            decimal textPercent = 0;
            var textDiffers = counterpart.Text != element.Text;
            if (textDiffers)
            {
                changes.TextChanges = GetStringChanges(counterpart.Text, element.Text, out textPercent);
            }

            changes.TextPercentageChange = textPercent;

            // Pixels: always evaluated; counts as a change only when the percentage is positive.
            decimal pixelPercent;
            changes.PixelChanges = GetPixelChanges(pageScreenshotA, pageScreenshotB, element, counterpart, pageA, pageB, out pixelPercent);
            changes.PixelPercentageChange = pixelPercent;

            // Remember where the element sits on the screenshot.
            changes.LocationOnScreenshot = element.LocationOnScreenshot;

            return locationDiffers || cssDiffers || htmlDiffers || textDiffers || pixelPercent > 0M;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Calculates the pixel difference between two elements.
        /// </summary>
        /// <param name="psA">The screenshot of the first page.</param>
        /// <param name="psB">The screenshot of the second page.</param>
        /// <param name="eA">The first element.</param>
        /// <param name="eB">The second element.</param>
        /// <param name="pA">The information about the first page.</param>
        /// <param name="pB">The information about the second page.</param>
        /// <param name="percentageChange">The change as a percentage, as reported by ImageUtil.BitmapDiff.</param>
        /// <returns>
        /// An object containing information about the pixel differences, or <c>null</c> when the
        /// percentage is not positive and both elements have the same width and height on their screenshots.
        /// </returns>
        private PixelChange GetPixelChanges(Image psA, Image psB, ScrapedElement eA, ScrapedElement eB, Scrape pA, Scrape pB, out decimal percentageChange)
        {
            var boundsA = eA.LocationOnScreenshot;
            var boundsB = eB.LocationOnScreenshot;
            var sizeA   = new Size(boundsA.Width, boundsA.Height);
            var sizeB   = new Size(boundsB.Width, boundsB.Height);

            Image originalA = ImageUtil.CropImage(psA, boundsA);
            Image originalB = ImageUtil.CropImage(psB, boundsB);

            // The regions are only needed while the clipped images and masks are built. 'using'
            // guarantees they are released even if one of the ImageUtil calls throws (previously
            // they were disposed only on the non-throwing path).
            using (Region regionA = ImageUtil.GetClippedRegion(boundsA, pA.Elements.Select(e => e.LocationOnScreenshot)))
            using (Region regionB = ImageUtil.GetClippedRegion(boundsB, pB.Elements.Select(e => e.LocationOnScreenshot)))
            {
                Bitmap clippedA       = ImageUtil.GetClippedImage(sizeA, originalA, regionA);
                Bitmap clippedB       = ImageUtil.GetClippedImage(sizeB, originalB, regionB);
                Bitmap diffMask       = ImageUtil.BitmapDiff(clippedA, clippedB, _ia, out percentageChange);
                Bitmap fromRegionMask = ImageUtil.DrawRegionAsMasks(sizeA, regionA, originalA, _ia);
                Bitmap toRegionMask   = ImageUtil.DrawRegionAsMasks(sizeB, regionB, originalB, _ia);

                var sizeChanged = boundsB.Width != boundsA.Width || boundsB.Height != boundsA.Height;
                if (!(percentageChange > 0) && !sizeChanged)
                {
                    // NOTE(review): the images built above are not disposed on this path. They look
                    // like fresh objects owned by this method, but ImageUtil's ownership rules are not
                    // visible here - confirm before adding Dispose calls.
                    return null;
                }

                return new PixelChange
                {
                    From        = originalA,
                    FromClipped = clippedA,
                    FromMask    = fromRegionMask,
                    To          = originalB,
                    ToClipped   = clippedB,
                    ToMask      = toRegionMask,
                    Diff        = diffMask
                };
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Returns information about the given element: its attributes, HTML, text, tag and screenshot
        /// location, plus the cropped image, the clipped image and the region mask.
        /// </summary>
        /// <param name="pageScreenshot">The screenshot of the page containing the element.</param>
        /// <param name="scrapedElement">The element.</param>
        /// <param name="page">The page containing the element.</param>
        /// <returns>An ElementAddRemoveResult object.</returns>
        private ElementAddRemoveResult GetElementData(Image pageScreenshot, ScrapedElement scrapedElement, Scrape page)
        {
            var location  = scrapedElement.LocationOnScreenshot;
            var size      = new Size(location.Width, location.Height);
            var originalA = ImageUtil.CropImage(pageScreenshot, location);

            // The region is only needed while the clipped image and mask are built. Dispose it
            // afterwards, as GetPixelChanges does (it was previously never released here).
            using (var regionA = ImageUtil.GetClippedRegion(location, page.Elements.Select(e => e.LocationOnScreenshot)))
            {
                var clippedA  = ImageUtil.GetClippedImage(size, originalA, regionA);
                var imageMask = ImageUtil.DrawRegionAsMasks(size, regionA, originalA, _ia);

                return new ElementAddRemoveResult
                {
                    Attributes   = scrapedElement.Attributes,
                    Html         = scrapedElement.Html,
                    Text         = scrapedElement.Text,
                    Location     = location,
                    Tag          = scrapedElement.Tag,
                    Image        = originalA,
                    ImageClipped = clippedA,
                    ImageMask    = imageMask
                };
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds a Scrape from the supplied parameters, filling in canned cookies, resources and
        /// elements for whichever of those the caller left unset.
        /// </summary>
        /// <param name="fakeScrapeParams">
        /// The values to copy into the Scrape. Its Cookies, Resources and Elements members are
        /// populated in place when they are missing (Cookies also when empty).
        /// </param>
        /// <returns>A new Scrape populated from <paramref name="fakeScrapeParams"/>.</returns>
        public Scrape FakeScrape(FakeScrapeParams fakeScrapeParams)
        {
            // Only create the list when the caller supplied none. The previous unconditional
            // assignment discarded caller-provided cookies and made the emptiness check below
            // always true.
            if (fakeScrapeParams.Cookies == null)
            {
                fakeScrapeParams.Cookies = new List<string>();
            }

            #region Header/Cookie Content

            if (!fakeScrapeParams.Cookies.Any())
            {
                fakeScrapeParams.Cookies.AddRange(FakeCookies());
            }
            var headerOne = new List<string>()
            {
                "Content-Length:194",
                "Cache-Control:public, must-revalidate",
                "Content-Type:application/x-javascript",
                "Date:Thu, 20 Sep 2012 17:15:03 GMT",
                "ETag:JsJt380DknGc4kAEEn76og=="
            };
            var headerTwo = new List<string>()
            {
                "Content-Length:17423",
                "Cache-Control:public, must-revalidate",
                "Content-Type:application/x-javascript",
                "Date:Thu, 20 Sep 2012 17:15:03 GMT",
                "ETag:qloGz7WY45YMKQ1Fmuuw8A=="
            };
            var headerThree = new List<string>()
            {
                "Content-Length:2552",
                "Cache-Control:public, must-revalidate",
                "Content-Type:image/gif",
                "Date:Thu, 20 Sep 2012 17:15:03 GMT",
                "ETag:UAFdRlkmdsJ1EGIoGalWng=="
            };

            #endregion

            if (fakeScrapeParams.Resources == null)
            {
                // Three canned resources, each imposed with: Uri, StatusCode, StatusDescription, Headers.
                var first = GetSession().List<Resource>(3).First(1)
                    .Impose(x => x.Uri, "http://c.mfcreativedev.com/webparts/banner/Banner.js?v=c5589edb")
                    .Impose(x => x.StatusCode, HttpStatusCode.OK)
                    .Impose(x => x.StatusDescription, "OK")
                    .Impose(x => x.Headers, headerOne)
                    .Next(1)
                    .Impose(x => x.Uri, "http://c.mfcreativedev.com/webparts/header/HeaderV1_2.js?v=730f5c7b1")
                    .Impose(x => x.StatusCode, HttpStatusCode.OK)
                    .Impose(x => x.StatusDescription, "OK")
                    .Impose(x => x.Headers, headerTwo)
                    .Next(1)
                    .Impose(x => x.Uri, "http://c.mfcreativedev.com/s/0/p/0/i/ances_logo.gif")
                    .Impose(x => x.StatusCode, HttpStatusCode.OK)
                    .Impose(x => x.StatusDescription, "OK")
                    .Impose(x => x.Headers, headerThree)
                    .All().Get().ToArray();
                fakeScrapeParams.Resources = first;
            }

            if (fakeScrapeParams.Elements == null)
            {
                // Default to a single empty 800x30 div at the top-left corner.
                var elements = new List<ScrapedElement>();
                var ele1 = new ScrapedElement()
                {
                    Attributes                  = new Dictionary<string, string>() {{"id", "mngb"}},
                    CorrespondingScrapedElement = null,
                    Css                         = new Dictionary<string, string>(),
                    Html                        = "<div id=\"mngb\"></div>",
                    Location                    = new Rectangle(0, 0, 800, 30),
                    LocationOnScreenshot        = new Rectangle(0, 0, 800, 30),
                    Tag                         = "div",
                    Text                        = ""
                };
                elements.Add(ele1);
                fakeScrapeParams.Elements = new List<ScrapedElement>(elements);
            }
            var scr = new Scrape
            {
                Id                    = new ObjectId(fakeScrapeParams.Id),
                ExcludeJquerySelector = fakeScrapeParams.Exclude,
                IncludeJquerySelector = fakeScrapeParams.Include,
                Script                = fakeScrapeParams.Script,
                BoundingRectangle     = fakeScrapeParams.Bounding,
                Path                  = new StringAsReference {Value = fakeScrapeParams.Path},
                Elements              = fakeScrapeParams.Elements,
                Resources             = fakeScrapeParams.Resources,
                Html                  = fakeScrapeParams.Html,
                HtmlRef               = new StringAsReference {Value = fakeScrapeParams.HtmlRef},
                Url                   = fakeScrapeParams.Url,
                Screenshot            = fakeScrapeParams.ScreenShot,
                ScreenshotRef         = new StringAsReference {Value = fakeScrapeParams.ScreenShotRef},
                // Fall back to an 800x600 viewport and the current local time when unset.
                ViewportSize          = fakeScrapeParams.ViewportSize == null ? new Size(800 , 600) : fakeScrapeParams.ViewportSize.Value,
                Browser               = fakeScrapeParams.Browser,
                BrowserVersion        = fakeScrapeParams.BrowserVersion,
                TimeStamp             = fakeScrapeParams.TimeStamp == null ? DateTime.Now : fakeScrapeParams.TimeStamp.Value,
                Platform              = fakeScrapeParams.Platform,
                Cookies               = fakeScrapeParams.Cookies
            };
            return scr;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Looks for the first element whose "id" attribute matches that of <paramref name="element"/>.
        /// </summary>
        /// <param name="element">The element whose id is being searched for.</param>
        /// <param name="elements">The elements to search, in order.</param>
        /// <param name="idMatch">Receives the first match, or null when there is none.</param>
        /// <returns><c>true</c> when a match was found.</returns>
        public bool HasIdMatch(ScrapedElement element, IEnumerable<ScrapedElement> elements, out ScrapedElement idMatch)
        {
            var sameId = elements.Where(candidate => IdsMatch(candidate, element));
            idMatch = sameId.FirstOrDefault();

            if (idMatch != null)
            {
                return true;
            }

            return false;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Looks for the first element that ElementsEqual considers equal to <paramref name="element"/>.
        /// </summary>
        /// <param name="element">The element to match.</param>
        /// <param name="elements">The elements to search, in order.</param>
        /// <param name="exactMatch">Receives the first match, or null when there is none.</param>
        /// <returns><c>true</c> when a match was found.</returns>
        public bool HasExactMatch(ScrapedElement element, IEnumerable<ScrapedElement> elements, out ScrapedElement exactMatch)
        {
            var equalOnes = elements.Where(candidate => ElementsEqual(candidate, element));
            exactMatch = equalOnes.FirstOrDefault();

            if (exactMatch != null)
            {
                return true;
            }

            return false;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Tells whether both elements carry an "id" attribute with the same value.
        /// </summary>
        /// <param name="eA">The first element.</param>
        /// <param name="eB">The second element.</param>
        /// <returns><c>true</c> only when both have an "id" attribute and the two values are equal.</returns>
        private bool IdsMatch(ScrapedElement eA, ScrapedElement eB)
        {
            string firstId;
            string secondId;

            // eB is probed first; short-circuiting skips the remaining lookups once one id is missing.
            return eB.Attributes.TryGetValue("id", out secondId)
                   && eA.Attributes.TryGetValue("id", out firstId)
                   && firstId == secondId;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Determines how much the attributes of two elements differ.
        /// </summary>
        /// <remarks>
        /// Each attribute key present on either element contributes 100 to the total. An added or
        /// deleted key counts as a full 100 of change; a key present on both with a different value
        /// counts by the text distance between the two values, scaled by the longer value's length.
        /// </remarks>
        /// <param name="eA">The first element.</param>
        /// <param name="eB">The second element.</param>
        /// <returns>
        /// The accumulated change passed through GetPercentageOfTotal against the total - presumably a
        /// percentage where 0 means identical attributes (confirm against that helper).
        /// </returns>
        private decimal AttributesSimilarity(ScrapedElement eA, ScrapedElement eB)
        {
            var keysA = eA.Attributes.Select(k => k.Key).ToArray();
            var keysB = eB.Attributes.Select(k => k.Key).ToArray();
            var added = keysB.Except(keysA).ToArray();
            var deleted = keysA.Except(keysB).ToArray();
            var common = keysA.Except(deleted).ToArray();
            var changed = (from key in common
                           where eA.Attributes[key] != eB.Attributes[key]
                           select new AttributeChangeDetail { From = eA.Attributes[key], To = eB.Attributes[key], Key = key }).ToArray();

            // Added AND deleted keys each count as a complete change. (The previous code summed
            // 'added' twice, so attributes removed from eA were never counted.)
            var addedDeleted = (added.Length + deleted.Length) * 100;
            var changedPercentage = (from detail in changed
                                     let maxLength = Math.Max(detail.From.Length, detail.To.Length)
                                     let distance = TextDistance(detail.From, detail.To)
                                     select maxLength.GetPercentageOfTotal(distance)).Sum();

            // 'common' is exactly the unchanged plus the changed keys, so it stands in for both.
            // NOTE(review): total is 0 when neither element has attributes - confirm that
            // GetPercentageOfTotal copes with a zero total.
            var total = (common.Length + deleted.Length + added.Length) * 100;
            var change = total.GetPercentageOfTotal(addedDeleted + changedPercentage);

            return change;
        }