예제 #1
0
        /// <summary>
        /// Checks that a <see cref="BoundExpression"/> built from a single
        /// <see cref="BoundExpressionCondition"/> produces one match per advert in the sample
        /// content, and that each match exposes the expected "Vendor" and "Year" values.
        /// </summary>
        public void ExpressionTest_Match()
        {
            // Two groups share the name "Vendor" but are anchored on different advert markers.
            BoundExpressionGroup vendorGroup1 = new BoundExpressionGroup("Vendor", new BoundSelector("<div>", "автомобиль1</div> <div>", "</div>"));
            BoundExpressionGroup vendorGroup2 = new BoundExpressionGroup("Vendor", new BoundSelector("<div>", "автомобиль2</div> <div>", "</div>"));
            BoundExpressionGroup yearGroup = new BoundExpressionGroup("Year", new BoundSelector("выпуска <br>", "</br>"));

            string content = @"<div>Продается автомобиль1</div> <div>Opel</div>Год выпуска <br> 2000 </br>
                           Delimitters <div>Продается автомобиль2</div> <div>BMW</div> Год выпуска <br>1111 </br> Delimitters";

            // Each vendor group serves both as the condition and as the first token of its branch.
            var tokens = new List<BoundExpressionToken>
            {
                new BoundExpressionCondition(
                    vendorGroup1, new BoundExpressionGroup[] { vendorGroup1, yearGroup },
                    vendorGroup2, new BoundExpressionGroup[] { vendorGroup2, yearGroup })
            };

            BoundExpression expr = new BoundExpression(tokens);

            var matches = expr.Matches(content);

            // Assert.AreEqual takes (expected, actual); keeping that order makes failure
            // messages report the expected and actual values correctly.
            Assert.AreEqual(2, matches.Count);

            Assert.AreEqual("Opel", matches[0]["Vendor"]);
            Assert.AreEqual("2000", matches[0][yearGroup.Name]);

            Assert.AreEqual("BMW", matches[1]["Vendor"]);
            Assert.AreEqual("1111", matches[1][yearGroup.Name]);
        }
 /// <summary>
 /// Creates a condition made of two alternatives, each pairing a condition group with an
 /// array of tokens. The arguments are stored as given, without validation or copying.
 /// </summary>
 /// <param name="leftCondition">Group associated with the left alternative.</param>
 /// <param name="leftTokens">Tokens associated with the left alternative.</param>
 /// <param name="rightCondition">Group associated with the right alternative.</param>
 /// <param name="rightTokens">Tokens associated with the right alternative.</param>
 /// <remarks>
 /// NOTE(review): presumably the tokens of an alternative are applied only when its
 /// condition group matches; that logic is not visible here, so confirm against the matcher.
 /// </remarks>
 public BoundExpressionCondition(
     BoundExpressionGroup leftCondition,
     BoundExpressionToken[] leftTokens,
     BoundExpressionGroup rightCondition,
     BoundExpressionToken[] rightTokens)
 {
     // Condition groups of both alternatives.
     this._leftCondition = leftCondition;
     this._rightCondition = rightCondition;

     // Token arrays of both alternatives (references are kept, not cloned).
     this._leftTokens = leftTokens;
     this._rightTokens = rightTokens;
 }
        /// <summary>
        /// Builds the selector named "Ad": a <see cref="BoundExpressionSelector"/> wrapping a
        /// <see cref="BoundExpression"/> that consists of a single
        /// <see cref="BoundExpressionCondition"/> with two alternative token sequences.
        /// </summary>
        /// <remarks>
        /// The sequence paired with the "C1" group lists Price, DetailUrl, Rooms, Size, Address
        /// and Floor; the sequence paired with the "C2" group is the same list preceded by Date.
        /// NOTE(review): the string anchors look like fragments of one specific HTML listing
        /// page — confirm against the target markup before changing any of them.
        /// </remarks>
        /// <returns>The newly constructed "Ad" selector.</returns>
        public override Selector CreateSelector()
        {
            // Split across two literals exactly as in the anchors below; passed to the Floor group.
            var endOfAd = new BoundSelector("</tab", "le>");

            var dateGroup = new BoundExpressionGroup("Date", new BoundSelector("strup> &#8593; </a>", "</div></td>"));

            // Price has two alternative anchors.
            var priceGroup = new BoundExpressionGroup(
                "Price",
                new BoundSelector("class=zhyofoto>", "р<br>"),
                new BoundSelector("class=tprice>", "р<br>"));

            var detailUrlSelector = new BoundSelector("<A href='newflat-", "'");
            detailUrlSelector.StringComparison = StringComparison.OrdinalIgnoreCase;
            var detailUrlGroup = new BoundExpressionGroup("DetailUrl", detailUrlSelector);

            var roomsSelector = new BoundSelector("class=site3", ">", "(");
            roomsSelector.Superposition = true;
            var roomsGroup = new BoundExpressionGroup("Rooms", roomsSelector);

            var sizeGroup = new BoundExpressionGroup("Size", new BoundSelector("(", ")"));
            var addressGroup = new BoundExpressionGroup("Address", new BoundSelector(",", ","));
            var floorGroup = new BoundExpressionGroup("Floor", new BoundSelector("этаж", " ,"), endOfAd);

            // Condition groups: both start at "class=strup>" and differ only in the closing anchor.
            var undatedCondition = new BoundExpressionGroup("C1", new BoundSelector("class=strup>", "</a> </td>"));
            var datedCondition = new BoundExpressionGroup("C2", new BoundSelector("class=strup>", "</div></td>"));

            var undatedTokens = new BoundExpressionToken[]
            {
                priceGroup, detailUrlGroup, roomsGroup, sizeGroup, addressGroup, floorGroup
            };

            var datedTokens = new BoundExpressionToken[]
            {
                dateGroup, priceGroup, detailUrlGroup, roomsGroup, sizeGroup, addressGroup, floorGroup
            };

            var adCondition = new BoundExpressionCondition(
                undatedCondition, undatedTokens,
                datedCondition, datedTokens);

            var adExpression = new BoundExpression(adCondition);

            return new BoundExpressionSelector("Ad", adExpression);
        }