예제 #1
0
        /// <summary>
        /// Creates a <see cref="LunchMenuFoodItem"/> from the given features and appends it to the
        /// food item list stored in the lunch menu under the given weekday.
        /// </summary>
        /// <param name="lunchMenu">Menu whose <c>FoodItems</c> collection receives the new item.</param>
        /// <param name="currentWeekDay">Weekday key under which the item is stored.</param>
        /// <param name="foodFeature">
        /// Feature whose inner text, passed through <c>Utils.HtmlDecode</c> and trimmed, becomes the food item text.
        /// </param>
        /// <param name="priceFeature">
        /// Optional feature whose inner text is handed to <c>ParsePrice</c>; when <c>null</c> no price is assigned.
        /// </param>
        public static void AddFoodItemForWeekDay(LunchMenu lunchMenu,
                                                 WeekDay currentWeekDay,
                                                 LunchMenuFeature foodFeature,
                                                 LunchMenuFeature priceFeature = null)
        {
            var entry = new LunchMenuFoodItem();
            entry.FoodItem = Utils.HtmlDecode(foodFeature.InnerText).Trim();

            if (priceFeature != null)
            {
                entry.Price = ParsePrice(priceFeature.InnerText);
            }
            // TODO: when there is no separate price feature, parse food item and price from the same string

            // Make sure a bucket exists for this weekday before appending to it.
            if (!lunchMenu.FoodItems.ContainsKey(currentWeekDay))
            {
                lunchMenu.FoodItems.Add(currentWeekDay, new List<LunchMenuFoodItem>());
            }

            lunchMenu.FoodItems[currentWeekDay].Add(entry);
        }
        /// <summary>
        /// Wraps up the analysis of one restaurant. Currently this only logs a "no results" line
        /// when the parse result holds no food items.
        /// </summary>
        /// <param name="parseResult">Lunch menu produced by parsing the restaurant.</param>
        private void CompleteLunchRestaurantAnalysis(LunchMenu parseResult)
        {
            var nothingFound = parseResult.FoodItems.IsEmpty();
            if (nothingFound)
            {
                Logger.InfoFormat("-> {0} - no results.\n", parseResult.RestaurantKey);
            }

            // DB update + print result (placeholder - nothing is implemented here yet)
        }
예제 #3
0
        /// <summary>
        /// Tries to extract a weekly lunch menu from the restaurant's page by walking the detected
        /// features in document order, starting after the first "maanantai" (Monday) weekday feature,
        /// and assigning each food item to the most recently seen weekday.
        /// </summary>
        /// <param name="restaurant">Restaurant whose <c>AbsoluteURL</c> is fetched and analysed.</param>
        /// <returns>
        /// A <see cref="LunchMenu"/> keyed by the restaurant URL. <c>Confidence</c> stays 0 when the
        /// analysis is skipped or aborted, and is set to 1 only when at least five weekdays were
        /// collected and each of them has at least one food item.
        /// </returns>
        public LunchMenu ParseLunchMenu(LunchRestaurant restaurant)
        {
            // Documents with more nodes than this are not analysed at all.
            const int maxNodeCount = 7000;
            // Number of weekdays that must be collected for a confident result.
            const int requiredDayCount = 5;

            var lunchMenu = new LunchMenu { RestaurantKey = restaurant.URL, Confidence = 0 };

            var doc = Utils.GetLunchRestaurantDocumentForUrl(restaurant.AbsoluteURL);
            if (ShouldSkipAnalysis(doc))
            {
                return lunchMenu;
            }

            var nodeCount = doc.HtmlDocument.DocumentNode.DescendantNodes().Count();

            // TODO: enforce the upper limit for points / nodes already on the Seeker side -
            // otherwise the strategies get stuck on very large documents
            if (nodeCount > maxNodeCount)
            {
                return lunchMenu;
            }

            // 1. first, let's collect the basic features for the document (unknown ones are dropped)
            var features = SimpleFeatureDetector.DetectFeatures(doc.HtmlDocument)
                                                .Where(f => f.Type != LunchMenuFeatureType.Unknown)
                                                .ToList();

            // 2. let's make sure every required weekday is found among the weekday features
            if (!RequiredWeekdays.All(day => features.Any(f => f.Type == LunchMenuFeatureType.Weekday &&
                                                               f.InnerText.ToLowerInvariant().Contains(day))))
            {
                // we shouldn't continue with this strategy
                return lunchMenu;
            }

            // 3. let's find the index for the first weekday
            var firstWeekdayIndex = features.FindIndex(f => f.Type == LunchMenuFeatureType.Weekday &&
                                                            f.InnerText.ToLowerInvariant().Contains("maanantai"));
            if (firstWeekdayIndex == -1)
            {
                return lunchMenu;
            }

            // 4. let's loop through the detected features and collect food items for each weekday
            var currentWeekDay = WeekDay.Monday;
            for (var i = (firstWeekdayIndex + 1); i < features.Count; i++)
            {
                var currentFeature = features[i];

                // Look one feature ahead (if any) so a food item can be paired with a following price.
                LunchMenuFeature nextFeature = null;
                if (features.Count != (i + 1))
                {
                    nextFeature = features[i + 1];
                }

                if (currentFeature.Type == LunchMenuFeatureType.FoodItemAndPrice)
                {
                    AddFoodItemForWeekDay(lunchMenu, currentWeekDay, currentFeature);
                }
                else if (currentFeature.Type == LunchMenuFeatureType.FoodItem &&
                         nextFeature != null && nextFeature.Type == LunchMenuFeatureType.Price)
                {
                    AddFoodItemForWeekDay(lunchMenu, currentWeekDay, currentFeature, nextFeature);
                }
                else if (currentFeature.Type == LunchMenuFeatureType.Weekday)
                {
                    // we'll only accept detected weekday as the next weekday if weekdays are in correct order
                    var detectedWeekday = ParseWeekDay(currentFeature.InnerText);
                    var nextWeekday = (WeekDay)Enum.ToObject(typeof(WeekDay), (int)currentWeekDay + 1);

                    // An out-of-order weekday aborts the strategy, unless enough days have already
                    // been collected - in that case the detected weekday is simply accepted.
                    if (detectedWeekday != nextWeekday &&
                        (lunchMenu.FoodItems == null || lunchMenu.FoodItems.Keys.Count < requiredDayCount))
                    {
                        return lunchMenu;
                    }

                    currentWeekDay = detectedWeekday;
                }
            }

            // 5. finally, let's make sure we have at least some food items for each day.
            // The per-day list itself is inspected: grouping the dictionary entries by key would
            // always yield groups of exactly one entry and therefore say nothing about the items.
            var detectedDays = lunchMenu.FoodItems.Keys.Count;
            var foodItemsForEachWeekday = lunchMenu.FoodItems.All(entry => entry.Value != null &&
                                                                           entry.Value.Count > 0);

            if (detectedDays >= requiredDayCount && foodItemsForEachWeekday)
            {
                lunchMenu.Confidence = 1;
            }

            if (lunchMenu.FoodItems.Count > 0)
            {
                var result = new StringBuilder("results for {0}:\n".With(restaurant.AbsoluteURL));
                foreach (var weekDayItems in lunchMenu.FoodItems.Where(entry => entry.Value != null &&
                                                                                entry.Value.Count > 0))
                {
                    // Report the real number of food items collected for the weekday.
                    result.AppendFormat("-> {0} - {1} food items\n", weekDayItems.Key, weekDayItems.Value.Count);
                }

                Logger.Info(result.ToString());
            }
            else
            {
                Logger.InfoFormat("{0} - no results.\n", restaurant.AbsoluteURL);
            }

            return lunchMenu;
        }