/// <summary>
        /// Parses each line of the scanned ingredient section and stores the parsed
        /// ingredients on <c>result.Output.Ingredients</c>.
        /// </summary>
        /// <remarks>
        /// A leading list marker made of digits followed by "." or ")" is stripped before the
        /// line is handed to <c>IngredientParser.Parse</c>. Null, empty and whitespace-only
        /// lines (including lines that only contained a list marker) are skipped and do not
        /// consume an ordinal. A <c>RecipeParseException</c> raised for a line is recorded in
        /// <c>result.Errors</c> together with the raw line; the remaining lines are still processed.
        /// </remarks>
        /// <param name="scannedRecipe">Scanned recipe whose ingredient section is read.</param>
        /// <param name="result">Receives the parsed ingredients and any parse errors.</param>
        private void ParseIngredients(ScannedRecipe scannedRecipe, ParserResult result)
        {
            var ingredients       = new List<RecipeIngredient>();
            var ingredientOrdinal = 1;

            foreach (var line in scannedRecipe.IngredientSection.Content)
            {
                // Guard before Trim(): a null entry would otherwise raise a
                // NullReferenceException, which the catch below does not handle.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                try
                {
                    // Drop a leading "1." / "1)" style list marker.
                    var correctedIngredient = Regex.Replace(line.Trim(), @"^\d+(\)|\.)", string.Empty);
                    if (string.IsNullOrWhiteSpace(correctedIngredient))
                    {
                        // The line held nothing but a list marker.
                        continue;
                    }

                    var ingredient = IngredientParser.Parse(correctedIngredient);

                    // The ordinal is only consumed once parsing has succeeded.
                    ingredient.Ordinal = ingredientOrdinal++;
                    ingredients.Add(ingredient);
                }
                catch (RecipeParseException ex)
                {
                    // Report the original, uncorrected line alongside the error.
                    result.Errors.Add(ParseError.FromException(ex, line));
                }
            }

            result.Output.Ingredients = ingredients;
        }
        /// <summary>
        /// Joins the non-empty lines of the scanned description section into a single
        /// string and stores it on <c>result.Output.Description</c>.
        /// </summary>
        /// <param name="scannedRecipe">Scanned recipe whose description section is read.</param>
        /// <param name="result">Receives the assembled description text.</param>
        private void ParseDescription(ScannedRecipe scannedRecipe, ParserResult result)
        {
            var text = new StringBuilder();

            foreach (var line in scannedRecipe.DescriptionSection.Content)
            {
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                // Every kept entry gets its own line terminator, since the section
                // was scanned line by line.
                text.AppendLine(line);
            }

            result.Output.Description = text.ToString();
        }
        /// <summary>
        /// Checks that the scanned recipe has content in its ingredient, description and
        /// instruction sections, adding one <c>MissingSection</c> error to
        /// <c>result.Errors</c> for each section that is empty.
        /// </summary>
        /// <param name="scannedRecipe">Scanned recipe to validate.</param>
        /// <param name="result">Receives a <c>ParseError</c> per missing section.</param>
        /// <returns>
        /// <c>true</c> when <c>result.Errors</c> holds no error of type
        /// <c>ErrorType.MissingSection</c> after the checks; otherwise <c>false</c>.
        /// </returns>
        private bool ValidateScannedRecipe(ScannedRecipe scannedRecipe, ParserResult result)
        {
            if (!scannedRecipe.IngredientSection.Content.Any())
            {
                result.Errors.Add(CreateMissingSectionError(
                    "Could Not find any ingredients", ParseErrorCode.NoIngredients));
            }

            if (!scannedRecipe.DescriptionSection.Content.Any())
            {
                result.Errors.Add(CreateMissingSectionError(
                    "Could Not find any description", ParseErrorCode.NoDescription));
            }

            if (!scannedRecipe.InstructionSection.Content.Any())
            {
                result.Errors.Add(CreateMissingSectionError(
                    "Could Not find any instructions", ParseErrorCode.NoInstructions));
            }

            // Looks at the whole error list, so a MissingSection error that was already
            // present before this call also makes validation fail.
            return !result.Errors.Any(e => e.ErrorType == ErrorType.MissingSection);
        }

        /// <summary>
        /// Builds a <c>MissingSection</c> error that is not tied to a source position
        /// (line and character are -1, the unparsed line is empty).
        /// </summary>
        /// <param name="description">Human readable error text.</param>
        /// <param name="errorCode">Code identifying which section is missing.</param>
        /// <returns>The populated <c>ParseError</c>.</returns>
        private static ParseError CreateMissingSectionError(string description, ParseErrorCode errorCode)
        {
            return new ParseError
            {
                Character    = -1,
                Line         = -1,
                Description  = description,
                ErrorCode    = errorCode,
                ErrorType    = ErrorType.MissingSection,
                UnparsedLine = ""
            };
        }
        /// <summary>
        /// Turns each line of the scanned instruction section into a <c>Step</c> and stores
        /// the resulting list on <c>result.Output.Steps</c>.
        /// </summary>
        /// <remarks>
        /// A leading list marker made of digits followed by "." or ")" is stripped and the
        /// remaining text is trimmed. Lines that are null, blank, or contain nothing besides
        /// such a marker are skipped and do not consume an ordinal, so the produced steps are
        /// numbered 1..n without gaps and never carry empty instructions.
        /// </remarks>
        /// <param name="scannedRecipe">Scanned recipe whose instruction section is read.</param>
        /// <param name="result">Receives the list of steps.</param>
        private void ParseInstructions(ScannedRecipe scannedRecipe, ParserResult result)
        {
            var instructionList    = new List<Step>();
            var instructionOrdinal = 1;

            foreach (var line in scannedRecipe.InstructionSection.Content)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // Get rid of a number ("1." / "1)") if one exists at the start of the line.
                var correctedStep = Regex.Replace(line.Trim(), @"^\d+(\)|\.)", string.Empty).Trim();

                if (correctedStep.Length == 0)
                {
                    // Only a list marker was present; an empty step would be meaningless.
                    continue;
                }

                instructionList.Add(new Step
                {
                    Ordinal      = instructionOrdinal++,
                    Instructions = correctedStep
                });
            }

            result.Output.Steps = instructionList;
        }
示例#5
0
        /// <summary>
        /// Loads the document at <c>Url</c> and scans it for recipes, creating one
        /// <c>ScannedRecipe</c> for every ingredients caption node found.
        /// </summary>
        /// <remarks>
        /// <para>
        /// When loading fails with an <c>EncodingNotSupportedException</c> and the configured
        /// <c>Encoding</c> is not UTF-8, <c>Encoding</c> is switched to UTF-8 and the scan is
        /// retried. If the encoding already is UTF-8 the exception propagates to the caller
        /// instead of continuing with a document that was never loaded.
        /// </para>
        /// <para>
        /// <c>Cancel?.ThrowIfCancellationRequested()</c> is invoked before each recipe and
        /// before each ingredient sub-section.
        /// </para>
        /// </remarks>
        /// <returns>The list of scanned recipes, after <c>AppendTemporaryIndecies</c> has been applied to it.</returns>
        public IEnumerable<ScannedRecipe> Scan()
        {
            var scanned = new List<ScannedRecipe>();

            var web = new HtmlWeb();

            if (Encoding != null)
            {
                web.OverrideEncoding = Encoding;
            }

            HtmlDocument doc;

            try
            {
                doc = web.Load(Url);
            }
            catch (EncodingNotSupportedException) when (Encoding != Encoding.UTF8)
            {
                // Retry with UTF-8. The filter stops the retry from recursing: a failure
                // while already using UTF-8 is not caught here and surfaces to the caller.
                Encoding = Encoding.UTF8;
                return Scan();
            }

            var ingredientsCaptionNodes = ScanIngredientsCaptionNodes(doc);

            foreach (var ingredientsCaptionNode in ingredientsCaptionNodes)
            {
                Cancel?.ThrowIfCancellationRequested();

                var ingredientsSectionNode = ScanIngredientsSectionNode(doc, ingredientsCaptionNode);
                var recipe = new ScannedRecipe();
                scanned.Add(recipe);

                var dishCaptionNodes = ScanDishCaptionNodes(doc, ingredientsCaptionNode);

                var dishes       = new List<ScannedDish>();
                var servingsList = new List<int>();
                foreach (var dishCaptionNode in dishCaptionNodes)
                {
                    dishes.Add(ScanDish(doc, dishCaptionNode));
                    servingsList.Add(ScanServings(doc, dishCaptionNode));
                }

                // Use the first dish found; fall back to a dish without candidates.
                recipe.Dish = dishes.FirstOrDefault() ?? new ScannedDish
                {
                    Candidates = new List<Dish>(),
                };

                // Min() throws on an empty sequence, so when no dish caption was found the
                // recipe keeps whatever Servings value a new ScannedRecipe starts with.
                if (servingsList.Count > 0)
                {
                    recipe.Servings = servingsList.Min();
                }

                var ingredientSectionNodes = ScanIngredientSectionNodes(doc, ingredientsSectionNode);
                var ingredients            = new List<ScannedIngredient>();
                foreach (var subSectionNode in ingredientSectionNodes)
                {
                    Cancel?.ThrowIfCancellationRequested();

                    var section = ScanIngredientSection(doc, subSectionNode);

                    var ingredientsBySection = ScanIngredientsFromIngredientSection(doc, subSectionNode);

                    // Every ingredient parsed from this sub-section shares the same section object.
                    ingredients.AddRange(ingredientsBySection.Select(i =>
                    {
                        var d     = IngredientItemParser.Parse(i);
                        d.Section = section;
                        return d;
                    }));
                }
                recipe.Ingredients = ingredients;

                // Refine data: a section with exactly one candidate is treated as reliable.
                // Candidates carrying a reliable name are removed from every section that
                // still has more than one candidate.
                // Materialised once because the list is enumerated twice below.
                var sections             = recipe.Ingredients.Select(i => i.Section).Distinct().ToList();
                var reliableSectionNames = sections.Where(s => s.Candidates.Count() == 1)
                                           .SelectMany(s => s.Candidates.Select(c => c.Name))
                                           .ToList();

                foreach (var ambiguousSection in sections.Where(s => s.Candidates.Count() > 1))
                {
                    // Filter in a single pass and materialise the result so the property
                    // does not end up holding a chain of nested deferred queries.
                    ambiguousSection.Candidates = ambiguousSection.Candidates
                        .Where(c => !reliableSectionNames.Any(n => n == c.Name))
                        .ToList();
                }
            }

            AppendTemporaryIndecies(scanned);

            return scanned;
        }