/// <summary>
/// Parses each line of the scanned ingredient section into an ingredient and
/// stores the resulting list in <c>result.Output.Ingredients</c>.
/// </summary>
/// <param name="scannedRecipe">Scanned recipe whose <c>IngredientSection.Content</c> lines are parsed.</param>
/// <param name="result">
/// Receives the parsed ingredients; a <c>RecipeParseException</c> raised for a line is
/// converted to a <c>ParseError</c> and appended to <c>result.Errors</c> instead of aborting.
/// </param>
private void ParseIngredients(ScannedRecipe scannedRecipe, ParserResult result)
{
    var ingredients = new List<RecipeIngredient>();
    var nextOrdinal = 1;

    foreach (var line in scannedRecipe.IngredientSection.Content)
    {
        // Null or blank entries carry no ingredient; skipping them here also
        // avoids calling Trim() on a null reference.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // Drop a leading list number such as "1." or "2)", then trim again so the
        // whitespace that followed the number is not handed to the parser.
        var ingredientText = Regex.Replace(line.Trim(), @"^\d+(\)|\.)", string.Empty).Trim();
        if (ingredientText.Length == 0)
        {
            continue;
        }

        try
        {
            var ingredient = IngredientParser.Parse(ingredientText);

            // The ordinal is only consumed once parsing succeeded, so failed
            // lines do not leave gaps in the numbering.
            ingredient.Ordinal = nextOrdinal++;
            ingredients.Add(ingredient);
        }
        catch (RecipeParseException ex)
        {
            // Report the error against the original, unmodified line.
            result.Errors.Add(ParseError.FromException(ex, line));
        }
    }

    result.Output.Ingredients = ingredients;
}
/// <summary>
/// Joins the non-empty lines of the scanned description section into a single
/// string and stores it in <c>result.Output.Description</c>.
/// </summary>
/// <param name="scannedRecipe">Scanned recipe whose <c>DescriptionSection.Content</c> lines are read.</param>
/// <param name="result">Parser result whose output description is assigned.</param>
private void ParseDescription(ScannedRecipe scannedRecipe, ParserResult result)
{
    var text = new StringBuilder();

    foreach (var line in scannedRecipe.DescriptionSection.Content)
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        // Every kept entry is terminated with a line break because the
        // description was scanned line by line.
        text.AppendLine(line);
    }

    result.Output.Description = text.ToString();
}
/// <summary>
/// Checks that the scanned recipe has ingredient, description and instruction
/// content, adding one <c>MissingSection</c> error to <c>result.Errors</c> for
/// each section that is empty.
/// </summary>
/// <param name="scannedRecipe">Scanned recipe whose three sections are inspected.</param>
/// <param name="result">Parser result that collects the validation errors.</param>
/// <returns>
/// <c>false</c> when <c>result.Errors</c> contains any error of type
/// <c>ErrorType.MissingSection</c> (including ones already present before the
/// call); otherwise <c>true</c>.
/// </returns>
private bool ValidateScannedRecipe(ScannedRecipe scannedRecipe, ParserResult result)
{
    // Missing-section errors are not tied to a source position, hence -1 for
    // both line and character.
    if (!scannedRecipe.IngredientSection.Content.Any())
    {
        var missingIngredients = new ParseError
        {
            Character = -1,
            Line = -1,
            Description = "Could Not find any ingredients",
            ErrorCode = ParseErrorCode.NoIngredients,
            ErrorType = ErrorType.MissingSection,
            UnparsedLine = ""
        };
        result.Errors.Add(missingIngredients);
    }

    if (!scannedRecipe.DescriptionSection.Content.Any())
    {
        var missingDescription = new ParseError
        {
            Character = -1,
            Line = -1,
            Description = "Could Not find any description",
            ErrorCode = ParseErrorCode.NoDescription,
            ErrorType = ErrorType.MissingSection,
            UnparsedLine = ""
        };
        result.Errors.Add(missingDescription);
    }

    if (!scannedRecipe.InstructionSection.Content.Any())
    {
        var missingInstructions = new ParseError
        {
            Character = -1,
            Line = -1,
            Description = "Could Not find any instructions",
            ErrorCode = ParseErrorCode.NoInstructions,
            ErrorType = ErrorType.MissingSection,
            UnparsedLine = ""
        };
        result.Errors.Add(missingInstructions);
    }

    // Valid only when no missing-section error is recorded.
    return !result.Errors.Any(error => error.ErrorType == ErrorType.MissingSection);
}
/// <summary>
/// Converts each line of the scanned instruction section into a numbered
/// <c>Step</c> and stores the list in <c>result.Output.Steps</c>.
/// </summary>
/// <param name="scannedRecipe">Scanned recipe whose <c>InstructionSection.Content</c> lines are read.</param>
/// <param name="result">Parser result whose output steps are assigned.</param>
private void ParseInstructions(ScannedRecipe scannedRecipe, ParserResult result)
{
    var steps = new List<Step>();
    var nextOrdinal = 1;

    foreach (var line in scannedRecipe.InstructionSection.Content)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // Get rid of a list number ("1." or "2)") if one exists at the start of the line.
        var stepText = Regex.Replace(line.Trim(), @"^\d+(\)|\.)", string.Empty).Trim();

        // A line that held only whitespace or only a number would otherwise
        // become an empty step and consume an ordinal.
        if (stepText.Length == 0)
        {
            continue;
        }

        steps.Add(new Step
        {
            Ordinal = nextOrdinal++,
            Instructions = stepText
        });
    }

    result.Output.Steps = steps;
}
/// <summary>
/// Loads the HTML document at <c>Url</c> and scans it for recipes: one
/// <c>ScannedRecipe</c> is produced per ingredients caption node found, holding
/// the scanned dish, servings and ingredients (grouped by ingredient section).
/// </summary>
/// <returns>The scanned recipes, after <c>AppendTemporaryIndecies</c> has been applied to them.</returns>
/// <exception cref="EncodingNotSupportedException">
/// Loading failed with this exception even though <c>Encoding</c> was already UTF-8.
/// </exception>
/// <remarks>
/// If loading fails with <c>EncodingNotSupportedException</c> and <c>Encoding</c> is not
/// UTF-8, <c>Encoding</c> is set to UTF-8 and the scan is restarted.
/// <c>Cancel?.ThrowIfCancellationRequested()</c> is invoked once per recipe and once per
/// ingredient section.
/// </remarks>
public IEnumerable<ScannedRecipe> Scan()
{
    var scanned = new List<ScannedRecipe>();

    var web = new HtmlWeb();
    if (Encoding != null)
    {
        web.OverrideEncoding = Encoding;
    }

    HtmlDocument doc;
    try
    {
        doc = web.Load(Url);
    }
    catch (EncodingNotSupportedException)
    {
        // The UTF-8 fallback was already in effect: surface the failure instead
        // of continuing without a document.
        if (Encoding == Encoding.UTF8)
        {
            throw;
        }

        Encoding = Encoding.UTF8;
        return Scan();
    }

    foreach (var ingredientsCaptionNode in ScanIngredientsCaptionNodes(doc))
    {
        Cancel?.ThrowIfCancellationRequested();

        var ingredientsSectionNode = ScanIngredientsSectionNode(doc, ingredientsCaptionNode);
        var recipe = new ScannedRecipe();
        scanned.Add(recipe);

        // Dish and servings are scanned from every dish caption node that
        // belongs to this ingredients caption.
        var dishes = new List<ScannedDish>();
        var servingsList = new List<int>();
        foreach (var dishCaptionNode in ScanDishCaptionNodes(doc, ingredientsCaptionNode))
        {
            dishes.Add(ScanDish(doc, dishCaptionNode));
            servingsList.Add(ScanServings(doc, dishCaptionNode));
        }

        // Use the first scanned dish, or a dish without candidates when none was found.
        recipe.Dish = dishes.FirstOrDefault();
        if (recipe.Dish == null)
        {
            recipe.Dish = new ScannedDish
            {
                Candidates = new List<Dish>(),
            };
        }

        // Min() throws on an empty sequence; without any dish caption node the
        // servings are left at the value ScannedRecipe was created with.
        if (servingsList.Count > 0)
        {
            recipe.Servings = servingsList.Min();
        }

        var ingredients = new List<ScannedIngredient>();
        foreach (var subSectionNode in ScanIngredientSectionNodes(doc, ingredientsSectionNode))
        {
            Cancel?.ThrowIfCancellationRequested();

            var section = ScanIngredientSection(doc, subSectionNode);
            foreach (var item in ScanIngredientsFromIngredientSection(doc, subSectionNode))
            {
                var ingredient = IngredientItemParser.Parse(item);
                ingredient.Section = section;
                ingredients.Add(ingredient);
            }
        }

        recipe.Ingredients = ingredients;

        // Refine data: a name that is the only candidate of some section is
        // treated as reliable and removed from the candidates of every section
        // that still has several candidates.
        var sections = recipe.Ingredients.Select(i => i.Section).Distinct().ToList();
        var reliableSectionNames = sections
            .Where(s => s.Candidates.Count() == 1)
            .SelectMany(s => s.Candidates.Select(c => c.Name))
            .ToList();

        foreach (var ambiguousSection in sections.Where(s => s.Candidates.Count() > 1))
        {
            // Materialised so the filter runs once instead of on every later enumeration.
            ambiguousSection.Candidates = ambiguousSection.Candidates
                .Where(c => !reliableSectionNames.Any(n => n == c.Name))
                .ToList();
        }
    }

    AppendTemporaryIndecies(scanned);
    return scanned;
}