/// <summary>
/// Runs the recipe parsing pipeline, writing the interpreted ingredients to
/// "ingredients.xls" in the current directory, then opens a temporary copy of
/// that log with Excel.
/// </summary>
/// <param name="classifier">Ingredient classifier forwarded to <c>ParseRecipesImpl</c>.</param>
/// <param name="types">Ingredient type lookup forwarded to <c>ParseRecipesImpl</c>.</param>
private static void ParseRecipes(IIngredientClassifier classifier, IIngredientTypes types)
{
    const string ingredientsLogFileName = "ingredients.xls";

    // The classified recipes are collected because ParseRecipesImpl requires a
    // destination list; this method does not use them afterwards.
    List<ClassifiedRecipe> result = new List<ClassifiedRecipe>();
    IngregientClassifierSetup.ParseRecipesImpl(ingredientsLogFileName, "./", classifier, types, result);

    // Excel is pointed at a copy rather than the log itself
    // (presumably so the original file is not locked while it is open - confirm).
    string tempFile = System.IO.Path.GetTempFileName();
    File.Copy(ingredientsLogFileName, tempFile, true);

    // Process.Start hands back a disposable wrapper around the OS process handle.
    // Disposing it only releases our handle; it does not close Excel.
    System.Diagnostics.Process excel = System.Diagnostics.Process.Start("excel.exe", tempFile);
    if (excel != null)
    {
        excel.Dispose();
    }
}
/// <summary>
/// Parses every already-downloaded page of each known recipe source, classifies the
/// ingredients of each successfully parsed recipe, and writes one tab-separated line
/// per ingredient to <paramref name="ingredientsLogFileName"/>. Afterwards prints
/// summary counts and the parse-failure reasons (most frequent first) to the console.
/// </summary>
/// <param name="ingredientsLogFileName">Path of the tab-separated ingredient log to write.</param>
/// <param name="downloaderPath">Path handed to each recipe downloader's constructor.</param>
/// <param name="classifier">Classifier used to classify the ingredients of each parsed recipe.</param>
/// <param name="types">Provides the ingredient class to ingredient type lookup (<c>ClassToType</c>).</param>
/// <param name="recipes">Destination list; every successfully classified recipe is appended to it.</param>
public static void ParseRecipesImpl(
    string ingredientsLogFileName,
    string downloaderPath,
    IIngredientClassifier classifier,
    IIngredientTypes types,
    List<ClassifiedRecipe> recipes)
{
    // Parse error message -> number of pages that failed with that message.
    Dictionary<string, int> whyNotParsed = new Dictionary<string, int>();

    var sources = new List<KeyValuePair<IRecipeDownloader, IRecipeParser>>();

    // adding skinnytaste
    sources.Add(
        new KeyValuePair<IRecipeDownloader, IRecipeParser>(
            new SkinnyTasteRecipeDownloader("/", downloaderPath),
            new SkinnyTasteRecipeParser()));

    // adding paleoleap
    sources.Add(
        new KeyValuePair<IRecipeDownloader, IRecipeParser>(
            new PaleoLeapRecipeDownloader("/", downloaderPath),
            new PaleoLeapRecipeParser()));

    int recipesTotal = 0, recipesParsed = 0;
    HashSet<string> ingredientTypesUsed = new HashSet<string>();
    HashSet<string> ingredientGroupsUsed = new HashSet<string>();

    using (StreamWriter interpretedIngredients = new StreamWriter(ingredientsLogFileName))
    {
        interpretedIngredients.WriteLine("url\tdeclaration\tdetail\tquantity\tname\tclass\ttype");

        foreach (var source in sources)
        {
            IRecipeDownloader d = source.Key;
            IRecipeParser p = source.Value;

            foreach (RawWebPage rawWebPage in d.DownloadedAlready)
            {
                ++recipesTotal;
                rawWebPage.Name = RecipeDownloader.RemoveSpecialCharacters(rawWebPage.Name);

                RecipeParsingResult parsed = p.TryParseRawWebPage(rawWebPage);
                if (!parsed.Succeeded)
                {
                    Console.Error.WriteLine(
                        "WARNING: failed to parse {0}:{1}:{2} ({3})",
                        rawWebPage.Source, rawWebPage.Url, rawWebPage.FileName, parsed.ErrorMessage);

                    // Single lookup: failures stays 0 when the message has not been seen yet.
                    int failures;
                    whyNotParsed.TryGetValue(parsed.ErrorMessage, out failures);
                    whyNotParsed[parsed.ErrorMessage] = failures + 1;
                    continue;
                }

                ++recipesParsed;
                Recipe recipe = parsed.Result;

                ClassifiedRecipe makingSenseOfIt = classifier.ClassifyIngredients(recipe);
                if (makingSenseOfIt.Succeeded)
                {
                    recipes.Add(makingSenseOfIt);
                }

                // Ingredients are logged even when classification did not succeed.
                // NOTE(review): this relies on Classification being populated in that case - confirm.
                foreach (var ingredient in recipe.Ingredients)
                {
                    // understand what kind of ingredient this is
                    string ingredientClass = null;
                    IngredientType ingredientType = null;

                    if (null != ingredient.Name &&
                        !makingSenseOfIt.Classification.Classified.TryGetValue(ingredient.Name, out ingredientClass))
                    {
                        ingredientClass = null;
                    }

                    if (null != ingredientClass &&
                        !types.ClassToType.TryGetValue(ingredientClass, out ingredientType))
                    {
                        ingredientType = null;
                    }

                    if (null == ingredientClass)
                    {
                        ingredientClass = "[unknown]";
                    }

                    if (null == ingredientType)
                    {
                        ingredientType = new IngredientType();
                    }

                    // ingredient.Name may be null (it is guarded above), so it must not be
                    // dereferenced blindly; a null name is written as an empty column.
                    object ingredientName = null != ingredient.Name ? ingredient.Name.Name : null;

                    // Quantity is wrapped as ="..." (presumably so Excel keeps it as text - confirm).
                    interpretedIngredients.WriteLine(
                        "{0}\t{1}\t{2}\t=\"{3}\"\t{4}\t{5}\t{6}",
                        d.UrlRoot + recipe.OriginalWebPage.Url,
                        ingredient.Declaration,
                        ingredient.Detail,
                        ingredient.Quantity,
                        ingredientName,
                        ingredientClass,
                        ingredientType);

                    ingredientTypesUsed.Add(ingredientType.Type);
                    ingredientGroupsUsed.Add(ingredientType.Group);
                }
            }
        }
    }

    Console.WriteLine(
        "Recipes: {0} total, {1} parsed, {2} distinct ingredient types, {3} distinct ingredient groups used",
        recipesTotal, recipesParsed, ingredientTypesUsed.Count, ingredientGroupsUsed.Count);

    // Report failure reasons, most frequent first (operands swapped for descending order).
    List<KeyValuePair<string, int>> whyNot = new List<KeyValuePair<string, int>>(whyNotParsed);
    whyNot.Sort((KeyValuePair<string, int> x, KeyValuePair<string, int> y) => y.Value.CompareTo(x.Value));

    foreach (var whyNotElement in whyNot)
    {
        Console.WriteLine(
            "Failed to parse {0} recipe pages because {1}.",
            whyNotElement.Value, whyNotElement.Key);
    }
}