static bool VerboseFlag = false; // Print more extensive results

/// <summary>
/// Cross-checks a Blogger backup feed against the "stories" sheet of a spreadsheet
/// and reports URL, label and title mismatches on the console.
/// </summary>
/// <param name="args">
/// Optional switches (<c>-nonreview</c>, <c>-verbose</c>) followed by exactly two
/// positional arguments: the blog feed file name and the spreadsheet file name.
/// </param>
/// <remarks>
/// On a bad command line the usage text is written to standard error and the
/// process exits with code -1.
/// </remarks>
static void Main(string[] args)
{
    Console.OutputEncoding = Encoding.UTF8;

    // Walk through any leading switches. StartsWith is used instead of indexing
    // [0] so that an empty-string argument does not throw.
    int argument = 0;
    bool error = false;
    while (!error && argument < args.Length && args[argument].StartsWith("-", StringComparison.Ordinal))
    {
        switch (args[argument])
        {
            case "-nonreview":
                NonReviewFlag = true;
                break;
            case "-verbose":
                VerboseFlag = true;
                break;
            default:
                // Unknown switch: do not exit here, so the usage message below is shown.
                error = true;
                break;
        }
        ++argument;
    }

    if (error || args.Length - argument != 2)
    {
        Console.Error.WriteLine("usage: {0} [-verbose -nonreview] blog.xml spreadsheet.xls\n", System.AppDomain.CurrentDomain.FriendlyName);
        System.Environment.Exit(-1);
    }

    // The two positional arguments: blog feed file, then spreadsheet file.
    String blogFileName = args[argument++];
    String spreadsheetFileName = args[argument++];

    // Read the feed and parse it.
    Console.WriteLine("Reading Blogger backup feed");
    Syndicator syn = new Syndicator(blogFileName);
    // NOTE(review): the error count is captured before Validate runs; confirm
    // whether Validate can add to syn.Errors and should be included in the total.
    int synErrors = syn.Errors;
    var synDict = syn.Validate(NonReviewFlag, VerboseFlag);

    // Holds the spreadsheet records whose blog URL was also found in the feed.
    Dictionary<Uri, SpreadsheetRecord> SheetDict = new Dictionary<Uri, SpreadsheetRecord>(synDict.Count);

    // Now open the spreadsheet.
    Console.WriteLine("\r\nOpening Excel Spreadsheet");
    SheetAccessor sheet = new SheetAccessor(spreadsheetFileName, "stories");

    // Find out how many rows there are.
    int rowCount = sheet.LastRow;

    Console.WriteLine("Compare Spreadsheet URLs vs. Blog");

    // Loop through the data rows. Iteration starts at row 3 (rows 1-2 are
    // presumably headers; the summary below likewise reports rowCount - 2 records).
    int notReviewed = 0;
    int reviewed = 0;
    int urlErrors = 0;
    for (sheet.Row = 3; sheet.Row <= rowCount; ++sheet.Row)
    {
        SpreadsheetRecord rec = new SpreadsheetRecord(sheet);
        if (rec.BloggerLink == null || rec.Reprint)
        {
            ++notReviewed;
            continue;
        }

        if (!synDict.ContainsKey(rec.BloggerLink))
        {
            Console.WriteLine("Spreadsheet contains URL {0} not in blog: {1}", rec.BloggerLink, rec.Title);
            ++urlErrors;
            continue;
        }

        SpreadsheetRecord existing;
        if (SheetDict.TryGetValue(rec.BloggerLink, out existing))
        {
            Console.WriteLine("Unexpected Duplicate Url: {0} is used for {1} and {2}\n", rec.BloggerLink, rec.Title, existing.Title);
            ++urlErrors;
        }
        else
        {
            SheetDict.Add(rec.BloggerLink, rec);
            ++reviewed;
        }
    }
    Console.WriteLine("Spreadsheet contains {0} records, {1} reviewed and {2} not reviewed", rowCount - 2, reviewed, notReviewed);

    Console.WriteLine("Compare Blog URLs vs. Spreadsheet");

    // Now check for blog items that are not in the spreadsheet.
    foreach (Uri blogUrl in synDict.Keys)
    {
        if (!SheetDict.ContainsKey(blogUrl))
        {
            Console.WriteLine("Blog contains URL {0} not in spreadsheet: {1}!", blogUrl, synDict[blogUrl].Title);
            ++urlErrors;
        }
    }

    Console.WriteLine("\r\nErrors in labels");

    // Now compare the labels of items present in both the blog and the spreadsheet.
    int labelErrors = 0;
    foreach (Uri blogUrl in synDict.Keys)
    {
        // Items missing from the spreadsheet were already reported above.
        SpreadsheetRecord sheetItem;
        if (!SheetDict.TryGetValue(blogUrl, out sheetItem))
        {
            continue;
        }
        var blogItem = synDict[blogUrl];

        // BlogLabels is only assigned when the spreadsheet label cell is non-null,
        // so treat a missing value as "no labels" instead of passing null to Except.
        IEnumerable<string> sheetLabels = sheetItem.BlogLabels;
        if (sheetLabels == null)
        {
            sheetLabels = new string[0];
        }

        bool isDifference = false;
        foreach (string label in blogItem.Labels.Except(sheetLabels))
        {
            Console.WriteLine("Blog\t\t{0}", label);
            isDifference = true;
            ++labelErrors;
        }

        // We expect blog to be a subset of spreadsheet, normally, so we only
        // print the spreadsheet-only labels when we learn it's not.
        if (isDifference)
        {
            foreach (string label in sheetLabels.Except(blogItem.Labels))
            {
                Console.WriteLine("Spreadsheet\t{0}", label);
            }
            Console.WriteLine("{0}", blogItem.Title);
            Console.WriteLine("{0}", blogUrl);
            Console.WriteLine();
        }
    }

    Console.WriteLine("\r\nErrors in Titles");

    // Now check for spreadsheet items whose titles don't match the blog.
    int titleErrors = 0;
    foreach (Uri blogUrl in synDict.Keys)
    {
        // Items missing from the spreadsheet are reported on elsewhere.
        SpreadsheetRecord sheetItem;
        if (!SheetDict.TryGetValue(blogUrl, out sheetItem))
        {
            continue;
        }

        string sheetTitle = sheetItem.BlogTitle;
        string blogTitle = synDict[blogUrl].Title;
        if (sheetTitle != blogTitle)
        {
            Console.WriteLine("Blog Title:\t\t{0}", blogTitle);
            Console.WriteLine("SpreadSheet Title:\t{0}", sheetTitle);
            Console.WriteLine("{0}", blogUrl);
            ++titleErrors;
        }
    }

    Console.WriteLine("Syndication errors: {0}", synErrors);
    Console.WriteLine("URL errors: {0}", urlErrors);
    Console.WriteLine("Label errors: {0}", labelErrors);
    Console.WriteLine("Title errors: {0}", titleErrors);
}
/// <summary>
/// Builds a record from the row that <paramref name="sheet"/> is currently
/// positioned on, reading each field from a fixed column letter.
/// </summary>
/// <param name="sheet">Accessor whose <c>GetCell(column)</c> supplies the cell text for the current row.</param>
/// <exception cref="FormatException">A non-null year, word count or rating cell is not a valid integer.</exception>
/// <exception cref="UriFormatException">A non-empty link cell is not a valid absolute URI.</exception>
public SpreadsheetRecord(SheetAccessor sheet)
{
    string s;

    _Title = sheet.GetCell("A");
    Authors = MakeStringArray(sheet.GetCell("B"), sheet.GetCell("C"));
    Translators = MakeStringArray(sheet.GetCell("D"), sheet.GetCell("E"));
    Editors = MakeStringArray(sheet.GetCell("F"), sheet.GetCell("G"));

    s = sheet.GetCell("H");
    _Reprint = false;
    if (s != null)
    {
        Year = int.Parse(s);
        // Years before 2015 are flagged as reprints and shifted forward by 100.
        // NOTE(review): the reason for the +100 offset is not visible here; confirm.
        if (Year < 2015)
        {
            Year += 100;
            _Reprint = true;
        }
    }

    Magazine = sheet.GetCell("I");
    Issue = sheet.GetCell("J");

    // A cell value of "0" means "no issue link"; an empty cell is skipped too,
    // because new Uri("") throws.
    s = sheet.GetCell("K");
    if (!string.IsNullOrEmpty(s) && s != "0")
    {
        IssueLink = new Uri(s);
    }

    MagIssue = sheet.GetCell("L");

    s = sheet.GetCell("N");
    if (!string.IsNullOrEmpty(s))
    {
        StoryLink = new Uri(s);
    }

    // Dates are parsed as OLE Automation date numbers; cells that are not
    // numeric leave the date unset.
    s = sheet.GetCell("P");
    if (s != null)
    {
        double serial;
        if (Double.TryParse(s, out serial))
        {
            PublicationDate = DateTime.FromOADate(serial);
        }
    }

    s = sheet.GetCell("Q");
    if (s != null)
    {
        double serial;
        if (Double.TryParse(s, out serial))
        {
            ReviewDate = DateTime.FromOADate(serial);
        }
    }

    // Word count is read from column R. Column Q was previously read a second
    // time here, which parsed the review-date cell as the word count.
    s = sheet.GetCell("R");
    if (s != null)
    {
        WordCount = int.Parse(s);
    }

    Category = sheet.GetCell("S");
    SubGenre = sheet.GetCell("T");
    Blurb = sheet.GetCell("U");

    s = sheet.GetCell("V");
    if (s != null)
    {
        Rating = int.Parse(s);
    }

    Note = sheet.GetCell("W");
    Series = sheet.GetCell("X");
    Pitch = sheet.GetCell("Y");
    SffCat = sheet.GetCell("Z");
    SettingTime = sheet.GetCell("AA");
    SettingPlace = sheet.GetCell("AB");
    Tone = sheet.GetCell("AC");

    // Too much is packed into the keyword string: "kw1,kw2,...|protagonist".
    s = sheet.GetCell("AD");
    if (s != null)
    {
        string[] parts = s.Split('|');
        if (parts[0].Length > 0)
        {
            Keywords = parts[0].Split(',');
        }
        if (parts.Length > 1)
        {
            Protagonist = parts[1];
        }
    }

    _BlogTitle = sheet.GetCell("AE");

    // Comma-separated labels, with surrounding spaces removed from each one.
    s = sheet.GetCell("AF");
    if (s != null)
    {
        BlogLabels = s.Split(',');
        for (int i = 0; i < BlogLabels.Length; ++i)
        {
            BlogLabels[i] = BlogLabels[i].Trim(' ');
        }
    }

    PermaLink = sheet.GetCell("AG");
    Body = sheet.GetCell("AR");
    Review = sheet.GetCell("AS");

    s = sheet.GetCell("AT");
    if (!string.IsNullOrEmpty(s))
    {
        RSRLink = new Uri(s);
    }
}