/// <summary>
/// Parses every file queued in <c>_htmlToParse</c>, dispatching each one to the
/// <c>ParserManager</c> method that matches its file name, then writes out the
/// location data collected along the way.
/// </summary>
/// <param name="logger">Receives progress, warning and error messages.</param>
/// <param name="version">Parser version assigned to each <c>ParserManager</c>.</param>
/// <param name="caseName">Case identifier assigned to each parser and passed to <c>WriteLocationData</c>.</param>
/// <param name="hasPreservation">
/// Set to <c>true</c> when at least one file path contains "preservation";
/// this method never resets it to <c>false</c>.
/// </param>
/// <remarks>
/// <c>SectionEmptyException</c>, <c>MissingTestDataException</c> and
/// <c>NotImplementedException</c> are logged per file and do not stop the loop.
/// Any other exception propagates to the caller.
/// </remarks>
private void ParseHTML(CommandLineLogger logger, ParserVersionEnum version, string caseName, ref bool hasPreservation)
{
    logger.LogInfo("Begin parsing files.");
    Stopwatch stopWatch = Stopwatch.StartNew();

    foreach (ExtractFileInfo htmlx in _htmlToParse)
    {
        ParserManager parser = new ParserManager(logger)
        {
            CaseNumber = caseName,
            HtmlToLoad = htmlx.File_Path,
            HtmlToRecord = htmlx.IsTemporary ? htmlx.ParentFile_Path : htmlx.File_Path,
            IsPreservation = false,
            DatabasePreservationNoPrefix = string.Empty,
            Version = version,
            DefaultDirectory = _defaultDirectory
        };

        logger.LogInfo("Processing " + htmlx.File_Path);

        try
        {
            if (MentionsPreservation(htmlx.File_Path))
            {
                hasPreservation = true;

                // Folder names look like Preservation-1, Preservation-2, Preservation-3.
                parser.IsPreservation = true;

                // SQLite doesn't like SQL queries referencing tables with a '-', change to '_'.
                parser.DatabasePreservationNoPrefix =
                    ResolvePreservationFolderName(htmlx.File_Path).Replace("-", "_");
            }

            // Files whose own name mentions "preservation" are not section files; skip dispatch.
            if (!MentionsPreservation(htmlx.File_Name))
            {
                // Invariant casing so the match does not depend on the machine's culture
                // (e.g. Turkish 'i' -> 'İ' would otherwise break "INDEX.HTML").
                switch ((htmlx.File_Name ?? string.Empty).ToUpperInvariant().Trim())
                {
                    case "ABOUT_ME.HTML":
                        parser.AboutMeParse(htmlx);
                        break;
                    case "ACCOUNT_STATUS_HISTORY.HTML":
                        parser.AccountStatusHistoryParse(htmlx);
                        break;
                    case "COMMENTS.HTML":
                        parser.CommentsParse(htmlx);
                        break;
                    case "DEVICES.HTML":
                        parser.DevicesParse(htmlx);
                        break;
                    case "DIRECT_SHARES.HTML":
                        parser.DirectSharesParse(htmlx);
                        break;
                    case "DIRECT_STORIES.HTML":
                        parser.DirectStoriesParse(htmlx);
                        break;
                    case "FOLLOWERS.HTML":
                        parser.FollowersParse(htmlx);
                        break;
                    case "FOLLOWING.HTML":
                        parser.FollowingParse(htmlx);
                        break;
                    case "GENDER.HTML":
                        parser.GenderParse(htmlx);
                        break;
                    case "INCOMING_FOLLOW_REQUESTS.HTML":
                        parser.IncomingFollowRequestsParse(htmlx);
                        break;
                    case "INDEX.HTML":
                        parser.IndexParse(htmlx);
                        break;
                    case "LIKES.HTML":
                        parser.LikesParse(htmlx);
                        break;
                    case "LINKED_ACCOUNTS.HTML":
                        parser.LinkedAccountsParse(htmlx);
                        break;
                    case "LIVE_VIDEOS.HTML":
                        parser.LiveVideosParse(htmlx);
                        break;
                    case "UNIFIED_MESSAGES.HTML":
                        parser.UnifiedMessagesParse(htmlx);
                        break;
                    case "NAME_CHANGES.HTML":
                        parser.NameChangesParse(htmlx);
                        break;
                    case "NCMEC_REPORTS.HTML":
                        parser.NcmecReportsParse(htmlx);
                        break;
                    case "PHOTOS.HTML":
                        parser.PhotosParse(htmlx);
                        break;
                    case "POPULAR_BLOCK.HTML":
                        parser.PopularBlockParse(htmlx);
                        break;
                    case "PRIVACY_SETTINGS.HTML":
                        parser.PrivacySettingsParse(htmlx);
                        break;
                    case "PROFILE_PICTURE.HTML":
                        parser.ProfilePictureParse(htmlx);
                        break;
                    case "VANITY_CHANGES.HTML":
                        parser.VanityChangesParse(htmlx);
                        break;
                    case "VIDEOS.HTML":
                        parser.VideosParse(htmlx);
                        break;
                    case "WEBSITE.HTML":
                        parser.WebsiteParse(htmlx);
                        break;
                    default:
                        logger.LogWarning("Unknown Section - \"Unknown section:" + htmlx.File_Name + "\". Please contact NDCAC with section name and test data to improve parsing functionality");
#if DEBUG
                        // Fail loudly in debug builds so unhandled sections are noticed during development.
                        throw new ApplicationException(htmlx.File_Name);
#endif
                        break;
                }
            }

            if (parser.LocationData != null && parser.LocationData.Any())
            {
                _locationData.AddRange(parser.LocationData);
            }

            AddSectionToPreservationList(htmlx.File_Name, parser.PreservationQueries);
        }
        catch (SectionEmptyException ex)
        {
            logger.LogWarning("Parsing " + ex.Message + " section complete - section contains no data: Excluding from database.");
        }
        catch (MissingTestDataException ex)
        {
            logger.LogWarning("Parsing " + ex.Message + " section skipped - parser not implemented: No test data available.");
        }
        catch (NotImplementedException ex)
        {
            logger.LogError("Parsing " + ex.Message + " section failed: parser not implemented.", ex);
        }
    }

    WriteLocationData(caseName);

    stopWatch.Stop();
    logger.LogInfo("Parsing files complete (Time: " + stopWatch.Elapsed.GetFormattedElapsedTime() + ")... ");
}

/// <summary>
/// Returns <c>true</c> when <paramref name="value"/> contains "preservation",
/// compared ordinally and ignoring case; <c>false</c> for <c>null</c>.
/// </summary>
private static bool MentionsPreservation(string value)
{
    return value != null
        && value.IndexOf("preservation", StringComparison.OrdinalIgnoreCase) >= 0;
}

/// <summary>
/// Picks the folder name used to label a preservation file: the immediate parent
/// folder when its name contains "preservation" (preservation\index.html),
/// otherwise the grandparent folder (preservation\folderX\index.html).
/// Falls back to the parent name when there is no grandparent, and to an empty
/// string when the path has no parent at all, instead of dereferencing null.
/// </summary>
private static string ResolvePreservationFolderName(string filePath)
{
    DirectoryInfo parent = new DirectoryInfo(filePath).Parent;
    if (parent == null)
    {
        return string.Empty;
    }

    if (MentionsPreservation(parent.Name) || parent.Parent == null)
    {
        return parent.Name;
    }

    return parent.Parent.Name;
}