/// <summary>
/// Reads a file as text and post-processes it according to the file name suffix.
/// </summary>
/// <remarks>
/// Files whose path ends with <c>html</c>, <c>htm</c>, <c>asp</c>, <c>aspx</c> or <c>php</c>
/// (compared ordinally, ignoring case) are passed to <c>HtmlScrapper.GetChineseContent</c>.
/// For any other file, when <paramref name="needMarkChapterHeaders"/> is set and the first
/// line contains both "第" and "章", leading spaces, ideographic spaces and tabs are trimmed
/// and the text is prefixed with the <c>$CHAPTER_HEADER$. </c> marker.
/// </remarks>
/// <param name="filePath">Path of the file to read.</param>
/// <param name="encoding">Encoding used to decode the file contents.</param>
/// <param name="needMarkChapterHeaders">
/// Whether chapter headers should be marked; also forwarded to the HTML scraper.
/// </param>
/// <returns>The processed text, or the raw file contents when no rule applies.</returns>
private string readFile(string filePath, Encoding encoding, bool needMarkChapterHeaders)
{
    // Ordinal, case-insensitive matching: file suffixes are identifiers, not linguistic
    // text, and "PAGE.HTML" must be treated the same as "page.html".
    const System.StringComparison suffixComparison = System.StringComparison.OrdinalIgnoreCase;

    string text = File.ReadAllText(filePath, encoding);

    bool isWebPage =
        filePath.EndsWith("html", suffixComparison) ||
        filePath.EndsWith("htm", suffixComparison) ||
        filePath.EndsWith("asp", suffixComparison) ||
        filePath.EndsWith("aspx", suffixComparison) ||
        filePath.EndsWith("php", suffixComparison);

    if (isWebPage)
    {
        return HtmlScrapper.GetChineseContent(text, needMarkChapterHeaders);
    }

    if (!needMarkChapterHeaders)
    {
        return text;
    }

    // A header can only be recognised when there is a complete first line.
    int firstLineEnd = text.IndexOf('\n');
    if (firstLineEnd < 0)
    {
        return text;
    }

    string firstLine = text.Substring(0, firstLineEnd);
    if (firstLine.Contains("第") && firstLine.Contains("章"))
    {
        // Strip leading ASCII spaces, ideographic spaces (U+3000) and tabs before tagging.
        return "$CHAPTER_HEADER$. " + text.TrimStart(' ', '\u3000', '\t');
    }

    return text;
}
/// <summary>
/// Creates a link handler that keeps a reference to the supplied scrapper.
/// </summary>
/// <param name="scrapper">The scrapper instance stored for later use by this handler.</param>
public HtmlLinkHandler(HtmlScrapper scrapper)
{
    this._scrapper = scrapper;
}
/// <summary>
/// Sets up the fixture: creates a mocked <c>IDateTimeProvider</c> and builds the
/// <c>HtmlScrapper</c> under test on top of it.
/// </summary>
public ScrapTest()
{
    // Create the mock first so the same instance is stored and handed to the scrapper.
    var dateTimeProviderMock = new Mock<IDateTimeProvider>();

    _dateTimeProvider = dateTimeProviderMock;
    _sut = new HtmlScrapper(dateTimeProviderMock.Object);
}