/// <summary>
/// Creates an <see cref="NReadabilityWebTranscoder"/> from caller-supplied collaborators:
/// a custom-constructed transcoder, a custom URL fetcher and a page separator builder.
/// </summary>
/// <param name="transcoder">The <see cref="NReadabilityTranscoder"/> instance to store.</param>
/// <param name="urlFetcher">The <see cref="IUrlFetcher"/> instance used to download content.</param>
/// <param name="pageSeparatorBuilder">A function that creates an HTML fragment for the page separator. It takes the page number as its argument.</param>
public NReadabilityWebTranscoder(
    NReadabilityTranscoder transcoder,
    IUrlFetcher urlFetcher,
    Func<int, string> pageSeparatorBuilder)
{
    // The DOM serializer is the only collaborator created here rather than passed in.
    this._sgmlDomSerializer = new SgmlDomSerializer();

    // Everything else is stored exactly as supplied by the caller.
    this._transcoder = transcoder;
    this._urlFetcher = urlFetcher;
    this._pageSeparatorBuilder = pageSeparatorBuilder;
}
/// <summary>
/// Initializes a new <see cref="NReadabilityWebTranscoder"/> using a custom-constructed
/// <see cref="NReadabilityTranscoder"/>, a custom <see cref="IUrlFetcher"/> and a
/// custom page separator builder.
/// </summary>
/// <param name="transcoder">The transcoder instance this object keeps a reference to.</param>
/// <param name="urlFetcher">The <see cref="IUrlFetcher"/> instance used to download content.</param>
/// <param name="pageSeparatorBuilder">
/// A function that creates an HTML fragment for the page separator.
/// It takes the page number as its argument.
/// </param>
public NReadabilityWebTranscoder(
    NReadabilityTranscoder transcoder,
    IUrlFetcher urlFetcher,
    Func<int, string> pageSeparatorBuilder)
{
    // Caller-provided dependencies are stored as given (no validation is performed here).
    this._transcoder = transcoder;
    this._urlFetcher = urlFetcher;

    // A fresh serializer is created for every instance.
    this._sgmlDomSerializer = new SgmlDomSerializer();

    this._pageSeparatorBuilder = pageSeparatorBuilder;
}
/// <summary>
/// Initializes a new <see cref="NReadabilityWebTranscoder"/> with a custom-constructed
/// <see cref="NReadabilityTranscoder"/> and a custom <see cref="IUrlFetcher"/>.
/// The page separator builder is set to <c>_DefaultPageSeparatorBuilder</c>.
/// This overload is mostly used for testing.
/// </summary>
/// <param name="transcoder">The transcoder instance this object keeps a reference to.</param>
/// <param name="urlFetcher">The <see cref="IUrlFetcher"/> instance used to download content.</param>
public NReadabilityWebTranscoder(
    NReadabilityTranscoder transcoder,
    IUrlFetcher urlFetcher)
{
    // Created locally; callers cannot supply their own serializer through this overload.
    this._sgmlDomSerializer = new SgmlDomSerializer();

    // No separator builder parameter here, so fall back to the default one.
    this._pageSeparatorBuilder = _DefaultPageSeparatorBuilder;

    this._transcoder = transcoder;
    this._urlFetcher = urlFetcher;
}
/// <summary>
/// Initializes a new <see cref="NReadabilityTranscoder"/>. Allows setting all options.
/// </summary>
/// <param name="dontStripUnlikelys">Determines whether elements that are unlikely to be a part of the main content will be removed.</param>
/// <param name="dontNormalizeSpacesInTextContent">Determines whether spaces in InnerText properties of elements will be normalized automatically (e.g. whether double spaces will be replaced with single spaces).</param>
/// <param name="dontWeightClasses">Determines whether the 'weight-class' algorithm will be used when cleaning content.</param>
/// <param name="readingStyle">Styling for the extracted article.</param>
/// <param name="readingMargin">Margin for the extracted article.</param>
/// <param name="readingSize">Font size for the extracted article.</param>
private NReadabilityTranscoder(
    bool dontStripUnlikelys,
    bool dontNormalizeSpacesInTextContent,
    bool dontWeightClasses,
    ReadingStyle readingStyle,
    ReadingMargin readingMargin,
    ReadingSize readingSize)
{
    // Helpers and per-instance state that are created here rather than passed in.
    this._sgmlDomBuilder = new SgmlDomBuilder();
    this._sgmlDomSerializer = new SgmlDomSerializer();
    this._elementsScores = new Dictionary<XElement, float>();

    // Boolean switches, stored as supplied.
    this._dontStripUnlikelys = dontStripUnlikelys;
    this._dontNormalizeSpacesInTextContent = dontNormalizeSpacesInTextContent;
    this._dontWeightClasses = dontWeightClasses;

    // Presentation settings for the extracted article.
    this._readingStyle = readingStyle;
    this._readingMargin = readingMargin;
    this._readingSize = readingSize;
}
/// <summary>
/// Type initializer: creates the SGML DOM builder and serializer held in the
/// static fields of this class.
/// </summary>
static NReadabilityTranscoderTests_Old()
{
    NReadabilityTranscoderTests_Old._sgmlDomBuilder = new SgmlDomBuilder();
    NReadabilityTranscoderTests_Old._sgmlDomSerializer = new SgmlDomSerializer();
}
/// <summary>
/// Initializes a new <see cref="NReadabilityTranscoder"/>. Allows setting all options.
/// </summary>
/// <param name="dontStripUnlikelys">Determines whether elements that are unlikely to be a part of the main content will be removed.</param>
/// <param name="dontNormalizeSpacesInTextContent">Determines whether spaces in InnerText properties of elements will be normalized automatically (e.g. whether double spaces will be replaced with single spaces).</param>
/// <param name="dontWeightClasses">Determines whether the 'weight-class' algorithm will be used when cleaning content.</param>
/// <param name="divIdHints">
/// Hints for specific websites: the node with the given div id is extracted as the content block.
/// Defaults to <c>null</c>.
/// NOTE(review): presumably each key is a pattern identifying a site and each value is the div id to use - confirm against the code that reads the hints.
/// </param>
private NReadabilityTranscoder(
    bool dontStripUnlikelys,
    bool dontNormalizeSpacesInTextContent,
    bool dontWeightClasses,
    Dictionary<Regex, string> divIdHints = null)
{
    // Helpers and per-instance state that are created here rather than passed in.
    this._sgmlDomBuilder = new SgmlDomBuilder();
    this._sgmlDomSerializer = new SgmlDomSerializer();
    this._elementsScores = new Dictionary<XElement, float>();

    // Boolean switches, stored as supplied.
    this._dontStripUnlikelys = dontStripUnlikelys;
    this._dontNormalizeSpacesInTextContent = dontNormalizeSpacesInTextContent;
    this._dontWeightClasses = dontWeightClasses;

    // Stored as-is; stays null when the caller omits the argument.
    this._articleContentDivIdHints2 = divIdHints;
}