/// <summary>
 ///  Initializes a new instance of NReadabilityWebTranscoder from a
 ///  custom-constructed NReadabilityTranscoder, a custom IUrlFetcher
 ///  and a custom page separator builder.
 /// </summary>
 /// <param name="transcoder">The NReadabilityTranscoder to use. Must not be null.</param>
 /// <param name="urlFetcher">The IUrlFetcher instance used to download content. Must not be null.</param>
 /// <param name="pageSeparatorBuilder">A function that creates a HTML fragment for page separator. It takes the page number as an argument. Must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when any of the arguments is null.</exception>
 public NReadabilityWebTranscoder(NReadabilityTranscoder transcoder, IUrlFetcher urlFetcher, Func<int, string> pageSeparatorBuilder)
 {
     // Fail fast on null collaborators so the error names the offending argument
     // instead of surfacing later as a NullReferenceException.
     if (transcoder == null)
     {
         throw new ArgumentNullException("transcoder");
     }

     if (urlFetcher == null)
     {
         throw new ArgumentNullException("urlFetcher");
     }

     if (pageSeparatorBuilder == null)
     {
         throw new ArgumentNullException("pageSeparatorBuilder");
     }

     _transcoder = transcoder;
     _urlFetcher = urlFetcher;
     _sgmlDomSerializer = new SgmlDomSerializer();
     _pageSeparatorBuilder = pageSeparatorBuilder;
 }
 /// <summary>
 ///  Initializes a new instance of NReadabilityWebTranscoder from a
 ///  custom-constructed NReadabilityTranscoder, a custom IUrlFetcher
 ///  and a custom page separator builder.
 /// </summary>
 /// <param name="transcoder">The NReadabilityTranscoder to use. Must not be null.</param>
 /// <param name="urlFetcher">The IUrlFetcher instance used to download content. Must not be null.</param>
 /// <param name="pageSeparatorBuilder">A function that creates a HTML fragment for page separator. It takes the page number as an argument. Must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when any of the arguments is null.</exception>
 public NReadabilityWebTranscoder(NReadabilityTranscoder transcoder, IUrlFetcher urlFetcher, Func<int, string> pageSeparatorBuilder)
 {
     // Validate up front: a null stored here would only fail later, far from the caller's mistake.
     if (transcoder == null)           { throw new ArgumentNullException("transcoder"); }
     if (urlFetcher == null)           { throw new ArgumentNullException("urlFetcher"); }
     if (pageSeparatorBuilder == null) { throw new ArgumentNullException("pageSeparatorBuilder"); }

     _transcoder           = transcoder;
     _urlFetcher           = urlFetcher;
     _sgmlDomSerializer    = new SgmlDomSerializer();
     _pageSeparatorBuilder = pageSeparatorBuilder;
 }
 /// <summary>
 ///  Initializes a new instance of NReadabilityWebTranscoder from a
 ///  custom-constructed NReadabilityTranscoder and a custom IUrlFetcher.
 ///  The page separator builder is set to _DefaultPageSeparatorBuilder.
 ///  This overload is mostly used for testing.
 /// </summary>
 /// <param name="transcoder">The NReadabilityTranscoder to use. Must not be null.</param>
 /// <param name="urlFetcher">The IUrlFetcher instance used to download content. Must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="transcoder"/> or <paramref name="urlFetcher"/> is null.</exception>
 public NReadabilityWebTranscoder(NReadabilityTranscoder transcoder, IUrlFetcher urlFetcher)
 {
     // Fail fast on null collaborators so the error names the offending argument
     // instead of surfacing later as a NullReferenceException.
     if (transcoder == null)
     {
         throw new ArgumentNullException("transcoder");
     }

     if (urlFetcher == null)
     {
         throw new ArgumentNullException("urlFetcher");
     }

     _transcoder = transcoder;
     _urlFetcher = urlFetcher;
     _sgmlDomSerializer = new SgmlDomSerializer();
     _pageSeparatorBuilder = _DefaultPageSeparatorBuilder;
 }
Example #4
0
 /// <summary>
 ///  Initializes a new instance of NReadabilityWebTranscoder from a
 ///  custom-constructed NReadabilityTranscoder and a custom IUrlFetcher.
 ///  The page separator builder is set to _DefaultPageSeparatorBuilder.
 ///  This overload is mostly used for testing.
 /// </summary>
 /// <param name="transcoder">The NReadabilityTranscoder to use. Must not be null.</param>
 /// <param name="urlFetcher">The IUrlFetcher instance used to download content. Must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="transcoder"/> or <paramref name="urlFetcher"/> is null.</exception>
 public NReadabilityWebTranscoder(NReadabilityTranscoder transcoder, IUrlFetcher urlFetcher)
 {
     // Validate up front: a null stored here would only fail later, far from the caller's mistake.
     if (transcoder == null) { throw new ArgumentNullException("transcoder"); }
     if (urlFetcher == null) { throw new ArgumentNullException("urlFetcher"); }

     _transcoder           = transcoder;
     _urlFetcher           = urlFetcher;
     _sgmlDomSerializer    = new SgmlDomSerializer();
     _pageSeparatorBuilder = _DefaultPageSeparatorBuilder;
 }
        /// <summary>
        /// Creates an NReadabilityTranscoder with every option supplied explicitly
        /// and sets up the SGML DOM helpers and the element score table.
        /// </summary>
        /// <param name="dontStripUnlikelys">Controls whether elements unlikely to belong to the main content are removed.</param>
        /// <param name="dontNormalizeSpacesInTextContent">Controls whether spaces in InnerText properties of elements are normalized automatically (e.g. double spaces replaced with single spaces).</param>
        /// <param name="dontWeightClasses">Controls whether the 'weight-class' algorithm is used when cleaning content.</param>
        /// <param name="readingStyle">Styling applied to the extracted article.</param>
        /// <param name="readingMargin">Margin applied to the extracted article.</param>
        /// <param name="readingSize">Font size applied to the extracted article.</param>
        private NReadabilityTranscoder(
            bool dontStripUnlikelys,
            bool dontNormalizeSpacesInTextContent,
            bool dontWeightClasses,
            ReadingStyle readingStyle,
            ReadingMargin readingMargin,
            ReadingSize readingSize)
        {
            // Presentation settings for the extracted article.
            this._readingStyle = readingStyle;
            this._readingMargin = readingMargin;
            this._readingSize = readingSize;

            // Content-extraction switches.
            this._dontStripUnlikelys = dontStripUnlikelys;
            this._dontNormalizeSpacesInTextContent = dontNormalizeSpacesInTextContent;
            this._dontWeightClasses = dontWeightClasses;

            // Helper objects and per-element score storage start out fresh for each instance.
            this._sgmlDomBuilder = new SgmlDomBuilder();
            this._sgmlDomSerializer = new SgmlDomSerializer();
            this._elementsScores = new Dictionary<XElement, float>();
        }
 /// <summary>
 ///  Type initializer: creates the SgmlDomBuilder and SgmlDomSerializer
 ///  instances stored in the class's static fields.
 /// </summary>
 static NReadabilityTranscoderTests_Old()
 {
     NReadabilityTranscoderTests_Old._sgmlDomBuilder    = new SgmlDomBuilder();
     NReadabilityTranscoderTests_Old._sgmlDomSerializer = new SgmlDomSerializer();
 }
        /// <summary>
        /// Creates an NReadabilityTranscoder with every option supplied explicitly
        /// and sets up the SGML DOM helpers and the element score table.
        /// </summary>
        /// <param name="dontStripUnlikelys">Controls whether elements unlikely to belong to the main content are removed.</param>
        /// <param name="dontNormalizeSpacesInTextContent">Controls whether spaces in InnerText properties of elements are normalized automatically (e.g. double spaces replaced with single spaces).</param>
        /// <param name="dontWeightClasses">Controls whether the 'weight-class' algorithm is used when cleaning content.</param>
        /// <param name="divIdHints">
        /// Site-specific hints: for particular websites, the node with the given div id is
        /// extracted as the content block. Optional; stored as-is, so it is null when omitted.
        /// NOTE(review): presumably the Regex key identifies the website and the string value
        /// is the div id; confirm against the code that reads the hints.
        /// </param>
        private NReadabilityTranscoder(
            bool dontStripUnlikelys,
            bool dontNormalizeSpacesInTextContent,
            bool dontWeightClasses,
            Dictionary<Regex, string> divIdHints = null)
        {
            // Site-specific content hints (may be null).
            this._articleContentDivIdHints2 = divIdHints;

            // Content-extraction switches.
            this._dontWeightClasses = dontWeightClasses;
            this._dontNormalizeSpacesInTextContent = dontNormalizeSpacesInTextContent;
            this._dontStripUnlikelys = dontStripUnlikelys;

            // Helper objects and per-element score storage start out fresh for each instance.
            this._sgmlDomBuilder = new SgmlDomBuilder();
            this._sgmlDomSerializer = new SgmlDomSerializer();
            this._elementsScores = new Dictionary<XElement, float>();
        }