/// <summary>
/// Parses an in-memory HTML string and sends the extracted output to the
/// supplied callbacks. The callbacks replace whatever handlers this
/// instance was holding before the call.
/// </summary>
/// <param name="html_string">HTML markup handed to <c>HtmlDocument.LoadHtml</c>.</param>
/// <param name="append_text_cb">Stored as both <c>AppendText</c> and <c>AppendWord</c>.</param>
/// <param name="add_prop_cb">Stored as <c>AddProperty</c>.</param>
/// <param name="append_white_cb">Stored as <c>AppendWhiteSpace</c>.</param>
/// <param name="append_break_cb">Stored as <c>AppendStructuralBreak</c>.</param>
/// <param name="hot_up_cb">Stored as <c>HotUp</c>.</param>
/// <param name="hot_down_cb">Stored as <c>HotDown</c>.</param>
/// <remarks>
/// Any exception thrown while loading the HTML is logged through
/// <c>Log.Debug</c> and not rethrown.
/// </remarks>
public void ExtractText(string html_string,
                        AppendTextCallback append_text_cb,
                        AddPropertyCallback add_prop_cb,
                        AppendSpaceCallback append_white_cb,
                        AppendSpaceCallback append_break_cb,
                        HotCallback hot_up_cb,
                        HotCallback hot_down_cb)
{
    // There is no separate word callback in the signature, so the text
    // callback is installed for both roles.
    AppendWord = append_text_cb;
    AppendText = append_text_cb;

    // Whitespace and structural-break handlers.
    AppendStructuralBreak = append_break_cb;
    AppendWhiteSpace = append_white_cb;

    // Property and "hot" region handlers.
    AddProperty = add_prop_cb;
    HotDown = hot_down_cb;
    HotUp = hot_up_cb;

    // Node events raised during loading are routed to HandleNodeEvent.
    HtmlDocument html_doc = new HtmlDocument();
    html_doc.ReportNode += HandleNodeEvent;
    html_doc.StreamMode = true;

    try
    {
        html_doc.LoadHtml(html_string);
    }
    catch (Exception parse_error)
    {
        // Best effort: record the failure and return normally.
        Log.Debug(parse_error, "Exception while filtering html string [{0}]", html_string);
    }
}
/// <summary>
/// Creates the HTML filter and initializes its parsing state.
/// </summary>
/// <param name="register_filter">
/// When <c>true</c>, the constructor sets the filter version, sets
/// <c>SnippetMode</c> to <c>true</c>, sets the file type to "document" and
/// binds every output callback to the corresponding base-class method.
/// When <c>false</c>, none of that happens and the callbacks are left
/// unassigned by this constructor (presumably to be supplied later, e.g.
/// through <c>ExtractText</c> - confirm against callers).
/// </param>
public FilterHtml(bool register_filter)
{
    if (register_filter)
    {
        base.SetVersion(version);
        SnippetMode = true;
        SetFileType("document");

        // Route all output through the base-class implementations.
        AppendText = new AppendTextCallback(base.AppendText);
        AppendWord = new AppendTextCallback(base.AppendWord);
        AddProperty = new AddPropertyCallback(base.AddProperty);
        AppendWhiteSpace = new AppendSpaceCallback(base.AppendWhiteSpace);
        AppendStructuralBreak = new AppendSpaceCallback(base.AppendStructuralBreak);
        HotUp = new HotCallback(base.HotUp);
        HotDown = new HotCallback(base.HotDown);
        // Preprocessor directives must be the first non-whitespace token on
        // their own line, so the conditional section is kept on separate lines.
#if ENABLE_RDF_ADAPTER
        AddLink = new AddLinkCallback(base.AddLink);
#endif
    }

    // Parsing state is initialized regardless of register_filter.
    ignore_level = 0;
    building_text = false;
    builder = new StringBuilder();
}
/// <summary>
/// Constructs the HTML filter, optionally wiring it up as a registered
/// filter, and resets the per-instance parsing state.
/// </summary>
/// <param name="register_filter">
/// If <c>true</c>: the version is set via <c>base.SetVersion</c>,
/// <c>SnippetMode</c> is set to <c>true</c>, the file type is set to
/// "document", and each output callback is bound to the base-class method
/// of the same name. If <c>false</c>: these steps are skipped and the
/// callbacks are not assigned here.
/// </param>
public FilterHtml (bool register_filter)
{
	if (register_filter) {
		base.SetVersion (version);
		SnippetMode = true;
		SetFileType ("document");

		// Text and word output.
		AppendText = new AppendTextCallback (base.AppendText);
		AppendWord = new AppendTextCallback (base.AppendWord);

		// Properties, whitespace and structural breaks.
		AddProperty = new AddPropertyCallback (base.AddProperty);
		AppendWhiteSpace = new AppendSpaceCallback (base.AppendWhiteSpace);
		AppendStructuralBreak = new AppendSpaceCallback (base.AppendStructuralBreak);

		// "Hot" region markers.
		HotUp = new HotCallback (base.HotUp);
		HotDown = new HotCallback (base.HotDown);

		// The link callback is bound only when ENABLE_RDF_ADAPTER is defined.
		// The directives need their own lines: C# rejects a directive that
		// follows other tokens on the same line.
#if ENABLE_RDF_ADAPTER
		AddLink = new AddLinkCallback (base.AddLink);
#endif
	}

	// Always start with no ignored nesting, no text run in progress and
	// an empty buffer.
	ignore_level = 0;
	building_text = false;
	builder = new StringBuilder ();
}
/// <summary>
/// Runs the HTML parser over <paramref name="html_string"/>, delivering
/// output through the callbacks passed in. The callbacks overwrite the
/// handlers currently stored on this instance.
/// </summary>
/// <param name="html_string">The HTML text to load.</param>
/// <param name="append_text_cb">Installed as both <c>AppendText</c> and <c>AppendWord</c>.</param>
/// <param name="add_prop_cb">Installed as <c>AddProperty</c>.</param>
/// <param name="append_white_cb">Installed as <c>AppendWhiteSpace</c>.</param>
/// <param name="append_break_cb">Installed as <c>AppendStructuralBreak</c>.</param>
/// <param name="hot_up_cb">Installed as <c>HotUp</c>.</param>
/// <param name="hot_down_cb">Installed as <c>HotDown</c>.</param>
/// <remarks>
/// Exceptions raised by <c>LoadHtml</c> are caught, logged with
/// <c>Log.Debug</c> and swallowed; the method then returns normally.
/// </remarks>
public void ExtractText (string html_string,
			 AppendTextCallback append_text_cb,
			 AddPropertyCallback add_prop_cb,
			 AppendSpaceCallback append_white_cb,
			 AppendSpaceCallback append_break_cb,
			 HotCallback hot_up_cb,
			 HotCallback hot_down_cb)
{
	// Hot-region handlers.
	HotUp = hot_up_cb;
	HotDown = hot_down_cb;

	// Spacing handlers.
	AppendWhiteSpace = append_white_cb;
	AppendStructuralBreak = append_break_cb;

	// Content handlers; the same callback receives both text and words.
	AddProperty = add_prop_cb;
	AppendText = append_text_cb;
	AppendWord = append_text_cb;

	// Fresh document per call, with node events sent to HandleNodeEvent.
	HtmlDocument parser = new HtmlDocument ();
	parser.ReportNode += HandleNodeEvent;
	parser.StreamMode = true;

	try {
		parser.LoadHtml (html_string);
	} catch (Exception ex) {
		// Failure is logged only, never propagated to the caller.
		Log.Debug (ex, "Exception while filtering html string [{0}]", html_string);
	}
}