Example #1
0
        /// <summary>
        /// Parses <paramref name="html_string"/> and reports what it finds
        /// through the supplied callbacks, which replace the callbacks
        /// currently installed on this instance.
        /// </summary>
        /// <remarks>
        /// <paramref name="append_text_cb"/> is installed as both the text and
        /// the word callback. The callbacks are not restored when the call
        /// returns. Any exception raised while loading the markup is logged
        /// at debug level and is not rethrown.
        /// </remarks>
        /// <param name="html_string">The HTML markup to parse.</param>
        /// <param name="append_text_cb">Installed as AppendText and AppendWord.</param>
        /// <param name="add_prop_cb">Installed as AddProperty.</param>
        /// <param name="append_white_cb">Installed as AppendWhiteSpace.</param>
        /// <param name="append_break_cb">Installed as AppendStructuralBreak.</param>
        /// <param name="hot_up_cb">Installed as HotUp.</param>
        /// <param name="hot_down_cb">Installed as HotDown.</param>
        public void ExtractText(string html_string,
                                AppendTextCallback append_text_cb,
                                AddPropertyCallback add_prop_cb,
                                AppendSpaceCallback append_white_cb,
                                AppendSpaceCallback append_break_cb,
                                HotCallback hot_up_cb,
                                HotCallback hot_down_cb)
        {
            // One callback serves both text and word output.
            AppendText = AppendWord = append_text_cb;

            AddProperty = add_prop_cb;

            AppendWhiteSpace      = append_white_cb;
            AppendStructuralBreak = append_break_cb;

            HotUp   = hot_up_cb;
            HotDown = hot_down_cb;

            // Nodes are delivered to HandleNodeEvent through ReportNode.
            HtmlDocument parser = new HtmlDocument();
            parser.ReportNode += HandleNodeEvent;
            parser.StreamMode = true;

            try
            {
                parser.LoadHtml(html_string);
            }
            catch (Exception e)
            {
                // Best effort: a load failure is logged, not propagated.
                Log.Debug(e, "Exception while filtering html string [{0}]", html_string);
            }
        }
Example #2
0
        /// <summary>
        /// Creates the filter with empty parser state. When
        /// <paramref name="register_filter"/> is true it also sets the filter
        /// version, turns on snippet mode, sets the file type to "document"
        /// and wires the output callbacks to the base class implementations.
        /// </summary>
        /// <param name="register_filter">
        /// True to perform the registration and callback wiring; false to
        /// leave the callbacks unassigned by this constructor.
        /// </param>
        public FilterHtml(bool register_filter)
        {
            // Parser state is initialised the same way in both modes.
            builder       = new StringBuilder();
            building_text = false;
            ignore_level  = 0;

            if (!register_filter)
            {
                return;
            }

            base.SetVersion(version);
            SnippetMode = true;
            SetFileType("document");

            // Each callback forwards to the base class method of the same name.
            AppendText            = base.AppendText;
            AppendWord            = base.AppendWord;
            AddProperty           = base.AddProperty;
            AppendWhiteSpace      = base.AppendWhiteSpace;
            AppendStructuralBreak = base.AppendStructuralBreak;
            HotUp                 = base.HotUp;
            HotDown               = base.HotDown;
#if ENABLE_RDF_ADAPTER
            AddLink               = base.AddLink;
#endif
        }
Example #3
0
		/// <summary>
		/// Creates the filter with empty parser state. When
		/// <paramref name="register_filter"/> is true it also sets the filter
		/// version, turns on snippet mode, sets the file type to "document"
		/// and wires the output callbacks to the base class implementations.
		/// </summary>
		/// <param name="register_filter">
		/// True to perform the registration and callback wiring; false to
		/// leave the callbacks unassigned by this constructor.
		/// </param>
		public FilterHtml (bool register_filter)
		{
			// Parser state is initialised the same way in both modes.
			builder = new StringBuilder ();
			building_text = false;
			ignore_level = 0;

			if (!register_filter)
				return;

			base.SetVersion (version);
			SnippetMode = true;
			SetFileType ("document");

			// Each callback forwards to the base class method of the same name.
			AppendText = base.AppendText;
			AppendWord = base.AppendWord;
			AddProperty = base.AddProperty;
			AppendWhiteSpace = base.AppendWhiteSpace;
			AppendStructuralBreak = base.AppendStructuralBreak;
			HotUp = base.HotUp;
			HotDown = base.HotDown;
#if ENABLE_RDF_ADAPTER
			AddLink = base.AddLink;
#endif
		}
Example #4
0
		/// <summary>
		/// Parses <paramref name="html_string"/> and reports what it finds
		/// through the supplied callbacks, which replace the callbacks
		/// currently installed on this instance.
		/// </summary>
		/// <remarks>
		/// <paramref name="append_text_cb"/> is installed as both the text and
		/// the word callback. The callbacks are not restored when the call
		/// returns. Any exception raised while loading the markup is logged
		/// at debug level and is not rethrown.
		/// </remarks>
		/// <param name="html_string">The HTML markup to parse.</param>
		/// <param name="append_text_cb">Installed as AppendText and AppendWord.</param>
		/// <param name="add_prop_cb">Installed as AddProperty.</param>
		/// <param name="append_white_cb">Installed as AppendWhiteSpace.</param>
		/// <param name="append_break_cb">Installed as AppendStructuralBreak.</param>
		/// <param name="hot_up_cb">Installed as HotUp.</param>
		/// <param name="hot_down_cb">Installed as HotDown.</param>
		public void ExtractText (string html_string,
					 AppendTextCallback append_text_cb,
					 AddPropertyCallback add_prop_cb,
					 AppendSpaceCallback append_white_cb,
					 AppendSpaceCallback append_break_cb,
					 HotCallback hot_up_cb,
					 HotCallback hot_down_cb)
		{
			// One callback serves both text and word output.
			AppendText = AppendWord = append_text_cb;

			AddProperty = add_prop_cb;

			AppendWhiteSpace = append_white_cb;
			AppendStructuralBreak = append_break_cb;

			HotUp = hot_up_cb;
			HotDown = hot_down_cb;

			// Nodes are delivered to HandleNodeEvent through ReportNode.
			HtmlDocument parser = new HtmlDocument ();
			parser.ReportNode += HandleNodeEvent;
			parser.StreamMode = true;

			try {
				parser.LoadHtml (html_string);
			} catch (Exception e) {
				// Best effort: a load failure is logged, not propagated.
				Log.Debug (e, "Exception while filtering html string [{0}]", html_string);
			}
		}