/// <summary>
/// Loads the supplied stream as an HTML document (decoded as UTF-8) and hands the
/// document's navigator, wrapped in an <c>HtmlNodeWrapper</c>, to the selector.
/// </summary>
/// <param name="ctx">Pipeline context that is forwarded to the selector.</param>
/// <param name="sink">Not used by this override.</param>
/// <param name="elt">Not used by this override.</param>
/// <param name="strm">Stream containing the HTML content to load.</param>
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.Load(strm, Encoding.UTF8); //fixme: detect encoding

    // The selector works on wrapped nodes, so wrap the navigator positioned on the loaded document.
    HtmlNodeNavigator navigator = (HtmlNodeNavigator)htmlDoc.CreateNavigator();
    HtmlNodeWrapper rootNode = new HtmlNodeWrapper(navigator);
    selector.Process(ctx, rootNode);
}
/// <summary>
/// Imports an XML stream.
/// <para>
/// When a selector is configured, the complete document is loaded through an
/// <c>XmlHelper</c> and its document element is passed to the selector.
/// </para>
/// <para>
/// Otherwise the stream is read forward-only with an <see cref="XmlReader"/> and every
/// element and attribute is reported to <paramref name="sink"/> under a slash-separated
/// path key: <c>root/child/...</c> for elements and <c>root/child/@attr</c> for attributes.
/// </para>
/// </summary>
/// <param name="ctx">Pipeline context; supplies the debug log and receives the emitted-count increments.</param>
/// <param name="sink">Receives one <c>HandleValue</c> call per attribute and per closed (or empty) element.</param>
/// <param name="elt">Stream provider; its <c>FullName</c> is passed to <c>XmlHelper.Load</c> in selector mode.</param>
/// <param name="strm">Stream containing the XML content.</param>
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
    if (selector != null)
    {
        XmlHelper h = new XmlHelper();
        h.Load(strm.CreateTextReader(), elt.FullName);
        selector.Process(ctx, new XmlNodeWrapper(h.DocumentElement));
        return;
    }

    // keys[i] holds the path of the currently open element at depth i,
    // values[i] holds the text accumulated so far for that element.
    List<String> keys = new List<String>();
    List<String> values = new List<String>();
    int lvl = -1; // depth of the current element; the root element is depth 0

    // The using statement guarantees the reader is closed even when reading
    // or the sink throws (previously it was only closed on the success path).
    using (XmlReader rdr = XmlReader.Create(strm))
    {
        Logger l = ctx.DebugLog;
        while (rdr.Read())
        {
            if (dumpReader)
            {
                l.Log("{0}: {1}, {2} [{3}]", rdr.Name, rdr.NodeType, rdr.IsEmptyElement, rdr.Value);
            }

            switch (rdr.NodeType)
            {
                case XmlNodeType.CDATA:
                case XmlNodeType.Text:
                case XmlNodeType.Whitespace:
                case XmlNodeType.SignificantWhitespace:
                    // Text outside the root, or directly inside the root element, is ignored.
                    if (lvl <= 0)
                    {
                        continue;
                    }
                    values[lvl] = values[lvl] + rdr.Value;
                    continue;

                case XmlNodeType.Element:
                    lvl++;
                    if (lvl >= keys.Count)
                    {
                        // First time this depth is reached: grow both stacks in lockstep.
                        keys.Add(null);
                        values.Add(null);
                    }

                    if (lvl == 0)
                    {
                        keys[0] = rdr.Name;
                    }
                    else
                    {
                        keys[lvl] = keys[lvl - 1] + "/" + rdr.Name;
                        if (lvl == 1)
                        {
                            // Each direct child of the root is counted as one emitted item.
                            ctx.IncrementEmitted();
                        }
                    }

                    // Cache this value: once the reader has moved onto the attributes it is lost.
                    bool isEmpty = rdr.IsEmptyElement;
                    if (rdr.AttributeCount > 0)
                    {
                        String pfx = keys[lvl] + "/@";
                        while (rdr.MoveToNextAttribute())
                        {
                            sink.HandleValue(ctx, pfx + rdr.Name, rdr.Value);
                        }
                    }

                    if (!isEmpty)
                    {
                        continue;
                    }

                    // An empty element (<x/>) gets no EndElement node: report it now with a null value.
                    sink.HandleValue(ctx, keys[lvl], null);
                    lvl--;
                    continue;

                case XmlNodeType.EndElement:
                    sink.HandleValue(ctx, keys[lvl], values[lvl]);
                    values[lvl] = null; // reset so the next sibling at this depth starts clean
                    lvl--;
                    continue;
            }
        }
    }
}