protected virtual bool CheckNeedImport(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
{
   ExistState existState = toExistState(sink.HandleValue(ctx, "record/_checkexist", elt));

   //Return true if we need to convert this file
   return (existState & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist)) == 0;
}
private void importUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
{
   ctx.IncrementEmitted();
   TikaAsyncWorker worker = new TikaAsyncWorker(this, elt);
   String fileName = elt.FullName;
   sink.HandleValue(ctx, "record/_start", fileName);
   sink.HandleValue(ctx, "record/lastmodutc", worker.LastModifiedUtc);
   sink.HandleValue(ctx, "record/virtualFilename", elt.VirtualName);

   //Check if we need to convert this file
   if ((ctx.ImportFlags & _ImportFlags.ImportFull) == 0) //Not a full import
   {
      if ((ctx.ImportFlags & _ImportFlags.RetryErrors) == 0 && worker.LastModifiedUtc < previousRun)
      {
         ctx.Skipped++;
         return;
      }
      ExistState existState = toExistState(sink.HandleValue(ctx, "record/_checkexist", elt));
      if ((existState & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist)) != 0)
      {
         ctx.Skipped++;
         return;
      }
   }

   TikaAsyncWorker popped = pushPop(ctx, sink, worker);
   if (popped != null)
      importUrl(ctx, sink, popped);
}
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   var doc = new HtmlDocument();
   doc.Load(strm, Encoding.UTF8); //fixme: detect encoding
   selector.Process(ctx, new HtmlNodeWrapper((HtmlNodeNavigator)doc.CreateNavigator()));
}
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   Workbook wb = null;
   var excel = new Microsoft.Office.Interop.Excel.Application();
   try
   {
      wb = excel.Workbooks.Open(elt.FullName);
      foreach (Microsoft.Office.Interop.Excel.Worksheet sheet in wb.Worksheets)
      {
         String name = sheet.Name;
         sink.HandleValue(ctx, "_sheet/_start", name);
         if (selectedSheetsExpr == null || selectedSheetsExpr.IsMatch(name))
            importSheet(ctx, sink, elt, sheet);
         sink.HandleValue(ctx, "_sheet/_stop", name);
      }
   }
   finally
   {
      closeWorkbook(ref wb);
      Utils.FreeAndNil(ref excel);
   }
}
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   DbConnection connection = null;
   try
   {
      connection = createConnection();
      ctx.DebugLog.Log("Open SQL connection with [{0}], timeout={1} (sec).", connection.ConnectionString, connection.ConnectionTimeout);
      connection.Open();

      if (Queries == null)
         EmitTables(ctx, connection);
      else
         foreach (Query q in Queries) EmitQuery(ctx, connection, q);

      dumpConversionErrors(ctx.ImportLog);
   }
   finally
   {
      Utils.FreeAndNil(ref connection);
   }
}
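// Hedged sketch of what a query emitter in this style could look like. EmitQuery itself is
// not shown in these snippets, so the method below only illustrates the event pattern used
// elsewhere in this listing (one sink event per column, then a closing "record" event); it
// is not the actual implementation. Assumes System.Data.Common is in scope.
private void emitReader(PipelineContext ctx, IDatasourceSink sink, DbDataReader reader)
{
   while (reader.Read())
   {
      sink.HandleValue(ctx, "record/_start", null);
      for (int i = 0; i < reader.FieldCount; i++)
      {
         Object value = reader.IsDBNull(i) ? null : reader.GetValue(i);
         sink.HandleValue(ctx, "record/" + reader.GetName(i), value);
      }
      sink.HandleValue(ctx, "record", null); //Close the record
      ctx.IncrementEmitted();
   }
}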
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   sink.HandleValue(ctx, "record/double", 123.45);
   sink.HandleValue(ctx, "record/date", DateTime.Now);
   sink.HandleValue(ctx, "record/utcdate", DateTime.UtcNow);
   sink.HandleValue(ctx, "record/int", -123);
   sink.HandleValue(ctx, "record/string", "foo bar");
   sink.HandleValue(ctx, "record", null);
}
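// A minimal sketch (not from the sources above) of the receiving side of these calls: a
// sink collects key/value events until the closing event (key "record", value null)
// arrives. IDatasourceSink may declare more members than HandleValue, so this class only
// mirrors the one method the snippets actually call and does not claim to implement the
// full interface; the class name DebugDumpSink is hypothetical.
class DebugDumpSink
{
   private readonly Dictionary<String, Object> buffered = new Dictionary<String, Object>();

   public Object HandleValue(PipelineContext ctx, String key, Object value)
   {
      if (key == "record" && value == null)
      {
         //End of record: dump what was collected and start over
         foreach (var kvp in buffered)
            ctx.DebugLog.Log("{0}={1}", kvp.Key, kvp.Value);
         buffered.Clear();
         return null;
      }
      buffered[key] = value; //In this sketch, later values for the same key overwrite earlier ones
      return null;
   }
}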
public static void EmitToken(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
{
   if (token == null) return;

   Object value = token;
   maxLevel--;
   switch (token.Type)
   {
      case JTokenType.Array:
         if (maxLevel < 0) break;
         var arr = (JArray)token;
         String tmpKey = key + "/_v";
         for (int i = 0; i < arr.Count; i++)
            EmitToken(ctx, sink, arr[i], tmpKey, maxLevel);
         sink.HandleValue(ctx, key, null);
         return;

      case JTokenType.None:
      case JTokenType.Null:
      case JTokenType.Undefined:
         value = null;
         break;

      case JTokenType.Date: value = (DateTime)token; break;
      case JTokenType.String: value = (String)token; break;
      case JTokenType.Float: value = (double)token; break;
      case JTokenType.Integer: value = (Int64)token; break;
      case JTokenType.Boolean: value = (bool)token; break;

      case JTokenType.Object:
         if (maxLevel < 0) break;
         JObject obj = (JObject)token;
         foreach (var kvp in obj)
            EmitToken(ctx, sink, kvp.Value, key + "/" + generateObjectKey(kvp.Key), maxLevel);
         sink.HandleValue(ctx, key, null);
         return;
   }
   sink.HandleValue(ctx, key, value);
}
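// Worked example (illustrative): given key "record" and a sufficiently large maxLevel, the
// JSON document {"id": 1, "tags": ["a", "b"]} results in these HandleValue calls, in order:
//    record/id       -> 1
//    record/tags/_v  -> "a"
//    record/tags/_v  -> "b"
//    record/tags     -> null   (closes the array)
//    record          -> null   (closes the object)
// Object member names pass through generateObjectKey first, so the exact key text may differ.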
public static void SplitTokens(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
{
   if (token == null) return;

   String tmpKey;
   Object value = token;
   maxLevel--;
   switch (token.Type)
   {
      case JTokenType.None:
      case JTokenType.Null:
      case JTokenType.Undefined:
         value = null;
         break;

      case JTokenType.Date:
      case JTokenType.String:
      case JTokenType.Float:
      case JTokenType.Integer:
      case JTokenType.Boolean:
         break;

      case JTokenType.Array:
         if (maxLevel < 0) break;
         var arr = (JArray)token;
         tmpKey = key + "/_v";
         for (int i = 0; i < arr.Count; i++)
            SplitTokens(ctx, sink, arr[i], tmpKey, maxLevel);
         sink.HandleValue(ctx, key, null);
         return;

      case JTokenType.Object:
         if (maxLevel < 0) break;
         JObject obj = (JObject)token;
         tmpKey = key + '/';
         foreach (var kvp in obj)
            SplitTokens(ctx, sink, kvp.Value, tmpKey + kvp.Key, maxLevel);
         sink.HandleValue(ctx, key, null);
         return;
   }
   sink.HandleValue(ctx, key, value);
}
private void emitSecurity(PipelineContext ctx, IDatasourceSink sink, String fileName)
{
   FileInfo info = new FileInfo(fileName);
   var ac = info.GetAccessControl();
   var rules = ac.GetAccessRules(true, true, typeof(NTAccount));
   foreach (AuthorizationRule rule in rules)
   {
      FileSystemAccessRule fsRule = rule as FileSystemAccessRule;
      if (fsRule == null) continue; //Only file system rules are relevant here
      if (fsRule.AccessControlType == AccessControlType.Deny) continue;
      //ctx.ImportLog.Log("rule2 {0}: {1}", securityCache.GetAccount(rule.IdentityReference), fsRule.FileSystemRights);
      if ((fsRule.FileSystemRights & FileSystemRights.ReadData) == 0) continue;

      String access;
      switch (fsRule.AccessControlType)
      {
         case AccessControlType.Allow: access = "/allow"; break;
         case AccessControlType.Deny: access = "/deny"; break;
         default: access = "/" + fsRule.ToString().ToLowerInvariant(); break;
      }

      var account = securityCache.GetAccount(rule.IdentityReference);
      if (account.WellKnownSid != null)
      {
         WellKnownSidType sidType = (WellKnownSidType)account.WellKnownSid;
         //ctx.ImportLog.Log("wellksid={0}", sidType);
         switch (sidType)
         {
            case WellKnownSidType.AuthenticatedUserSid:
            case WellKnownSidType.WorldSid:
               break;
            default:
               continue;
         }
      }
      else
      {
         if (!account.IsGroup) continue;
      }
      sink.HandleValue(ctx, "record/security/group" + access, account);
   }
}
private void importSheet(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Worksheet sheet)
{
   Range used = sheet.UsedRange;
   Range usedCells = used.Cells;
   if (usedCells == null) return;

   Object[,] c = (Object[,])used.Cells.Value2;
   if (c == null) return;

   int lo1 = c.GetLowerBound(0);
   int hi1 = c.GetUpperBound(0);
   int lo2 = c.GetLowerBound(1);
   int hi2 = c.GetUpperBound(1);

   List<String> headers = new List<string>();
   if (headersAt >= 0)
   {
      int headersRow = lo1 + headersAt;
      if (headersRow <= hi1)
      {
         int h = 0;
         for (int j = lo2; j <= hi2; j++)
         {
            for (; h < j; h++) headers.Add(null); //Pad so that headers[j] lines up with column j
            headers.Add(_toString(c[headersRow, j]));
            h++;
         }
      }
   }

   var keys = prepareEventKeys(sheet.Name, hi2 + 1, headers);
   for (int i = lo1 + startAt; i <= hi1; i++)
   {
      for (int j = lo2; j <= hi2; j++)
         sink.HandleValue(ctx, keys[j], c[i, j]);
      sink.HandleValue(ctx, keys[0], null);
      ctx.IncrementEmitted();
   }
}
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   CDO.IMessage msg = new CDO.Message();
   msg.DataSource.OpenObject(new IStreamFromStream(strm), "IStream");

   sink.HandleValue(ctx, "record/subject", msg.Subject);
   sink.HandleValue(ctx, "record/bcc", msg.BCC);
   sink.HandleValue(ctx, "record/cc", msg.CC);
   sink.HandleValue(ctx, "record/from", msg.From);
   sink.HandleValue(ctx, "record/to", msg.To);
   Utils.FreeAndNil(ref msg);
   sink.HandleValue(ctx, "record", null);
}
private TikaAsyncWorker pushPop(PipelineContext ctx, IDatasourceSink sink, TikaAsyncWorker newElt)
{
   try
   {
      return (TikaAsyncWorker)((newElt == null) ? workerQueue.Pop() : workerQueue.PushAndOptionalPop(newElt));
   }
   catch (Exception e)
   {
      ctx.HandleException(e);
      return null;
   }
}
private void importUrl(PipelineContext ctx, IDatasourceSink sink, TikaAsyncWorker worker)
{
   String fileName = worker.StreamElt.FullName;
   sink.HandleValue(ctx, "record/_start", fileName);
   sink.HandleValue(ctx, "record/lastmodutc", worker.LastModifiedUtc);
   sink.HandleValue(ctx, "record/virtualFilename", worker.StreamElt.VirtualName);
   sink.HandleValue(ctx, "record/virtualRoot", worker.StreamElt.VirtualRoot);

   try
   {
      var htmlProcessor = worker.HtmlProcessor;
      if (worker.StoredAs != null)
         sink.HandleValue(ctx, "record/converted_file", worker.StoredAs);

      //Write html properties
      foreach (var kvp in htmlProcessor.Properties)
         sink.HandleValue(ctx, "record/" + kvp.Key, kvp.Value);

      if (mustEmitSecurity)
         emitSecurity(ctx, sink, fileName);

      //Add dummy type to recognize the errors
      //if (error)
      //   doc.AddField("content_type", "ConversionError");
      //if (htmlProcessor.IsTextMail)
      sink.HandleValue(ctx, "record/_istextmail", htmlProcessor.IsTextMail);
      sink.HandleValue(ctx, "record/_numparts", htmlProcessor.numParts);
      sink.HandleValue(ctx, "record/_numattachments", htmlProcessor.Attachments.Count);
      foreach (var a in htmlProcessor.Attachments)
         sink.HandleValue(ctx, "record/_attachment", a);
      sink.HandleValue(ctx, "record/_filesize", worker.FileSize);
      sink.HandleValue(ctx, "record/shortcontent", htmlProcessor.GetAbstract(abstractLength, abstractDelta));
      sink.HandleValue(ctx, "record/head", htmlProcessor.GetInnerHead());
      sink.HandleValue(ctx, "record/content", htmlProcessor.GetInnerBody());
      sink.HandleValue(ctx, "record/_end", fileName);
      sink.HandleValue(ctx, "record", null);
   }
   catch (Exception e)
   {
      ctx.HandleException(e);
   }
}
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   foreach (var elt in this.streamDirectory.GetProviders(ctx))
   {
      try
      {
         importUrl(ctx, sink, elt);
      }
      catch (Exception e)
      {
         throw new BMException(e, e.Message + "\r\nUrl=" + elt.Uri + ".");
      }
   }
}
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   foreach (var elt in streamDirectory.GetProviders(ctx))
   {
      try
      {
         importUrl(ctx, sink, elt);
      }
      catch (Exception e)
      {
         throw new BMException(e, WrapMessage(e, elt.ToString(), "{0}\r\nUrl={1}."));
      }
   }
}
protected virtual void ImportUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
{
   int orgEmitted = ctx.Emitted;
   if (addEmitted)
      ctx.IncrementEmitted();
   DateTime dtFile = elt.LastModified;
   ctx.SendItemStart(elt);
   //TODO if ((ctx.ActionFlags & _ActionFlags.Skip) != 0

   //Check if we need to import this file
   if ((ctx.ImportFlags & _ImportFlags.ImportFull) == 0) //Not a full import
   {
      if (!CheckNeedImport(ctx, sink, elt)) goto SKIPPED;
   }
   if (ctx.SkipUntilKey == "record") goto SKIPPED;

   using (Stream fs = _CreateStream(ctx, elt))
   {
      ImportStream(ctx, sink, elt, fs);
   }
   if (!addEmitted && orgEmitted == ctx.Emitted)
      ctx.IncrementEmitted();
   ctx.OptSendItemStop();
   return;

SKIPPED:
   ctx.Skipped++;
   if (!addEmitted && orgEmitted == ctx.Emitted)
      ctx.IncrementEmitted();
   if (logSkips)
      ctx.DebugLog.Log("Skipped: {0}. Date={1}", elt.FullName, elt.LastModified);
}
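// Illustrative sketch (not part of the sources above): a minimal datasource that plugs into
// the ImportUrl flow by overriding ImportStream only. The base class name
// StreamDatasourceBase is an assumption; the actual base type is not visible in these
// snippets, only its virtual members (ImportUrl, ImportStream, CheckNeedImport).
class LineCountDatasource : StreamDatasourceBase
{
   protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
   {
      int lines = 0;
      using (var rdr = new StreamReader(strm))
         while (rdr.ReadLine() != null) lines++;

      sink.HandleValue(ctx, "record/filename", elt.FullName);
      sink.HandleValue(ctx, "record/linecount", lines);
      sink.HandleValue(ctx, "record", null); //Close the record
   }
}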
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   workerQueue = AsyncRequestQueue.Create(maxParallel);
   ctx.ImportLog.Log("TikaDS starting. maxparallel={0}, dbgstore={1}, Q={2}", maxParallel, DbgStoreDir, workerQueue);
   if (maxParallel >= 2 && ServicePointManager.DefaultConnectionLimit < maxParallel)
   {
      ctx.ImportLog.Log("Updating connectionLimit for {0} to {1}", ServicePointManager.DefaultConnectionLimit, maxParallel);
      ServicePointManager.DefaultConnectionLimit = maxParallel;
   }

   ensureTikaServiceStarted(ctx);
   previousRun = ctx.RunAdministrations.GetLastOKRunDateShifted(ctx.DatasourceAdmin);
   ctx.ImportLog.Log("Previous (shifted) run was {0}.", previousRun);
   //GenericStreamProvider.DumpRoots(ctx, streamDirectory);
   try
   {
      if (this.mustEmitSecurity)
         securityCache = new SecurityCache(TikaSecurityAccount.FactoryImpl);
      foreach (var elt in streamDirectory.GetProviders(ctx))
      {
         try
         {
            importUrl(ctx, sink, elt);
         }
         catch (Exception e)
         {
            throw new BMException(e, "{0}\r\nUrl={1}.", e.Message, elt);
         }
      }

      //Handle still queued workers
      while (true)
      {
         TikaAsyncWorker popped = pushPop(ctx, sink, null);
         if (popped == null) break;
         importUrl(ctx, sink, popped);
      }
   }
   finally
   {
      workerQueue.PopAllWithoutException();
      Utils.FreeAndNil(ref securityCache);
   }
}
public override Object HandleValue(PipelineContext ctx, String key, Object value)
{
   IDatasourceSink sink = ctx.Pipeline;
   if (destination == Destination.Datasource)
      sink = (IDatasourceSink)ctx.DatasourceAdmin.Datasource;

   String reckey = (String)ctx.Pipeline.GetVariable("key");
   if (reckey == null) return null;

   this.endPoint.EmitRecord(ctx, reckey, recField, sink, eventKey, maxLevel);
   return value;
}
public static void SplitInnerTokens(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
{
   if (token == null) return;

   String tmpKey;
   maxLevel--;
   switch (token.Type)
   {
      case JTokenType.Array:
         if (maxLevel < 0) break;
         var arr = (JArray)token;
         tmpKey = key + "/_v";
         for (int i = 0; i < arr.Count; i++)
            SplitTokens(ctx, sink, arr[i], tmpKey, maxLevel);
         sink.HandleValue(ctx, key, null);
         return;

      case JTokenType.Object:
         if (maxLevel < 0) break;
         JObject obj = (JObject)token;
         tmpKey = key + '/';
         foreach (var kvp in obj)
            SplitTokens(ctx, sink, kvp.Value, tmpKey + kvp.Key, maxLevel);
         sink.HandleValue(ctx, key, null);
         return;
   }
}
private void importRecord(PipelineContext ctx, IDatasourceSink sink, Stream strm, int splitUntil)
{
   JsonTextReader rdr = new JsonTextReader(new StreamReader(strm, true));
   JToken jt = JObject.ReadFrom(rdr);
   rdr.Close();
   strm.Close();

   if (jt.Type != JTokenType.Array)
   {
      Pipeline.EmitToken(ctx, sink, jt, "record", splitUntil);
      ctx.IncrementEmitted();
   }
   else
   {
      foreach (var item in (JArray)jt)
      {
         Pipeline.EmitToken(ctx, sink, item, "record", splitUntil);
         ctx.IncrementEmitted();
      }
   }
}
//private Stream createInputStream(String fn)
//{
//   FileStream fs = new FileStream(fn, FileMode.Open, FileAccess.Read, FileShare.Read, 16 * 1024, false);
//   //_FileStream fs = new _FileStream(fn, _FileMode.Open, _FileAccess.Read, _FileShare.Read, 16 * 1024);
//   String ext = Path.GetExtension(fn);
//   if (!String.Equals(".gz", ext, StringComparison.OrdinalIgnoreCase)) goto NO_ZIP;
//   byte[] buf = new byte[2];
//   fs.Read(buf, 0, 2);
//   if (buf[0] != 0x1f || buf[1] != 0x8b) goto NO_ZIP;
//   fs.Position = 0;
//   return new GZipStream(fs, CompressionMode.Decompress, false);
//NO_ZIP:
//   return fs;
//}

protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   if (oneBasedSortKey != 0)
   {
      ImportSortedStream(ctx, sink, elt, strm);
      return;
   }

   List<String> keys;
   CsvReader csvRdr = createReader(strm);
   optReadHeader(csvRdr);
   keys = createKeysForEmit();

   int startAt = this.startAt;
   while (csvRdr.NextRecord())
   {
      if (startAt > 0 && startAt > csvRdr.Line) continue;
      ctx.IncrementEmitted();
      sink.HandleValue(ctx, "record/_start", null);

      var fields = csvRdr.Fields;
      int fieldCount = fields.Count;
      //ctx.DebugLog.Log("Record {0}. FC={1}", line, fieldCount);
      generateMissingKeysForEmit(keys, fieldCount);
      for (int i = 0; i < fieldCount; i++)
         sink.HandleValue(ctx, keys[i], fields[i]);
      sink.HandleValue(ctx, "record", null);
   }
   if (csvRdr.NumInvalidRecords > 0)
      ctx.ImportLog.Log(_LogType.ltWarning, "Invalid records detected: {0}", csvRdr.NumInvalidRecords);
}
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   _BeforeImport(ctx, sink);
   try
   {
      foreach (var elt in streamDirectory.GetProviders(ctx))
      {
         try
         {
            ImportUrl(ctx, sink, elt);
         }
         catch (Exception e)
         {
            e = new BMException(e, WrapMessage(e, elt.ToString(), "{0}\r\nUrl={1}."));
            ctx.HandleException(e);
         }
      }
   }
   finally
   {
      _AfterImport(ctx, sink);
   }
}
public void EmitVariables(PipelineContext ctx, IDatasourceSink sink, String key, int maxLevel)
{
   if (variables == null) return;
   foreach (var kvp in variables)
   {
      var tmpkey = key + '/' + kvp.Key;
      if (maxLevel <= 0) goto EMIT_RAW;

      JToken tk = kvp.Value as JToken;
      if (tk != null)
      {
         SplitTokens(ctx, sink, tk, tmpkey, maxLevel);
         continue;
      }

   EMIT_RAW:
      sink.HandleValue(ctx, tmpkey, kvp.Value);
   }
}
public virtual void EmitRecord(PipelineContext ctx, String recordKey, String recordField, IDatasourceSink sink, String eventKey, int maxLevel) { }
public override void EmitRecord(PipelineContext ctx, String recordKey, String recordField, IDatasourceSink sink, String eventKey, int maxLevel)
{
   JObject obj = DocType.LoadByKey(Connection, recordKey);
   if (obj == null) return;

   JToken token = (recordField == null) ? obj : obj.GetValue(recordField, StringComparison.InvariantCultureIgnoreCase);
   if (token != null)
      Pipeline.EmitToken(ctx, sink, token, eventKey, maxLevel);
}
private void importUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
{
   int splitUntil = elt.ContextNode.ReadInt("@splituntil", this.splitUntil);
   bool objectPerLine = elt.ContextNode.ReadBool("@objectperline", this.objectPerLine);

   ctx.SendItemStart(elt);
   if ((ctx.ActionFlags & _ActionFlags.Skip) != 0) return;

   ExistState existState = ExistState.NotExist;
   if ((ctx.ImportFlags & _ImportFlags.ImportFull) == 0) //Not a full import
      existState = toExistState(sink.HandleValue(ctx, "record/_checkexist", null));

   //Check if we need to convert this file
   if ((existState & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist)) != 0)
   {
      ctx.Skipped++;
      ctx.ImportLog.Log("Skipped: {0}. Date={1}", elt, 0); //dtFile);
      return;
   }

   List<String> keys = new List<string>();
   List<String> values = new List<String>();
   Stream fs = null;
   try
   {
      fs = elt.CreateStream(ctx);
      if (!objectPerLine)
         importRecord(ctx, sink, fs, splitUntil);
      else
      {
         //Object-per-line mode: scan the buffer for newlines and feed each line to
         //importRecord via a temporary MemoryStream.
         byte[] buf = new byte[4096];
         int offset = 0;
         MemoryStream tmp = new MemoryStream();
         while (true)
         {
            int len = offset + fs.Read(buf, offset, buf.Length - offset);
            if (len == offset) break;

            int i = offset;
            for (; i < len; i++)
               if (buf[i] == '\n') break;

            tmp.Write(buf, offset, i - offset);
            if (i == offset)
            {
               offset = 0;
               continue;
            }
            if (tmp.Position > 0)
            {
               tmp.Position = 0;
               importRecord(ctx, sink, tmp, splitUntil);
               tmp.Position = 0;
            }
            if (i + 1 < offset)
               tmp.Write(buf, i + 1, len - i - 1);
         }
         if (offset > 0)
            tmp.Write(buf, 0, offset);
         if (tmp.Position > 0)
         {
            tmp.Position = 0;
            importRecord(ctx, sink, tmp, splitUntil);
         }
      }
      ctx.OptSendItemStop();
   }
   catch (Exception e)
   {
      ctx.HandleException(e);
   }
}
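// A minimal alternative sketch for the object-per-line path above (not the code used by
// this datasource): reading line by line with a StreamReader sidesteps the manual buffer
// bookkeeping. It assumes importRecord accepts any readable Stream positioned at 0, and
// that each non-empty line holds one complete JSON object; the method name
// importObjectsPerLine is hypothetical.
private void importObjectsPerLine(PipelineContext ctx, IDatasourceSink sink, Stream fs, int splitUntil)
{
   using (var rdr = new StreamReader(fs, Encoding.UTF8, true))
   {
      String line;
      while ((line = rdr.ReadLine()) != null)
      {
         if (line.Length == 0) continue; //Skip empty lines
         var tmp = new MemoryStream(Encoding.UTF8.GetBytes(line));
         importRecord(ctx, sink, tmp, splitUntil); //importRecord closes the stream it receives
      }
   }
}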
protected void ImportSortedStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   List<String[]> rows = new List<string[]>();
   int maxFieldCount = 0;
   CsvReader csvRdr = createReader(strm);
   optReadHeader(csvRdr);

   int startAt = this.startAt;
   int zeroBasedSortKey = (oneBasedSortKey & ~SORTKEY_REVERSE) - 1;
   while (csvRdr.NextRecord())
   {
      if (startAt > 0 && startAt > csvRdr.Line) continue;

      var fields = csvRdr.Fields;
      int fieldCount = fields.Count;
      if (fieldCount > maxFieldCount) maxFieldCount = fieldCount;

      String[] arr = new String[fieldCount + 1];
      for (int i = 0; i < fieldCount; i++)
         arr[i + 1] = fields[i];
      if (fieldCount > zeroBasedSortKey)
         arr[0] = arr[zeroBasedSortKey + 1];
      rows.Add(arr);
   }

   ctx.DebugLog.Log("First 10 sortkeys:");
   int N = rows.Count;
   if (N > 10) N = 10;
   for (int i = 0; i < N; i++)
      ctx.DebugLog.Log("-- [{0}]: '{1}'", i, rows[i][0]);

   if (zeroBasedSortKey >= 0)
      rows.Sort(cbSortString);

   ctx.DebugLog.Log("First 10 sortkeys after sort:");
   for (int i = 0; i < N; i++)
      ctx.DebugLog.Log("-- [{0}]: '{1}'", i, rows[i][0]);

   //Fill pre-calculated keys
   List<String> keys = createKeysForEmit();
   generateMissingKeysForEmit(keys, maxFieldCount);

   if ((oneBasedSortKey & SORTKEY_REVERSE) == 0) //Normal order
   {
      //Emit sorted records
      for (int r = 0; r < rows.Count; r++)
      {
         ctx.IncrementEmitted();
         String[] arr = rows[r];
         rows[r] = null; //Let this element be GC-ed
         sink.HandleValue(ctx, "record/_start", null);
         for (int i = 1; i < arr.Length; i++) //arr[0] is the sortkey
            sink.HandleValue(ctx, keys[i - 1], arr[i]);
         sink.HandleValue(ctx, "record", null);
      }
   }
   else
   {
      //Emit reverse sorted records
      for (int r = rows.Count - 1; r >= 0; r--)
      {
         ctx.IncrementEmitted();
         String[] arr = rows[r];
         rows[r] = null; //Let this element be GC-ed
         sink.HandleValue(ctx, "record/_start", null);
         for (int i = 1; i < arr.Length; i++) //arr[0] is the sortkey
            sink.HandleValue(ctx, keys[i - 1], arr[i]);
         sink.HandleValue(ctx, "record", null);
      }
   }
}
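// Hedged sketch of the comparator referenced above; cbSortString is not shown in these
// snippets, so its shape is an assumption: it compares the pre-extracted sort key stored
// in element 0 of each row. String.CompareOrdinal accepts null operands (null sorts before
// any non-null string), which matches rows whose sort column was missing.
private static int cbSortString(String[] a, String[] b)
{
   return String.CompareOrdinal(a[0], b[0]);
}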
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   int lineNo = -1;
   try
   {
      TextReader rdr = strm.CreateTextReader(encoding);

      int charsRead = 0;
      if ((mode & _Mode.lines) != 0)
      {
         //Line mode: emit each line as a separate value
         while (charsRead < maxToRead)
         {
            lineNo++;
            String line = rdr.ReadLine();
            if (line == null) break;
            if (line.Length == 0)
            {
               if ((mode & _Mode.stopAtEmpty) != 0) break;
            }
            sink.HandleValue(ctx, "record/line", line);
            charsRead += line.Length;
         }
      }
      else
      {
         //Key-value mode: split each (possibly folded) logical line into a key and a value
         lineNo++;
         String line = rdr.ReadLine();
         if (line != null) charsRead += line.Length;

         String key, value;
         while (line != null)
         {
            lineNo++;
            String nextLine = rdr.ReadLine();
            if (nextLine == null)
            {
               key = "record/" + splitKV(line, out value);
               sink.HandleValue(ctx, key, value);
               break;
            }
            charsRead += nextLine.Length;
            if (nextLine.Length == 0)
            {
               if ((mode & _Mode.stopAtEmpty) != 0) break;
               continue;
            }

            //A line starting with whitespace is a continuation of the previous line
            int offs = 0;
            for (; offs < nextLine.Length; offs++)
            {
               switch (nextLine[offs])
               {
                  case ' ':
                  case '\t': continue;
               }
               break;
            }
            if (offs > 0)
            {
               line = line + nextLine.Substring(offs);
               continue;
            }
            if (lenient && nextLine.IndexOf(':') < 0)
            {
               line = line + nextLine;
               continue;
            }

            key = "record/" + splitKV(line, out value);
            sink.HandleValue(ctx, key, value);
            line = nextLine;
         }
      }
      sink.HandleValue(ctx, "record", null);
      ctx.IncrementEmitted();
   }
   catch (Exception e)
   {
      e = new BMException(e, "{0}\nLine={1}.", e.Message, lineNo);
      ctx.HandleException(e);
   }
}
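// Illustrative input for the key-value mode above (RFC 822-style header folding). How a
// line is split into key and value depends on splitKV, which is not shown in these
// snippets:
//
//    Subject: weekly report
//    Received: from mail.example.com
//      by relay.example.com          <- starts with whitespace: folded into the line above
//    no-colon-continuation           <- also folded, but only when lenient is set
//
// Each logical (unfolded) line yields one "record/<key>" event, and the stream ends with
// the closing "record" event.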
public void Import(PipelineContext ctx, IDatasourceSink sink) { }
protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
{
   if (selector != null)
   {
      XmlHelper h = new XmlHelper();
      h.Load(strm.CreateTextReader(), elt.FullName);
      selector.Process(ctx, new XmlNodeWrapper(h.DocumentElement));
      return;
   }

   List<String> keys = new List<string>();
   List<String> values = new List<String>();
   int lvl = -1;
   XmlReader rdr = XmlReader.Create(strm);
   Logger l = ctx.DebugLog;
   while (rdr.Read())
   {
      if (dumpReader)
         l.Log("{0}: {1}, {2} [{3}]", rdr.Name, rdr.NodeType, rdr.IsEmptyElement, rdr.Value);
      switch (rdr.NodeType)
      {
         case XmlNodeType.CDATA:
         case XmlNodeType.Text:
         case XmlNodeType.Whitespace:
         case XmlNodeType.SignificantWhitespace:
            if (lvl <= 0) continue;
            values[lvl] = values[lvl] + rdr.Value;
            continue;

         case XmlNodeType.Element:
            lvl++;
            if (lvl >= keys.Count)
            {
               keys.Add(null);
               values.Add(null);
            }
            if (lvl == 0)
               keys[0] = rdr.Name;
            else
            {
               keys[lvl] = keys[lvl - 1] + "/" + rdr.Name;
               if (lvl == 1) ctx.IncrementEmitted();
            }
            //l.Log("{0}: [{1}, {2}]", lvl, keys[lvl], rdr.NodeType);

            bool isEmpty = rdr.IsEmptyElement; //Cache this value: after reading the attribs its value is lost
            if (rdr.AttributeCount > 0)
            {
               String pfx = keys[lvl] + "/@";
               for (int j = 0; j < rdr.AttributeCount; j++)
               {
                  rdr.MoveToNextAttribute();
                  sink.HandleValue(ctx, pfx + rdr.Name, rdr.Value);
               }
            }
            if (!isEmpty) continue;

            //l.Log("{0}: [{1}]", keys[lvl], rdr.NodeType);
            sink.HandleValue(ctx, keys[lvl], null);
            lvl--;
            continue;

         case XmlNodeType.EndElement:
            //l.Log("{0}: [{1}]", keys[lvl], rdr.NodeType);
            sink.HandleValue(ctx, keys[lvl], values[lvl]);
            values[lvl] = null;
            lvl--;
            continue;
      }
   }
   rdr.Close();
}
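// Illustrative trace for the reader-based path above (selector == null): for the input
//
//    <root>
//      <item id="1">first</item>
//      <item id="2"/>
//    </root>
//
// the emitted events are, in order:
//    root/item/@id -> "1"
//    root/item     -> "first"   (text collected until the end tag)
//    root/item/@id -> "2"
//    root/item     -> null      (empty element)
//    root          -> null      (text at level 0 is ignored, so nothing was collected)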