/// <summary>
/// Asks the sink (via the "record/_checkexist" event) whether the element is already known
/// and reports whether it still has to be imported.
/// </summary>
/// <param name="ctx">Pipeline context passed on to the sink.</param>
/// <param name="sink">Sink that answers the existence check.</param>
/// <param name="elt">Stream element that is a candidate for import.</param>
/// <returns>True when none of the Exist/ExistSame/ExistNewer flags is set in the answer.</returns>
protected virtual bool CheckNeedImport(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
        {
            Object     answer = sink.HandleValue(ctx, "record/_checkexist", elt);
            ExistState state  = toExistState(answer);

            //Any of these flags means: no need to convert this file
            ExistState present = state & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist);

            return present == 0;
        }
Esempio n. 2
0
        /// <summary>
        /// Starts the import of one stream element: emits the start-of-record values, decides
        /// whether the element can be skipped and otherwise offers a new worker to the queue.
        /// A worker that the queue hands back is emitted immediately.
        /// </summary>
        private void importUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
        {
            ctx.IncrementEmitted();
            TikaAsyncWorker worker = new TikaAsyncWorker(this, elt);
            String          path   = elt.FullName;

            sink.HandleValue(ctx, "record/_start", path);
            sink.HandleValue(ctx, "record/lastmodutc", worker.LastModifiedUtc);
            sink.HandleValue(ctx, "record/virtualFilename", elt.VirtualName);

            //For a non-full import we might be able to skip the conversion
            bool fullImport = (ctx.ImportFlags & _ImportFlags.ImportFull) != 0;
            if (!fullImport)
            {
                //Unchanged since the previous run (unless we are retrying errors)
                bool retryErrors = (ctx.ImportFlags & _ImportFlags.RetryErrors) != 0;
                if (!retryErrors && worker.LastModifiedUtc < previousRun)
                {
                    ctx.Skipped++;
                    return;
                }

                //The sink reports that it already has this record
                ExistState state   = toExistState(sink.HandleValue(ctx, "record/_checkexist", elt));
                ExistState present = state & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist);
                if (present != 0)
                {
                    ctx.Skipped++;
                    return;
                }
            }

            TikaAsyncWorker finished = pushPop(ctx, sink, worker);
            if (finished == null)
            {
                return;
            }
            importUrl(ctx, sink, finished);
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the stream as an HTML document (always decoded as UTF-8) and lets the selector
        /// process a navigator positioned on that document.
        /// </summary>
        protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            HtmlDocument document = new HtmlDocument();
            document.Load(strm, Encoding.UTF8); //fixme: detect encoding

            HtmlNodeNavigator navigator = (HtmlNodeNavigator)document.CreateNavigator();
            HtmlNodeWrapper   root      = new HtmlNodeWrapper(navigator);
            selector.Process(ctx, root);
        }
Esempio n. 4
0
        /// <summary>
        /// Opens the element's file as an Excel workbook and walks all worksheets. Every sheet is
        /// announced with "_sheet/_start" and "_sheet/_stop"; its content is only imported when no
        /// sheet filter is configured or when the sheet name matches that filter.
        /// </summary>
        /// <remarks>The workbook is opened from elt.FullName; the strm argument is not read here.</remarks>
        protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            Workbook workbook = null;
            Microsoft.Office.Interop.Excel.Application app = new Microsoft.Office.Interop.Excel.Application();

            try
            {
                workbook = app.Workbooks.Open(elt.FullName);
                foreach (Microsoft.Office.Interop.Excel.Worksheet ws in workbook.Worksheets)
                {
                    String sheetName = ws.Name;
                    sink.HandleValue(ctx, "_sheet/_start", sheetName);

                    bool wanted = selectedSheetsExpr == null || selectedSheetsExpr.IsMatch(sheetName);
                    if (wanted)
                    {
                        importSheet(ctx, sink, elt, ws);
                    }

                    sink.HandleValue(ctx, "_sheet/_stop", sheetName);
                }
            }
            finally
            {
                //Always release the workbook and the Excel application object
                closeWorkbook(ref workbook);
                Utils.FreeAndNil(ref app);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Opens a database connection and emits either every configured query or, when no
        /// queries are configured, the tables. Conversion errors are dumped to the import log
        /// afterwards and the connection is always released.
        /// </summary>
        public void Import(PipelineContext ctx, IDatasourceSink sink)
        {
            DbConnection conn = null;

            try
            {
                conn = createConnection();
                ctx.DebugLog.Log("Open SQL connection with [{0}], timeout={1} (sec).", conn.ConnectionString, conn.ConnectionTimeout);
                conn.Open();

                if (Queries != null)
                {
                    foreach (Query query in Queries)
                    {
                        EmitQuery(ctx, conn, query);
                    }
                }
                else
                {
                    //No explicit queries configured
                    EmitTables(ctx, conn);
                }

                dumpConversionErrors(ctx.ImportLog);
            }
            finally
            {
                Utils.FreeAndNil(ref conn);
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Emits one fixed record containing a value of several types (double, local date, UTC date,
 /// int, string) and then closes it by sending a null value on the "record" key.
 /// </summary>
 /// <param name="ctx">Pipeline context passed on to the sink.</param>
 /// <param name="sink">Sink that receives the values.</param>
 public void Import(PipelineContext ctx, IDatasourceSink sink)
 {
    sink.HandleValue(ctx, "record/double", 123.45);
    // Local time and UTC time are emitted side by side.
    sink.HandleValue(ctx, "record/date", DateTime.Now);
    sink.HandleValue(ctx, "record/utcdate", DateTime.UtcNow);
    sink.HandleValue(ctx, "record/int", -123);
    sink.HandleValue(ctx, "record/string", "foo bar");
    // The record key itself is sent last, without a value.
    sink.HandleValue(ctx, "record", null);
 }
Esempio n. 7
0
        /// <summary>
        /// Emits a JSON token to the sink. Arrays and objects are expanded recursively as long as
        /// the level budget allows it: array items go to key + "/_v", object members to
        /// key + "/" + generateObjectKey(name), followed by a null value on the key itself.
        /// Date, string, float, integer and boolean tokens are emitted as their .NET value,
        /// null-like tokens as null, and everything else (including containers that may not be
        /// expanded any further) as the token itself.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the values.</param>
        /// <param name="token">Token to emit; a null reference is ignored.</param>
        /// <param name="key">Event key under which the token is emitted.</param>
        /// <param name="maxLevel">Number of levels that may still be expanded (decremented per call).</param>
        public static void EmitToken(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
        {
            if (token == null)
            {
                return;
            }

            int        remaining = maxLevel - 1;
            JTokenType type      = token.Type;

            if (remaining >= 0)
            {
                if (type == JTokenType.Array)
                {
                    JArray items   = (JArray)token;
                    String itemKey = key + "/_v";
                    for (int idx = 0; idx < items.Count; idx++)
                    {
                        EmitToken(ctx, sink, items[idx], itemKey, remaining);
                    }
                    sink.HandleValue(ctx, key, null);
                    return;
                }

                if (type == JTokenType.Object)
                {
                    JObject members = (JObject)token;
                    foreach (var member in members)
                    {
                        EmitToken(ctx, sink, member.Value, key + "/" + generateObjectKey(member.Key), remaining);
                    }
                    sink.HandleValue(ctx, key, null);
                    return;
                }
            }

            //Not expanded: determine the value to emit
            Object emitted;
            switch (type)
            {
            case JTokenType.None:
            case JTokenType.Null:
            case JTokenType.Undefined:
                emitted = null;
                break;

            case JTokenType.Date:
                emitted = (DateTime)token;
                break;

            case JTokenType.String:
                emitted = (String)token;
                break;

            case JTokenType.Float:
                emitted = (double)token;
                break;

            case JTokenType.Integer:
                emitted = (Int64)token;
                break;

            case JTokenType.Boolean:
                emitted = (bool)token;
                break;

            default:
                emitted = token;
                break;
            }
            sink.HandleValue(ctx, key, emitted);
        }
Esempio n. 8
0
        /// <summary>
        /// Emits a JSON token to the sink, expanding arrays and objects recursively while the level
        /// budget allows it. Array items go to key + "/_v", object members to key + "/" + name; after
        /// the children a null value is sent on the key itself. Tokens that are not expanded are
        /// emitted as the token object, except null-like tokens, which are emitted as null.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the values.</param>
        /// <param name="token">Token to emit; a null reference is ignored.</param>
        /// <param name="key">Event key under which the token is emitted.</param>
        /// <param name="maxLevel">Number of levels that may still be expanded (decremented per call).</param>
        public static void SplitTokens(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
        {
            if (token == null)
            {
                return;
            }

            int        remaining = maxLevel - 1;
            JTokenType type      = token.Type;

            if (remaining >= 0)
            {
                if (type == JTokenType.Array)
                {
                    JArray items   = (JArray)token;
                    String itemKey = key + "/_v";
                    for (int idx = 0; idx < items.Count; idx++)
                    {
                        SplitTokens(ctx, sink, items[idx], itemKey, remaining);
                    }
                    sink.HandleValue(ctx, key, null);
                    return;
                }

                if (type == JTokenType.Object)
                {
                    JObject members = (JObject)token;
                    String  prefix  = key + '/';
                    foreach (var member in members)
                    {
                        SplitTokens(ctx, sink, member.Value, prefix + member.Key, remaining);
                    }
                    sink.HandleValue(ctx, key, null);
                    return;
                }
            }

            //Not expanded: null-like tokens become null, anything else is passed as the token itself
            bool nullLike = type == JTokenType.None
                            || type == JTokenType.Null
                            || type == JTokenType.Undefined;
            Object emitted = nullLike ? null : (Object)token;
            sink.HandleValue(ctx, key, emitted);
        }
Esempio n. 9
0
        /// <summary>
        /// Emits the accounts that have read access to a file. For every non-deny access rule
        /// that grants ReadData, the account is emitted on "record/security/group/allow" when
        /// it is either a group or one of the well-known "authenticated users" / "world" SIDs.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the accounts.</param>
        /// <param name="fileName">Full name of the file whose access rules are inspected.</param>
        private void emitSecurity(PipelineContext ctx, IDatasourceSink sink, String fileName)
        {
            FileInfo info  = new FileInfo(fileName);
            var      ac    = info.GetAccessControl();
            var      rules = ac.GetAccessRules(true, true, typeof(NTAccount));

            foreach (AuthorizationRule rule in rules)
            {
                FileSystemAccessRule fsRule = rule as FileSystemAccessRule;
                if (fsRule == null)
                {
                    //Not a file system rule: nothing to inspect (previously a NullReferenceException)
                    continue;
                }
                if (fsRule.AccessControlType == AccessControlType.Deny)
                {
                    continue;
                }
                //ctx.ImportLog.Log("rule2 {0}: {1}", securityCache.GetAccount(rule.IdentityReference), fsRule.FileSystemRights);
                if ((fsRule.FileSystemRights & FileSystemRights.ReadData) == 0)
                {
                    continue;
                }

                //Deny rules were skipped above, so the deny branch is not reached from here
                String access = null;
                switch (fsRule.AccessControlType)
                {
                case AccessControlType.Allow: access = "/allow"; break;

                case AccessControlType.Deny: access = "/deny"; break;

                default: access = "/" + fsRule.ToString().ToLowerInvariant(); break;
                }

                var account = securityCache.GetAccount(rule.IdentityReference);
                if (account.WellKnownSid != null)
                {
                    //Of the well-known SIDs, only "authenticated users" and "world" are emitted
                    WellKnownSidType sidType = (WellKnownSidType)account.WellKnownSid;
                    //ctx.ImportLog.Log("wellksid={0}", sidType);
                    switch (sidType)
                    {
                    case WellKnownSidType.AuthenticatedUserSid:
                    case WellKnownSidType.WorldSid:
                        break;

                    default: continue;
                    }
                }
                else
                {
                    //Other accounts are only emitted when they are groups
                    if (!account.IsGroup)
                    {
                        continue;
                    }
                }
                sink.HandleValue(ctx, "record/security/group" + access, account);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Emits the used cells of a worksheet row by row. The optional header row (headersAt,
        /// relative to the first used row) supplies names that are passed to prepareEventKeys;
        /// rows are emitted starting at startAt (also relative to the first used row). After the
        /// cells of a row, a null value is sent on keys[0] and the emitted counter is incremented.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the cell values.</param>
        /// <param name="elt">Stream element the sheet belongs to (not used here).</param>
        /// <param name="sheet">Worksheet to import.</param>
        private void importSheet(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Worksheet sheet)
        {
            Range used      = sheet.UsedRange;
            Range usedCells = used.Cells;

            if (usedCells == null)
            {
                return;
            }

            Object raw = usedCells.Value2;
            if (raw == null)
            {
                return;
            }

            Object[,] c = raw as Object[, ];
            if (c == null)
            {
                //A range of exactly one cell yields a scalar instead of an array. Wrap it in a
                //1x1 array with lower bounds of 1, like the arrays Excel returns for larger ranges,
                //instead of failing with an InvalidCastException.
                c       = (Object[, ])Array.CreateInstance(typeof(Object), new int[] { 1, 1 }, new int[] { 1, 1 });
                c[1, 1] = raw;
            }

            int lo1 = c.GetLowerBound(0);
            int hi1 = c.GetUpperBound(0);
            int lo2 = c.GetLowerBound(1);
            int hi2 = c.GetUpperBound(1);

            List <String> headers = new List <string>();

            if (headersAt >= 0)
            {
                int headersRow = lo1 + headersAt;
                if (headersRow <= hi1)
                {
                    int h = 0;
                    for (int j = lo2; j <= hi2; j++)
                    {
                        //Pad with nulls so that headers[j] lines up with column index j
                        for (; h < j; h++)
                        {
                            headers.Add(null);
                        }
                        headers.Add(_toString(c[headersRow, j]));
                        h++;
                    }
                }
            }

            var keys = prepareEventKeys(sheet.Name, hi2 + 1, headers);

            for (int i = lo1 + startAt; i <= hi1; i++)
            {
                for (int j = lo2; j <= hi2; j++)
                {
                    sink.HandleValue(ctx, keys[j], c[i, j]);
                }
                //keys[0] is sent without a value once the row is complete
                sink.HandleValue(ctx, keys[0], null);
                ctx.IncrementEmitted();
            }
        }
Esempio n. 11
0
 /// <summary>
 /// Opens the stream as a CDO message and emits its subject, bcc, cc, from and to values,
 /// followed by a null value on the "record" key.
 /// </summary>
 /// <remarks>
 /// The message object is released in a finally block, so it is also freed when opening
 /// the message or emitting one of its properties throws.
 /// </remarks>
 protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
 {
     CDO.IMessage msg = new CDO.Message();
     try
     {
         msg.DataSource.OpenObject(new IStreamFromStream(strm), "IStream");
         sink.HandleValue(ctx, "record/subject", msg.Subject);
         sink.HandleValue(ctx, "record/bcc", msg.BCC);
         sink.HandleValue(ctx, "record/cc", msg.CC);
         sink.HandleValue(ctx, "record/from", msg.From);
         sink.HandleValue(ctx, "record/to", msg.To);
     }
     finally
     {
         Utils.FreeAndNil(ref msg);
     }
     sink.HandleValue(ctx, "record", null);
 }
Esempio n. 12
0
 /// <summary>
 /// Talks to the worker queue: without a new element it calls Pop(), otherwise it calls
 /// PushAndOptionalPop(newElt). Whatever the queue returns is cast to a TikaAsyncWorker.
 /// </summary>
 /// <returns>
 /// The worker returned by the queue, or null when the queue call threw. In that case the
 /// exception is passed to ctx.HandleException.
 /// </returns>
 private TikaAsyncWorker pushPop(PipelineContext ctx, IDatasourceSink sink, TikaAsyncWorker newElt)
 {
     try
     {
         if (newElt == null)
         {
             return (TikaAsyncWorker)workerQueue.Pop();
         }
         return (TikaAsyncWorker)workerQueue.PushAndOptionalPop(newElt);
     }
     catch (Exception err)
     {
         ctx.HandleException(err);
     }
     return null;
 }
Esempio n. 13
0
        /// <summary>
        /// Emits the result of a worker taken from the queue: the start-of-record values first,
        /// then the converted content (properties, optional security info, mail/attachment info,
        /// file size, abstract, head and body) and finally the end-of-record values.
        /// Exceptions raised while emitting the converted content go to ctx.HandleException.
        /// </summary>
        private void importUrl(PipelineContext ctx, IDatasourceSink sink, TikaAsyncWorker worker)
        {
            String path = worker.StreamElt.FullName;

            sink.HandleValue(ctx, "record/_start", path);
            sink.HandleValue(ctx, "record/lastmodutc", worker.LastModifiedUtc);
            sink.HandleValue(ctx, "record/virtualFilename", worker.StreamElt.VirtualName);
            sink.HandleValue(ctx, "record/virtualRoot", worker.StreamElt.VirtualRoot);

            try
            {
                var html = worker.HtmlProcessor;

                if (worker.StoredAs != null)
                {
                    sink.HandleValue(ctx, "record/converted_file", worker.StoredAs);
                }

                //Every html property becomes a field of the record
                foreach (var prop in html.Properties)
                {
                    sink.HandleValue(ctx, "record/" + prop.Key, prop.Value);
                }

                if (mustEmitSecurity)
                {
                    emitSecurity(ctx, sink, path);
                }

                //Mail and attachment information
                sink.HandleValue(ctx, "record/_istextmail", html.IsTextMail);
                sink.HandleValue(ctx, "record/_numparts", html.numParts);
                sink.HandleValue(ctx, "record/_numattachments", html.Attachments.Count);
                foreach (var attachment in html.Attachments)
                {
                    sink.HandleValue(ctx, "record/_attachment", attachment);
                }

                //Size and content
                sink.HandleValue(ctx, "record/_filesize", worker.FileSize);
                sink.HandleValue(ctx, "record/shortcontent", html.GetAbstract(abstractLength, abstractDelta));
                sink.HandleValue(ctx, "record/head", html.GetInnerHead());
                sink.HandleValue(ctx, "record/content", html.GetInnerBody());

                //Close the record
                sink.HandleValue(ctx, "record/_end", path);
                sink.HandleValue(ctx, "record", null);
            }
            catch (Exception err)
            {
                ctx.HandleException(err);
            }
        }
Esempio n. 14
0
 /// <summary>
 /// Imports every provider returned by the stream directory. A failure is rethrown as a
 /// BMException whose message also carries the Uri of the failing element.
 /// </summary>
 public void Import(PipelineContext ctx, IDatasourceSink sink)
 {
     var providers = this.streamDirectory.GetProviders(ctx);
     foreach (var provider in providers)
     {
         try
         {
             importUrl(ctx, sink, provider);
         }
         catch (Exception err)
         {
             String msg = err.Message + "\r\nUrl=" + provider.Uri + ".";
             throw new BMException(err, msg);
         }
     }
 }
Esempio n. 15
0
 /// <summary>
 /// Imports every provider returned by the stream directory. A failure is rethrown as a
 /// BMException whose message is built by WrapMessage from the original exception and the
 /// string form of the failing element.
 /// </summary>
 public void Import(PipelineContext ctx, IDatasourceSink sink)
 {
     var providers = streamDirectory.GetProviders(ctx);
     foreach (var provider in providers)
     {
         try
         {
             importUrl(ctx, sink, provider);
         }
         catch (Exception err)
         {
             String msg = WrapMessage(err, provider.ToString(), "{0}\r\nUrl={1}.");
             throw new BMException(err, msg);
         }
     }
 }
        /// <summary>
        /// Imports one stream element. The element is skipped when a non-full import finds that
        /// it does not need importing (see CheckNeedImport) or when ctx.SkipUntilKey equals
        /// "record". Otherwise its stream is created and handed to ImportStream. In both cases
        /// the emitted counter is incremented at least once for the element.
        /// </summary>
        protected virtual void ImportUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
        {
            int emittedBefore = ctx.Emitted;

            if (addEmitted)
            {
                ctx.IncrementEmitted();
            }
            DateTime lastModified = elt.LastModified;

            ctx.SendItemStart(elt);
            //TODO if ((ctx.ActionFlags & _ActionFlags.Skip) != 0

            //Decide whether this file must be skipped
            bool skip;
            if ((ctx.ImportFlags & _ImportFlags.ImportFull) == 0 && !CheckNeedImport(ctx, sink, elt))
            {
                skip = true; //Not a full import, and no import needed
            }
            else
            {
                skip = ctx.SkipUntilKey == "record";
            }

            if (!skip)
            {
                using (Stream input = _CreateStream(ctx, elt))
                {
                    ImportStream(ctx, sink, elt, input);
                }
                //Make sure the element is counted when ImportStream did not count anything
                if (!addEmitted && emittedBefore == ctx.Emitted)
                {
                    ctx.IncrementEmitted();
                }
                ctx.OptSendItemStop();
                return;
            }

            //Skipped element
            ctx.Skipped++;
            if (!addEmitted && emittedBefore == ctx.Emitted)
            {
                ctx.IncrementEmitted();
            }
            if (logSkips)
            {
                ctx.DebugLog.Log("Skipped: {0}. Date={1}", elt.FullName, elt.LastModified);
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Runs the Tika import: creates the worker queue, raises the default connection limit
        /// when it is lower than the requested parallelism, makes sure the Tika service is
        /// started, imports all providers and finally emits the workers still left in the queue.
        /// The queue is emptied and the security cache released in all cases.
        /// </summary>
        public void Import(PipelineContext ctx, IDatasourceSink sink)
        {
            workerQueue = AsyncRequestQueue.Create(maxParallel);
            ctx.ImportLog.Log("TikaDS starting. maxparallel={0}, dbgstore={1}, Q={2}", maxParallel, DbgStoreDir, workerQueue);

            bool limitTooLow = maxParallel >= 2 && ServicePointManager.DefaultConnectionLimit < maxParallel;
            if (limitTooLow)
            {
                ctx.ImportLog.Log("Updating connectionLimit for {0} to {1}", ServicePointManager.DefaultConnectionLimit, maxParallel);
                ServicePointManager.DefaultConnectionLimit = maxParallel;
            }

            ensureTikaServiceStarted(ctx);
            previousRun = ctx.RunAdministrations.GetLastOKRunDateShifted(ctx.DatasourceAdmin);
            ctx.ImportLog.Log("Previous (shifted) run was {0}.", previousRun);
            //GenericStreamProvider.DumpRoots(ctx, streamDirectory);
            try
            {
                if (this.mustEmitSecurity)
                {
                    securityCache = new SecurityCache(TikaSecurityAccount.FactoryImpl);
                }

                foreach (var provider in streamDirectory.GetProviders(ctx))
                {
                    try
                    {
                        importUrl(ctx, sink, provider);
                    }
                    catch (Exception err)
                    {
                        throw new BMException(err, "{0}\r\nUrl={1}.", err.Message, provider);
                    }
                }

                //Drain the workers that are still queued
                for (TikaAsyncWorker pending = pushPop(ctx, sink, null);
                     pending != null;
                     pending = pushPop(ctx, sink, null))
                {
                    importUrl(ctx, sink, pending);
                }
            }
            finally
            {
                workerQueue.PopAllWithoutException();
                Utils.FreeAndNil(ref securityCache);
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Lets the endpoint emit the record identified by the pipeline variable "key". The record
        /// is emitted to the datasource when the destination is Destination.Datasource, otherwise
        /// to the pipeline.
        /// </summary>
        /// <returns>Null when the "key" variable is not set; otherwise the incoming value.</returns>
        public override Object HandleValue(PipelineContext ctx, String key, Object value)
        {
            IDatasourceSink target;
            if (destination == Destination.Datasource)
            {
                target = (IDatasourceSink)ctx.DatasourceAdmin.Datasource;
            }
            else
            {
                target = ctx.Pipeline;
            }

            String recordKey = (String)ctx.Pipeline.GetVariable("key");
            if (recordKey != null)
            {
                this.endPoint.EmitRecord(ctx, recordKey, recField, target, eventKey, maxLevel);
                return value;
            }
            return null;
        }
Esempio n. 19
0
        /// <summary>
        /// Emits only the children of a container token: array items via SplitTokens on
        /// key + "/_v", object members via SplitTokens on key + "/" + name, each followed by a
        /// null value on the key itself. Nothing is emitted for other token types or when the
        /// level budget is exhausted.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the values.</param>
        /// <param name="token">Container token; a null reference is ignored.</param>
        /// <param name="key">Event key of the container.</param>
        /// <param name="maxLevel">Number of levels that may still be expanded (decremented per call).</param>
        public static void SplitInnerTokens(PipelineContext ctx, IDatasourceSink sink, JToken token, String key, int maxLevel)
        {
            if (token == null)
            {
                return;
            }

            int remaining = maxLevel - 1;
            if (remaining < 0)
            {
                return;
            }

            JTokenType type = token.Type;
            if (type == JTokenType.Array)
            {
                JArray items   = (JArray)token;
                String itemKey = key + "/_v";
                for (int idx = 0; idx < items.Count; idx++)
                {
                    SplitTokens(ctx, sink, items[idx], itemKey, remaining);
                }
                sink.HandleValue(ctx, key, null);
            }
            else if (type == JTokenType.Object)
            {
                JObject members = (JObject)token;
                String  prefix  = key + '/';
                foreach (var member in members)
                {
                    SplitTokens(ctx, sink, member.Value, prefix + member.Key, remaining);
                }
                sink.HandleValue(ctx, key, null);
            }
        }
Esempio n. 20
0
        /// <summary>
        /// Parses the complete stream as one JSON value and emits it as "record". When the value
        /// is an array, every item is emitted as a separate record. The emitted counter is
        /// incremented once per record.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the records.</param>
        /// <param name="strm">Stream holding the JSON text; it is closed by this method.</param>
        /// <param name="splitUntil">Level budget handed to Pipeline.EmitToken.</param>
        private void importRecord(PipelineContext ctx, IDatasourceSink sink, Stream strm, int splitUntil)
        {
            JToken         jt;
            JsonTextReader rdr = new JsonTextReader(new StreamReader(strm, true));

            try
            {
                jt = JToken.ReadFrom(rdr);
            }
            finally
            {
                //Also release the reader and the stream when the JSON could not be parsed
                rdr.Close();
                strm.Close();
            }

            if (jt.Type != JTokenType.Array)
            {
                Pipeline.EmitToken(ctx, sink, jt, "record", splitUntil);
                ctx.IncrementEmitted();
            }
            else
            {
                foreach (var item in (JArray)jt)
                {
                    Pipeline.EmitToken(ctx, sink, item, "record", splitUntil);
                    ctx.IncrementEmitted();
                }
            }
        }
Esempio n. 21
0
        //private Stream createInputStream (String fn)
        //{
        //   FileStream fs = new FileStream(fn, FileMode.Open, FileAccess.Read, FileShare.Read, 16 * 1024, false);
        //   //_FileStream fs = new _FileStream(fn, _FileMode.Open, _FileAccess.Read, _FileShare.Read, 16 * 1024);
        //   String ext = Path.GetExtension(fn);
        //   if (!String.Equals(".gz", ext, StringComparison.OrdinalIgnoreCase)) goto NO_ZIP;
        //   byte[] buf = new byte[2];
        //   fs.Read(buf, 0, 2);
        //   if (buf[0] != 0x1f || buf[1] != 0x8b) goto NO_ZIP;
        //   fs.Position = 0;
        //   return new GZipStream(fs, CompressionMode.Decompress, false);

        //NO_ZIP:
        //   return fs;
        //}


        /// <summary>
        /// Imports a CSV stream. When a sort key is configured the work is delegated to
        /// ImportSortedStream; otherwise every record read from the CSV reader is emitted between
        /// a "record/_start" and a "record" event, one value per field. Records whose line number
        /// lies before startAt are ignored. A warning is logged when the reader reports invalid
        /// records.
        /// </summary>
        protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            if (oneBasedSortKey != 0)
            {
                ImportSortedStream(ctx, sink, elt, strm);
                return;
            }

            CsvReader reader = createReader(strm);
            optReadHeader(reader);

            List <String> emitKeys  = createKeysForEmit();
            int           firstLine = this.startAt;

            while (reader.NextRecord())
            {
                bool beforeStart = firstLine > 0 && firstLine > reader.Line;
                if (beforeStart)
                {
                    continue;
                }

                ctx.IncrementEmitted();
                sink.HandleValue(ctx, "record/_start", null);

                var values = reader.Fields;
                int count  = values.Count;
                //ctx.DebugLog.Log("Record {0}. FC={1}", line, fieldCount);

                //A record can have more fields than we have keys for so far
                generateMissingKeysForEmit(emitKeys, count);
                for (int col = 0; col < count; col++)
                {
                    sink.HandleValue(ctx, emitKeys[col], values[col]);
                }
                sink.HandleValue(ctx, "record", null);
            }

            int invalid = reader.NumInvalidRecords;
            if (invalid > 0)
            {
                ctx.ImportLog.Log(_LogType.ltWarning, "Invalid records detected: {0}", invalid);
            }
        }
 /// <summary>
 /// Imports every provider returned by the stream directory, surrounded by the
 /// _BeforeImport / _AfterImport hooks. A failing element does not stop the import: its
 /// exception is wrapped in a BMException (message built by WrapMessage) and passed to
 /// ctx.HandleException.
 /// </summary>
 public void Import(PipelineContext ctx, IDatasourceSink sink)
 {
     _BeforeImport(ctx, sink);
     try
     {
         var providers = streamDirectory.GetProviders(ctx);
         foreach (var provider in providers)
         {
             try
             {
                 ImportUrl(ctx, sink, provider);
             }
             catch (Exception err)
             {
                 String    msg     = WrapMessage(err, provider.ToString(), "{0}\r\nUrl={1}.");
                 Exception wrapped = new BMException(err, msg);
                 ctx.HandleException(wrapped);
             }
         }
     }
     finally
     {
         _AfterImport(ctx, sink);
     }
 }
Esempio n. 23
0
        /// <summary>
        /// Emits all variables to the sink under key + "/" + variable name. A variable holding a
        /// JToken is expanded via SplitTokens when maxLevel is positive; every other variable
        /// (and every variable when maxLevel is not positive) is emitted as-is.
        /// </summary>
        /// <remarks>
        /// The event key is built from the variable's name (kvp.Key). It was previously built
        /// from kvp.Value, which put the string form of the value into the key.
        /// </remarks>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the values.</param>
        /// <param name="key">Key prefix for the emitted variables.</param>
        /// <param name="maxLevel">Level budget handed to SplitTokens.</param>
        public void EmitVariables(PipelineContext ctx, IDatasourceSink sink, String key, int maxLevel)
        {
            if (variables == null)
            {
                return;
            }

            foreach (var kvp in variables)
            {
                var tmpkey = key + '/' + kvp.Key;

                //Only try to split the value when we are allowed to descend
                JToken tk = (maxLevel > 0) ? kvp.Value as JToken : null;
                if (tk != null)
                {
                    SplitTokens(ctx, sink, tk, tmpkey, maxLevel);
                    continue;
                }

                sink.HandleValue(ctx, tmpkey, kvp.Value);
            }
        }
Esempio n. 24
0
 /// <summary>
 /// Hook for emitting a stored record to a sink. This base implementation does nothing;
 /// the method is virtual so that derived classes can supply the behavior.
 /// </summary>
 public virtual void EmitRecord(PipelineContext ctx, String recordKey, String recordField, IDatasourceSink sink, String eventKey, int maxLevel)
 {
 }
Esempio n. 25
0
        /// <summary>
        /// Loads the record with the given key and emits it (or one of its fields) to the sink
        /// via Pipeline.EmitToken. Nothing is emitted when the record or the requested field
        /// cannot be found.
        /// </summary>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="recordKey">Key of the record to load.</param>
        /// <param name="recordField">Field to emit (looked up case-insensitively); null emits the whole record.</param>
        /// <param name="sink">Sink that receives the values.</param>
        /// <param name="eventKey">Event key under which the token is emitted.</param>
        /// <param name="maxLevel">Level budget handed to Pipeline.EmitToken.</param>
        public override void EmitRecord(PipelineContext ctx, String recordKey, String recordField, IDatasourceSink sink, String eventKey, int maxLevel)
        {
            JObject record = DocType.LoadByKey(Connection, recordKey);
            if (record == null)
            {
                return;
            }

            JToken selected;
            if (recordField == null)
            {
                selected = record;
            }
            else
            {
                selected = record.GetValue(recordField, StringComparison.InvariantCultureIgnoreCase);
            }

            if (selected == null)
            {
                return;
            }
            Pipeline.EmitToken(ctx, sink, selected, eventKey, maxLevel);
        }
Esempio n. 26
0
        /// <summary>
        /// Imports one JSON stream element. The element is ignored when the Skip action flag is
        /// set after the item start was sent, and skipped when a non-full import finds that the
        /// record already exists. Otherwise the stream is imported either as one JSON value or,
        /// in object-per-line mode, as one JSON value per line.
        /// </summary>
        /// <remarks>
        /// Changes compared to the previous implementation:
        /// the per-element "@objectperline" setting is now honored (the instance default was
        /// always used); lines are split correctly, so data after the first newline in a read
        /// buffer is no longer dropped; every line gets its own stream, because importRecord
        /// closes the stream it reads from; and the source stream is always disposed.
        /// </remarks>
        /// <param name="ctx">Pipeline context passed on to the sink.</param>
        /// <param name="sink">Sink that receives the records.</param>
        /// <param name="elt">Stream element to import.</param>
        private void importUrl(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt)
        {
            int  splitUntil    = elt.ContextNode.ReadInt("@splituntil", this.splitUntil);
            bool objectPerLine = elt.ContextNode.ReadBool("@objectperline", this.objectPerLine);

            ctx.SendItemStart(elt);
            if ((ctx.ActionFlags & _ActionFlags.Skip) != 0)
            {
                return;
            }

            ExistState existState = ExistState.NotExist;

            if ((ctx.ImportFlags & _ImportFlags.ImportFull) == 0) //Not a full import
            {
                existState = toExistState(sink.HandleValue(ctx, "record/_checkexist", null));
            }

            //Check if we need to convert this file
            if ((existState & (ExistState.ExistSame | ExistState.ExistNewer | ExistState.Exist)) != 0)
            {
                ctx.Skipped++;
                ctx.ImportLog.Log("Skipped: {0}. Date={1}", elt, 0);// dtFile);
                return;
            }

            Stream fs = null;

            try
            {
                fs = elt.CreateStream(ctx);
                if (!objectPerLine)
                {
                    importRecord(ctx, sink, fs, splitUntil);
                }
                else
                {
                    byte[]       buf     = new byte[4096];
                    MemoryStream pending = new MemoryStream(); //Bytes of the line that is being collected
                    while (true)
                    {
                        int len = fs.Read(buf, 0, buf.Length);
                        if (len <= 0)
                        {
                            break;
                        }

                        int lineStart = 0;
                        for (int i = 0; i < len; i++)
                        {
                            if (buf[i] != '\n')
                            {
                                continue;
                            }
                            //Line complete: it may have started in a previous buffer
                            pending.Write(buf, lineStart, i - lineStart);
                            lineStart = i + 1;
                            importJsonLine(ctx, sink, pending, splitUntil);
                        }

                        //Keep the unfinished tail for the next read
                        pending.Write(buf, lineStart, len - lineStart);
                    }

                    //Last line without a terminating newline
                    importJsonLine(ctx, sink, pending, splitUntil);
                }
                ctx.OptSendItemStop();
            }
            catch (Exception e)
            {
                ctx.HandleException(e);
            }
            finally
            {
                if (fs != null)
                {
                    fs.Dispose();
                }
            }
        }

        /// <summary>
        /// Imports the bytes collected in <paramref name="pending"/> as one JSON record and
        /// empties the collector. Lines that only contain spaces, tabs or carriage returns
        /// are ignored.
        /// </summary>
        private void importJsonLine(PipelineContext ctx, IDatasourceSink sink, MemoryStream pending, int splitUntil)
        {
            byte[] line = pending.ToArray();
            pending.SetLength(0);

            bool blank = true;
            for (int i = 0; i < line.Length; i++)
            {
                byte b = line[i];
                if (b != ' ' && b != '\t' && b != '\r')
                {
                    blank = false;
                    break;
                }
            }
            if (blank)
            {
                return;
            }

            //importRecord closes the stream it is given, so hand it a private copy
            importRecord(ctx, sink, new MemoryStream(line), splitUntil);
        }
Esempio n. 27
0
        /// <summary>
        /// Imports a CSV stream in sorted order. All records are first read into memory, with the
        /// value of the sort column copied into slot 0 of each row. The rows are sorted with
        /// cbSortString and then emitted in ascending order, or in descending order when the
        /// SORTKEY_REVERSE bit is set in oneBasedSortKey. The first (at most 10) sort keys are
        /// written to the debug log before and after sorting.
        /// </summary>
        protected void ImportSortedStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            var rows          = new List <String[]>();
            int maxFieldCount = 0;

            CsvReader reader = createReader(strm);
            optReadHeader(reader);

            int firstLine = this.startAt;
            int sortIndex = (oneBasedSortKey & ~SORTKEY_REVERSE) - 1; //zero based

            //Phase 1: collect all rows. Slot 0 holds the sortkey, the fields follow
            while (reader.NextRecord())
            {
                bool beforeStart = firstLine > 0 && firstLine > reader.Line;
                if (beforeStart)
                {
                    continue;
                }

                var values = reader.Fields;
                int count  = values.Count;
                if (maxFieldCount < count)
                {
                    maxFieldCount = count;
                }

                String[] row = new String[count + 1];
                for (int col = 0; col < count; col++)
                {
                    row[col + 1] = values[col];
                }
                if (sortIndex < count)
                {
                    row[0] = row[sortIndex + 1];
                }
                rows.Add(row);
            }

            //Phase 2: sort, logging a preview of the keys before and after
            int preview = rows.Count > 10 ? 10 : rows.Count;

            ctx.DebugLog.Log("First 10 sortkeys:");
            for (int p = 0; p < preview; p++)
            {
                ctx.DebugLog.Log("-- [{0}]: '{1}'", p, rows[p][0]);
            }

            if (sortIndex >= 0)
            {
                rows.Sort(cbSortString);
            }

            ctx.DebugLog.Log("First 10 sortkeys after sort:");
            for (int p = 0; p < preview; p++)
            {
                ctx.DebugLog.Log("-- [{0}]: '{1}'", p, rows[p][0]);
            }

            //Phase 3: emit, using pre-calculated keys
            List <String> emitKeys = createKeysForEmit();
            generateMissingKeysForEmit(emitKeys, maxFieldCount);

            bool reverse = (oneBasedSortKey & SORTKEY_REVERSE) != 0;
            int  total   = rows.Count;
            for (int n = 0; n < total; n++)
            {
                //Walk from the front for normal order, from the back for reverse order
                int r = reverse ? total - 1 - n : n;

                ctx.IncrementEmitted();
                String[] row = rows[r];
                rows[r] = null; //Let this element be GC-ed

                sink.HandleValue(ctx, "record/_start", null);
                for (int col = 1; col < row.Length; col++) //row[0] is the sortkey
                {
                    sink.HandleValue(ctx, emitKeys[col - 1], row[col]);
                }
                sink.HandleValue(ctx, "record", null);
            }
        }
Esempio n. 28
0
        /// <summary>
        /// Reads text from <paramref name="strm"/> and emits its content to <paramref name="sink"/> as a single record.
        /// </summary>
        /// <remarks>
        /// Lines mode (<c>_Mode.lines</c>): every line is emitted as "record/line" until the end of the stream,
        /// until at least maxToRead characters were read, or (with <c>_Mode.stopAtEmpty</c>) until the first empty line.
        /// Key/value mode (otherwise): a line that starts with a space or tab is treated as a continuation of the
        /// previous line; in lenient mode a line without a ':' is treated as a continuation as well. Each completed
        /// line is split by splitKV and emitted as "record/" + key. maxToRead is not applied in this mode.
        /// Any exception is wrapped in a BMException that mentions the line number and is handed to ctx.HandleException.
        /// </remarks>
        /// <param name="ctx">Pipeline context; receives the emitted-count increment and any exception.</param>
        /// <param name="sink">Receives the emitted values and the closing "record" event.</param>
        /// <param name="elt">Provider of the stream (not used by this method).</param>
        /// <param name="strm">Stream to read the text from.</param>
        protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            int lineNo = -1;

            try
            {
                TextReader rdr = strm.CreateTextReader(encoding);

                if ((mode & _Mode.lines) != 0)
                {
                    int charsRead = 0;
                    while (charsRead < maxToRead)
                    {
                        lineNo++;
                        String line = rdr.ReadLine();
                        if (line == null)
                        {
                            break;
                        }
                        if (line.Length == 0 && (mode & _Mode.stopAtEmpty) != 0)
                        {
                            break;
                        }
                        sink.HandleValue(ctx, "record/line", line);
                        charsRead += line.Length;
                    }
                }
                else
                {
                    lineNo++;
                    String line = rdr.ReadLine();
                    String key, value;
                    while (line != null)
                    {
                        lineNo++;
                        String nextLine = rdr.ReadLine();

                        //Stop at end-of-stream, or at an empty line when stopAtEmpty is requested.
                        //In both cases the pending line must be flushed first: otherwise the last
                        //key/value before the empty line would be silently dropped.
                        bool stopHere = nextLine == null
                                        || (nextLine.Length == 0 && (mode & _Mode.stopAtEmpty) != 0);
                        if (stopHere)
                        {
                            key = "record/" + splitKV(line, out value);
                            sink.HandleValue(ctx, key, value);
                            break;
                        }

                        if (nextLine.Length == 0)
                        {
                            continue; //Skip empty lines; the pending line stays pending
                        }

                        //Count leading spaces/tabs: an indented line continues the pending line
                        int offs = 0;
                        while (offs < nextLine.Length && (nextLine[offs] == ' ' || nextLine[offs] == '\t'))
                        {
                            offs++;
                        }

                        if (offs > 0)
                        {
                            line = line + nextLine.Substring(offs);
                            continue;
                        }

                        //Lenient: a line without a key separator is glued to the pending line
                        if (lenient && nextLine.IndexOf(':') < 0)
                        {
                            line = line + nextLine;
                            continue;
                        }

                        //nextLine starts a new key/value: emit the pending one and move on
                        key = "record/" + splitKV(line, out value);
                        sink.HandleValue(ctx, key, value);
                        line = nextLine;
                    }
                }
                sink.HandleValue(ctx, "record", null);
                ctx.IncrementEmitted();
            }
            catch (Exception e)
            {
                e = new BMException(e, "{0}\nLine={1}.", e.Message, lineNo);
                ctx.HandleException(e);
            }
        }
Esempio n. 29
0
 /// <summary>
 /// Import entry point with an empty body: nothing is read from <paramref name="ctx"/>
 /// and nothing is emitted to <paramref name="sink"/>.
 /// </summary>
 public void Import(PipelineContext ctx, IDatasourceSink sink)
 {
 }
Esempio n. 30
0
        /// <summary>
        /// Imports an XML stream and emits its elements and attributes to <paramref name="sink"/>.
        /// </summary>
        /// <remarks>
        /// When a selector is set, the document is loaded into an XmlHelper and its document element is
        /// passed to the selector; nothing else is done.
        /// Otherwise the stream is read forward-only. Every element gets a key consisting of the element names
        /// from the root down, separated by '/'. Attributes are emitted as key + "/@" + attribute name.
        /// Text, CDATA and whitespace are accumulated per nesting level and emitted with the element's key when
        /// its end tag is read; content directly inside the root element (level 0) is ignored.
        /// Empty elements are emitted with a null value. ctx.IncrementEmitted() is called for every element
        /// that is a direct child of the root.
        /// </remarks>
        /// <param name="ctx">Pipeline context; supplies the debug log and counts emitted records.</param>
        /// <param name="sink">Receives the emitted keys and values.</param>
        /// <param name="elt">Provider of the stream; its FullName is passed to the XmlHelper when a selector is used.</param>
        /// <param name="strm">Stream containing the XML.</param>
        protected override void ImportStream(PipelineContext ctx, IDatasourceSink sink, IStreamProvider elt, Stream strm)
        {
            if (selector != null)
            {
                XmlHelper h = new XmlHelper();
                h.Load(strm.CreateTextReader(), elt.FullName);

                selector.Process(ctx, new XmlNodeWrapper(h.DocumentElement));
                return;
            }

            List <String> keys   = new List <string>();
            List <String> values = new List <String>();
            int           lvl    = -1;

            //The using-block guarantees that the reader is released when the sink or the parser throws.
            //XmlReader.Create(Stream) does not close the underlying stream by default (CloseInput=false).
            using (XmlReader rdr = XmlReader.Create(strm))
            {
                Logger l = ctx.DebugLog;

                while (rdr.Read())
                {
                    if (dumpReader)
                    {
                        l.Log("{0}: {1}, {2} [{3}]", rdr.Name, rdr.NodeType, rdr.IsEmptyElement, rdr.Value);
                    }
                    switch (rdr.NodeType)
                    {
                    case XmlNodeType.CDATA:
                    case XmlNodeType.Text:
                    case XmlNodeType.Whitespace:
                    case XmlNodeType.SignificantWhitespace:
                        //Content outside or directly inside the root element is not collected
                        if (lvl <= 0)
                        {
                            continue;
                        }
                        values[lvl] = values[lvl] + rdr.Value;
                        continue;

                    case XmlNodeType.Element:
                        lvl++;
                        if (lvl >= keys.Count)
                        {
                            //First time this depth is reached: grow both per-level lists
                            keys.Add(null);
                            values.Add(null);
                        }
                        if (lvl == 0)
                        {
                            keys[0] = rdr.Name;
                        }
                        else
                        {
                            keys[lvl] = keys[lvl - 1] + "/" + rdr.Name;
                            if (lvl == 1)
                            {
                                ctx.IncrementEmitted();
                            }
                        }

                        bool isEmpty = rdr.IsEmptyElement; //cache this value: after reading the attribs its value is lost
                        if (rdr.AttributeCount > 0)
                        {
                            String pfx = keys[lvl] + "/@";
                            for (int j = 0; j < rdr.AttributeCount; j++)
                            {
                                rdr.MoveToNextAttribute();
                                sink.HandleValue(ctx, pfx + rdr.Name, rdr.Value);
                            }
                        }
                        if (!isEmpty)
                        {
                            continue;
                        }

                        //An empty element has no end tag: emit it right away and step back one level
                        sink.HandleValue(ctx, keys[lvl], null);
                        lvl--;
                        continue;

                    case XmlNodeType.EndElement:
                        sink.HandleValue(ctx, keys[lvl], values[lvl]);
                        values[lvl] = null; //Reset the accumulated text for the next element at this level
                        lvl--;
                        continue;
                    }
                }
            }
        }