Beispiel #1
0
 /// <summary>
 /// Creates a data endpoint for one document type on top of the given endpoint.
 /// </summary>
 /// <param name="endpoint">Owning endpoint; its connection, cache size and max-parallel setting are taken over.</param>
 /// <param name="doctype">Document type this data endpoint is bound to.</param>
 public ESDataEndpoint(ESEndpoint endpoint, ESIndexDocType doctype)
     : base(endpoint)
 {
     Connection = endpoint.Connection;
     DocType = doctype;
     cacheSize = endpoint.CacheSize;

     // The async request queue is only created when a positive parallelism is configured;
     // otherwise asyncQ keeps whatever value it had (not assigned here).
     var parallelism = endpoint.MaxParallel;
     if (parallelism > 0)
     {
         asyncQ = AsyncRequestQueue.Create(parallelism);
     }
 }
Beispiel #2
0
        /// <summary>
        /// Imports every provider returned by the stream directory and then processes
        /// the workers that are still queued.
        /// </summary>
        /// <param name="ctx">Pipeline context; used for logging and run administration.</param>
        /// <param name="sink">Sink that is handed to <c>importUrl</c> and <c>pushPop</c>.</param>
        /// <exception cref="BMException">
        /// Wraps any exception raised while importing a provider; the message is extended
        /// with the provider (formatted as "Url=...").
        /// </exception>
        public void Import(PipelineContext ctx, IDatasourceSink sink)
        {
            workerQueue = AsyncRequestQueue.Create(maxParallel);
            ctx.ImportLog.Log("TikaDS starting. maxparallel={0}, dbgstore={1}, Q={2}", maxParallel, DbgStoreDir, workerQueue);

            // Raise the process-wide connection limit when it is lower than the requested parallelism.
            bool connectionLimitTooLow = maxParallel >= 2 && ServicePointManager.DefaultConnectionLimit < maxParallel;
            if (connectionLimitTooLow)
            {
                ctx.ImportLog.Log("Updating connectionLimit for {0} to {1}", ServicePointManager.DefaultConnectionLimit, maxParallel);
                ServicePointManager.DefaultConnectionLimit = maxParallel;
            }

            ensureTikaServiceStarted(ctx);

            previousRun = ctx.RunAdministrations.GetLastOKRunDateShifted(ctx.DatasourceAdmin);
            ctx.ImportLog.Log("Previous (shifted) run was {0}.", previousRun);

            try
            {
                if (mustEmitSecurity)
                {
                    securityCache = new SecurityCache(TikaSecurityAccount.FactoryImpl);
                }

                foreach (var provider in streamDirectory.GetProviders(ctx))
                {
                    try
                    {
                        importUrl(ctx, sink, provider);
                    }
                    catch (Exception ex)
                    {
                        // Re-throw with the offending provider appended to the message.
                        throw new BMException(ex, "{0}\r\nUrl={1}.", ex.Message, provider);
                    }
                }

                // Process the workers that are still queued; pushPop returning null ends the loop.
                TikaAsyncWorker pending;
                while ((pending = pushPop(ctx, sink, null)) != null)
                {
                    importUrl(ctx, sink, pending);
                }
            }
            finally
            {
                // Always empty the queue and release the security cache, also on failure.
                workerQueue.PopAllWithoutException();
                Utils.FreeAndNil(ref securityCache);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Creates a processor that takes its settings from another <c>MapReduceProcessor</c>
 /// and forwards to the given endpoint or next processor.
 /// </summary>
 /// <param name="ctx">Pipeline context; used to clone the undup actions and to log.</param>
 /// <param name="other">Instance whose settings are copied.</param>
 /// <param name="epOrnextProcessor">Endpoint or next processor, passed on to the base constructor.</param>
 public MapReduceProcessor(PipelineContext ctx, MapReduceProcessor other, IDataEndpoint epOrnextProcessor)
     : base(other, epOrnextProcessor)
 {
     // Storage related settings.
     directory = other.directory;
     keepFiles = other.keepFiles;
     compress = other.compress;
     fanOut = other.fanOut;

     // Hashing / sorting / undup helpers are shared with the source instance.
     hasher = other.hasher;
     sorter = other.sorter;
     undupper = other.undupper;

     // Numeric limits.
     maxNullIndex = other.maxNullIndex;
     bufferSize = other.bufferSize;
     readMaxParallel = other.readMaxParallel;

     // Undup actions are cloned against the supplied context instead of being shared.
     var sourceActions = other.undupActions;
     if (sourceActions != null)
     {
         undupActions = sourceActions.Clone(ctx);
     }

     // With a positive buffer size, a pre-sized buffer and a queue created with argument 1 are set up.
     if (bufferSize > 0)
     {
         buffer = new List<JObject>(bufferSize);
         asyncQ = AsyncRequestQueue.Create(1);
     }

     ctx.ImportLog.Log("Postprocessor [{0}]: mapping to {1}. Fan-out={2}.", Name, directory ?? "<memory>", fanOut);
 }
Beispiel #4
0
        /// <summary>
        /// Runs the reduce phase over the mapper (when there is one), releases the mapper
        /// and then delegates to the base implementation.
        /// </summary>
        /// <remarks>
        /// Enumerators are fetched one by one via <c>mapper.GetObjectEnumerator</c> when no
        /// queue is used (<c>readMaxParallel == 0</c> or <c>fanOut &lt;= 1</c>); otherwise
        /// requests are pushed through an <c>AsyncRequestQueue</c> created with
        /// <c>readMaxParallel</c>. Each non-null enumerator is handed to
        /// <c>enumeratePartialAndClose</c>.
        /// </remarks>
        /// <param name="ctx">Pipeline context; this processor registers itself as <c>ctx.PostProcessor</c>.</param>
        /// <returns>The value returned by <c>base.CallNextPostProcessor</c>.</returns>
        public override int CallNextPostProcessor(PipelineContext ctx)
        {
            ctx.PostProcessor = this;
            ReportStart(ctx);
            if (mapper != null)
            {
                ctx.ImportLog.Log(_LogType.ltTimerStart, "Reduce phase maxparallel={0}, fanout={1}.", readMaxParallel, fanOut);
                AsyncRequestQueue q = (readMaxParallel == 0 || fanOut <= 1) ? null : AsyncRequestQueue.Create(readMaxParallel);

                MappedObjectEnumerator e;
                int i;
                if (q == null)
                {
                    // Sequential: a null enumerator marks the end of the available partitions.
                    for (i = 0; ; i++)
                    {
                        e = mapper.GetObjectEnumerator(i);
                        if (e == null)
                        {
                            break;
                        }
                        enumeratePartialAndClose(ctx, e, i);
                    }
                }
                else
                {
                    try
                    {
                        // Push enum requests into the Q and process whatever result is handed back.
                        // NOTE(review): the index passed to enumeratePartialAndClose is the push counter,
                        // which is not necessarily the index the popped request was created with - confirm.
                        for (i = 0; ; i++)
                        {
                            var popped = q.PushAndOptionalPop(new AsyncRequestElement(i, getEnum));
                            if (popped == null)
                            {
                                // Nothing was handed back for this push; keep pushing.
                                continue;
                            }
                            e = (MappedObjectEnumerator)popped.Result;
                            if (e == null)
                            {
                                break;
                            }

                            enumeratePartialAndClose(ctx, e, i);
                        }

                        // Pop all existing from the Q and process them; null results are skipped.
                        while (true)
                        {
                            var remaining = q.Pop();
                            if (remaining == null)
                            {
                                break;
                            }
                            e = (MappedObjectEnumerator)remaining.Result;
                            if (e == null)
                            {
                                continue;
                            }

                            enumeratePartialAndClose(ctx, e, i++);
                        }
                    }
                    finally
                    {
                        // Do not leave requests queued when an exception escapes the loops above.
                        // Presumably a no-op on an already emptied queue - TODO confirm.
                        q.PopAllWithoutException();
                    }
                }
            }
            ReportEnd(ctx);
            ctx.ImportLog.Log(_LogType.ltTimerStop, "Reduce phase ended.");
            Utils.FreeAndNil(ref mapper);
            return base.CallNextPostProcessor(ctx);
        }