public ESDataEndpoint(ESEndpoint endpoint, ESIndexDocType doctype)
   : base(endpoint)
{
   this.Connection = endpoint.Connection;
   this.DocType = doctype;
   this.cacheSize = endpoint.CacheSize;
   // Only create an async queue when parallel requests are allowed
   if (endpoint.MaxParallel > 0)
      asyncQ = AsyncRequestQueue.Create(endpoint.MaxParallel);
}
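// A minimal sketch of the bounded-parallelism pattern that AsyncRequestQueue.Create(maxParallel)
// suggests: at most maxParallel requests are in flight, and pushing a new request into a full
// queue first waits for (pops) the oldest one. The names below (BoundedQueue, Push, PopAll) are
// hypothetical illustrations using standard tasks, not the actual AsyncRequestQueue API.
//
// using System;
// using System.Collections.Generic;
// using System.Threading.Tasks;
//
// class BoundedQueue
// {
//    private readonly Queue<Task> inFlight = new Queue<Task>();
//    private readonly int maxParallel;
//    public BoundedQueue(int maxParallel) { this.maxParallel = maxParallel; }
//
//    // Push a work item; if the queue is full, block until the oldest item completes
//    public void Push(Action work)
//    {
//       if (inFlight.Count >= maxParallel) inFlight.Dequeue().Wait();
//       inFlight.Enqueue(Task.Run(work));
//    }
//
//    // Drain everything that is still pending
//    public void PopAll()
//    {
//       while (inFlight.Count > 0) inFlight.Dequeue().Wait();
//    }
// }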
public void Import(PipelineContext ctx, IDatasourceSink sink)
{
   workerQueue = AsyncRequestQueue.Create(maxParallel);
   ctx.ImportLog.Log("TikaDS starting. maxparallel={0}, dbgstore={1}, Q={2}", maxParallel, DbgStoreDir, workerQueue);

   // Raise the global HTTP connection limit if it would otherwise throttle our parallelism
   if (maxParallel >= 2 && ServicePointManager.DefaultConnectionLimit < maxParallel)
   {
      ctx.ImportLog.Log("Updating connectionLimit for {0} to {1}", ServicePointManager.DefaultConnectionLimit, maxParallel);
      ServicePointManager.DefaultConnectionLimit = maxParallel;
   }

   ensureTikaServiceStarted(ctx);
   previousRun = ctx.RunAdministrations.GetLastOKRunDateShifted(ctx.DatasourceAdmin);
   ctx.ImportLog.Log("Previous (shifted) run was {0}.", previousRun);
   //GenericStreamProvider.DumpRoots(ctx, streamDirectory);
   try
   {
      if (this.mustEmitSecurity)
         securityCache = new SecurityCache(TikaSecurityAccount.FactoryImpl);

      foreach (var elt in streamDirectory.GetProviders(ctx))
      {
         try
         {
            importUrl(ctx, sink, elt);
         }
         catch (Exception e)
         {
            throw new BMException(e, "{0}\r\nUrl={1}.", e.Message, elt);
         }
      }

      // Handle the workers that are still queued
      while (true)
      {
         TikaAsyncWorker popped = pushPop(ctx, sink, null);
         if (popped == null) break;
         importUrl(ctx, sink, popped);
      }
   }
   finally
   {
      workerQueue.PopAllWithoutException();
      Utils.FreeAndNil(ref securityCache);
   }
}
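// Why the connection-limit bump above matters: outside ASP.NET,
// ServicePointManager.DefaultConnectionLimit defaults to 2, so even with maxParallel=8 only two
// concurrent HTTP requests to the Tika service would actually run. A standalone hedged sketch
// (maxParallel is an assumed setting, not TikaDS code):
//
// using System;
// using System.Net;
//
// class ConnectionLimitDemo
// {
//    static void Main()
//    {
//       int maxParallel = 8; // hypothetical configuration value
//       if (maxParallel >= 2 && ServicePointManager.DefaultConnectionLimit < maxParallel)
//          ServicePointManager.DefaultConnectionLimit = maxParallel;
//       Console.WriteLine("Connection limit is now {0}", ServicePointManager.DefaultConnectionLimit);
//    }
// }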
public MapReduceProcessor(PipelineContext ctx, MapReduceProcessor other, IDataEndpoint epOrnextProcessor)
   : base(other, epOrnextProcessor)
{
   directory = other.directory;
   hasher = other.hasher;
   sorter = other.sorter;
   undupper = other.undupper;
   keepFiles = other.keepFiles;
   fanOut = other.fanOut;
   compress = other.compress;
   maxNullIndex = other.maxNullIndex;
   bufferSize = other.bufferSize;
   readMaxParallel = other.readMaxParallel;
   if (other.undupActions != null)
      undupActions = other.undupActions.Clone(ctx);

   // Buffered mode: collect objects and flush them via a single-slot async queue
   if (bufferSize > 0)
   {
      buffer = new List<JObject>(bufferSize);
      asyncQ = AsyncRequestQueue.Create(1);
   }
   ctx.ImportLog.Log("Postprocessor [{0}]: mapping to {1}. Fan-out={2}.", Name, directory == null ? "<memory>" : directory, fanOut);
}
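// The bufferSize > 0 branch suggests a double-buffering scheme: objects accumulate in `buffer`
// while a 1-slot async queue flushes the previous full buffer in the background. A hedged sketch
// of that idea using standard tasks (DoubleBuffer, Add and Flush are hypothetical names, not the
// MapReduceProcessor internals):
//
// using System.Collections.Generic;
// using System.Threading.Tasks;
// using Newtonsoft.Json.Linq;
//
// class DoubleBuffer
// {
//    private List<JObject> buffer;
//    private Task pendingFlush;
//    private readonly int bufferSize;
//
//    public DoubleBuffer(int bufferSize)
//    {
//       this.bufferSize = bufferSize;
//       this.buffer = new List<JObject>(bufferSize);
//    }
//
//    public void Add(JObject obj)
//    {
//       buffer.Add(obj);
//       if (buffer.Count < bufferSize) return;
//       // Wait for the previous flush (the queue holds at most 1 request), then start a new one
//       pendingFlush?.Wait();
//       var full = buffer;
//       buffer = new List<JObject>(bufferSize);
//       pendingFlush = Task.Run(() => Flush(full));
//    }
//
//    private void Flush(List<JObject> objs) { /* write objs to the map directory */ }
// }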
public override int CallNextPostProcessor(PipelineContext ctx)
{
   ctx.PostProcessor = this;
   ReportStart(ctx);
   if (mapper != null)
   {
      ctx.ImportLog.Log(_LogType.ltTimerStart, "Reduce phase maxparallel={0}, fanout={1}.", readMaxParallel, fanOut);
      AsyncRequestQueue q = (readMaxParallel == 0 || fanOut <= 1) ? null : AsyncRequestQueue.Create(readMaxParallel);

      MappedObjectEnumerator e;
      int i;
      if (q == null)
      {
         // Sequential: enumerate the partial files one by one
         for (i = 0; true; i++)
         {
            e = mapper.GetObjectEnumerator(i);
            if (e == null) break;
            enumeratePartialAndClose(ctx, e, i);
         }
      }
      else
      {
         // Parallel: push enum requests into the Q and process the completed results
         for (i = 0; true; i++)
         {
            var x = q.PushAndOptionalPop(new AsyncRequestElement(i, getEnum));
            if (x == null) continue;
            e = (MappedObjectEnumerator)x.Result;
            if (e == null) break;
            enumeratePartialAndClose(ctx, e, i);
         }
         // Pop all remaining requests from the Q and process them
         while (true)
         {
            var x = q.Pop();
            if (x == null) break;
            e = (MappedObjectEnumerator)x.Result;
            if (e == null) continue;
            enumeratePartialAndClose(ctx, e, i++);
         }
      }
   }
   ReportEnd(ctx);
   ctx.ImportLog.Log(_LogType.ltTimerStop, "Reduce phase ended.");
   Utils.FreeAndNil(ref mapper);
   return base.CallNextPostProcessor(ctx);
}
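// The reduce loop above is a pipelined producer pattern: push a request for partial file i,
// optionally pop and process a completed earlier request, then drain whatever is left. A hedged,
// self-contained sketch with standard tasks (OpenPartial, Process, maxParallel and partialCount
// are illustrative stand-ins, not the real mapper API):
//
// using System;
// using System.Collections.Generic;
// using System.Threading.Tasks;
//
// class PipelinedReduce
// {
//    static void Main()
//    {
//       int maxParallel = 4;                     // assumed readMaxParallel
//       int partialCount = 10;                   // assumed fan-out
//       var inFlight = new Queue<Task<int>>();
//
//       for (int i = 0; i < partialCount; i++)
//       {
//          int idx = i;
//          inFlight.Enqueue(Task.Run(() => OpenPartial(idx)));  // like q.PushAndOptionalPop(...)
//          if (inFlight.Count < maxParallel) continue;
//          Process(inFlight.Dequeue().Result);                  // pop + process a completed one
//       }
//       while (inFlight.Count > 0)               // like the final q.Pop() drain loop
//          Process(inFlight.Dequeue().Result);
//    }
//
//    static int OpenPartial(int i) { return i; } // stands in for mapper.GetObjectEnumerator(i)
//    static void Process(int e) { Console.WriteLine("processed partial {0}", e); }
// }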