/// <summary>
/// Builds the dataflow block that covers raw integrated document reading -> feature extraction.
/// Each incoming record is first passed through <c>Context.DecodeFields</c> and then forwarded
/// unchanged to a <c>MemberVisitingBlock</c> constructed over <c>ProcessRecord</c>.
/// </summary>
/// <param name="featureGen">Generator whose <c>CreateFeaturesBlock()</c> supplies the features stage.</param>
/// <returns>
/// A <c>DonutBlock</c> constructed from the decode stage and the features block
/// obtained from <paramref name="featureGen"/>.
/// </returns>
public IDonutBlock<TData> CreateDataflowBlock(IFeatureGenerator<TData> featureGen)
{
    // Construction order is kept as-is: features stage, then member visitor, then decoder.
    var featureStage = featureGen.CreateFeaturesBlock();
    var memberVisitor = new MemberVisitingBlock<TData>(ProcessRecord);

    // Decode the record's fields in place and hand the same instance downstream.
    Func<TData, TData> decodeRecord = record =>
    {
        Context.DecodeFields(record);
        return record;
    };
    var decodeStage = new TransformFlowBlock<TData, TData>(
        new TransformBlock<TData, TData>(decodeRecord));

    // The member visitor is only linked here; it is not passed to the returned DonutBlock.
    decodeStage.LinkTo(memberVisitor.GetInputBlock());

    return new DonutBlock<TData>(decodeStage, featureStage);
}
/// <summary>
/// Drives the feature-extraction phase for <paramref name="donut"/>: completes the donut,
/// runs its preparation step, then either replays the harvester input into the features
/// block or asks the donut to complete extraction, and finally invokes <c>OnFinished</c>.
/// Exceptions thrown by the preparation and finishing steps are written to
/// <see cref="Trace"/> and not rethrown.
/// </summary>
/// <param name="donut">The donut whose extraction is being run.</param>
/// <returns>A task that completes once the finishing step has been attempted.</returns>
private async Task RunFeatureExtraction(TDonut donut)
{
    // Stop the donut from accepting any more data.
    donut.Complete();

    try
    {
        // Give the donut a chance to prepare whatever it needs
        // (e.g. running mongodb aggregate pipelines).
        await donut.PrepareExtraction();
    }
    catch (Exception prepareError)
    {
        // Preparation failures are traced only; execution continues below.
        Trace.WriteLine("Donut error while preparing extraction: " + prepareError.Message);
    }

    if (donut.ReplayInputOnFeatures && !donut.SkipFeatureExtraction)
    {
        // Replay path: reset the harvester, point it at the features block and run it again.
        var replayTarget = new TransformFlowBlock<TData, FeaturesWrapper<TData>>(_featuresBlock);
        _harvester.Reset();
        _harvester.SetDestination(replayTarget);
        await _harvester.Run(); // result is not used
    }
    else if (!donut.SkipFeatureExtraction)
    {
        await donut.CompleteExtraction();
    }

    try
    {
        await donut.OnFinished();
    }
    catch (Exception finishError)
    {
        // Finishing failures are traced only.
        Trace.WriteLine("Donut error while finishing: " + finishError.Message);
    }
}