/// <summary>
/// Console entry point. The portion visible here only performs setup: it creates the ML.NET
/// context, ensures the output folder exists, picks the stop-word language, and builds the
/// TPL Dataflow option objects.
/// </summary>
/// <param name="args">Command-line arguments; not read in the visible portion.</param>
static void Main(string[] args)
        {
            Console.Title = "Machine Intelligence (Text Analytics) with TPL Data Flows";

            // CONFIG
            // Instantiate a new ML.NET context.
            // Note: MLContext is thread-safe (per the original author's note).
            // NOTE(review): 100 is presumably the random seed - confirm against the ML.NET version in use.
            var mlContext = new MLContext(100);

            // Build the path of the "EnrichedDocuments" folder under the current working directory
            var currentEnrichmentFolder = System.IO.Path.Combine(Environment.CurrentDirectory, "EnrichedDocuments");

            // Create the folder; CreateDirectory is a no-op when it already exists
            System.IO.Directory.CreateDirectory(currentEnrichmentFolder);

            // SET the stop-word language to English
            StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English;

            // SET the max degree of parallelism
            // Note: Default is to use 75% of the workstation or server cores.
            // Note: If cores are hyperthreaded, adjust accordingly (i.e. multiply *2)
            // isHyperThreaded is hard-coded to false, so the multiplier below is 1 unless this is edited.
            var isHyperThreaded          = false;
            var executionDataFlowOptions = new ExecutionDataflowBlockOptions();

            executionDataFlowOptions.MaxDegreeOfParallelism =
                // 75% of the logical processor count, doubled when isHyperThreaded is true,
                // rounded up so the result is at least 1
                Convert.ToInt32(Math.Ceiling((Environment.ProcessorCount * 0.75) *
                                             (isHyperThreaded ? 2: 1)));

            // SET the Data Flow Block Options
            // This controls the data flow from the Producer level:
            // at most 5 buffered messages per block and at most 5 messages handled per task.
            var dataFlowBlockOptions = new DataflowBlockOptions {
                BoundedCapacity    = 5,
                MaxMessagesPerTask = 5
            };

            // SET the data flow pipeline (link) options
            // PropagateCompletion = true forwards completion from a source block to its linked target.
            // Note: Set MaxMessages to the number of books to process
            // Note: For example, setting MaxMessages to 2 will run only two books through the pipeline
            var dataFlowLinkOptions = new DataflowLinkOptions {
                PropagateCompletion = true,
                //MaxMessages = 1
            };
Esempio n. 2
0
        /// <summary>
        /// HTTP-triggered custom skill. For every request record it normalizes the "text" field,
        /// splits it into words, removes the default stop words for the record's language and
        /// stores the resulting tokens in the "words" field of the output record.
        /// </summary>
        /// <param name="req">Incoming HTTP request from which the request records are read.</param>
        /// <param name="log">Logger used to trace the invocation.</param>
        /// <param name="executionContext">Function execution context; its function name is used as the skill name.</param>
        /// <returns>
        /// A <see cref="BadRequestObjectResult"/> when no request records could be read from
        /// <paramref name="req"/>; otherwise an <see cref="OkObjectResult"/> wrapping the skill response.
        /// </returns>
        public static IActionResult RunTokenizer(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
            ILogger log,
            ExecutionContext executionContext)
        {
            log.LogInformation("Tokenizer Custom Skill: C# HTTP trigger function processed a request.");

            string skillName = executionContext.FunctionName;
            IEnumerable<WebApiRequestRecord> requestRecords = WebApiSkillHelpers.GetRequestRecords(req);

            if (requestRecords == null)
            {
                return new BadRequestObjectResult($"{skillName} - Invalid request record array.");
            }

            WebApiSkillResponse response = WebApiSkillHelpers.ProcessRequestRecords(
                skillName,
                requestRecords,
                (inRecord, outRecord) =>
                {
                    var text = new TextData
                    {
                        Text = inRecord.Data["text"] as string
                    };

                    // "languageCode" is optional; "en" is used when the key is absent.
                    StopWordsRemovingEstimator.Language language =
                        MapToMlNetLanguage(inRecord.Data.TryGetValue("languageCode", out object languageCode) ? languageCode as string : "en");

                    var mlContext = new MLContext();

                    // The pipeline is fit against an empty data view: no rows, only the TextData schema.
                    IDataView emptyDataView = mlContext.Data.LoadFromEnumerable(new List<TextData>());

                    // Lower-case the text, keep diacritics, drop punctuation and numbers;
                    // then split on single spaces and strip the language's default stop words.
                    EstimatorChain<StopWordsRemovingTransformer> textPipeline = mlContext.Transforms.Text
                        .NormalizeText("Text", caseMode: TextNormalizingEstimator.CaseMode.Lower, keepDiacritics: true, keepPunctuations: false, keepNumbers: false)
                        .Append(mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text", separators: new[] { ' ' }))
                        .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("Words", language: language));
                    TransformerChain<StopWordsRemovingTransformer> textTransformer = textPipeline.Fit(emptyDataView);

                    // PredictionEngine is IDisposable; one is created per record, so release it
                    // as soon as the prediction has been copied into the output record.
                    using (PredictionEngine<TextData, TransformedTextData> predictionEngine =
                               mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer))
                    {
                        // Never emit null for "words": fall back to an empty array.
                        outRecord.Data["words"] = predictionEngine.Predict(text).Words ?? Array.Empty<string>();
                    }

                    return outRecord;
                });

            return new OkObjectResult(response);
        }
 /// <summary>
 /// Removes stop words from the incoming text column.
 /// </summary>
 /// <param name="input">The column to apply to.</param>
 /// <param name="language">Language of the input text; English when omitted.</param>
 /// <returns>A new output column built from <paramref name="input"/> and <paramref name="language"/>.</returns>
 public static VarVector<string> RemoveStopwords(
     this VarVector<string> input,
     StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English)
 {
     return new OutPipelineColumn(input, language);
 }
 /// <summary>
 /// Initializes the reconciler with the stop-word language it should carry.
 /// </summary>
 /// <param name="language">Language stored in the <c>_language</c> field.</param>
 public Reconciler(StopWordsRemovingEstimator.Language language) => _language = language;
 /// <summary>
 /// Creates the output column: passes a new <c>Reconciler</c> for <paramref name="language"/>
 /// together with <paramref name="input"/> to the base constructor, then records the input column.
 /// </summary>
 /// <param name="input">The column the stop-word removal is applied to; assigned to <c>Input</c>.</param>
 /// <param name="language">Language handed to the <c>Reconciler</c>.</param>
 public OutPipelineColumn(VarVector<string> input, StopWordsRemovingEstimator.Language language)
     : base(new Reconciler(language), input) => Input = input;