/// <summary>
/// Creates the mapper and pre-computes the output type for every column pair of <paramref name="parent"/>.
/// </summary>
/// <param name="parent">Transformer whose <c>ColumnPairs</c> drive the mapping; also supplies the host used for registration.</param>
/// <param name="inputSchema">Schema used to look up each input column and read its type.</param>
public Mapper(TextNormalizingTransformer parent, DataViewSchema inputSchema)
     : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema)
 {
     _parent = parent;

     int pairCount = _parent.ColumnPairs.Length;
     _types = new DataViewType[pairCount];

     for (int pairIndex = 0; pairIndex < _types.Length; pairIndex++)
     {
         // NOTE(review): the boolean result of TryGetColumnIndex is ignored here; presumably the
         // base constructor has already validated the input columns - confirm.
         string sourceName = _parent.ColumnPairs[pairIndex].inputColumnName;
         inputSchema.TryGetColumnIndex(sourceName, out int sourceIndex);

         DataViewType sourceType = inputSchema[sourceIndex].Type;
         if (sourceType is VectorDataViewType)
         {
             // Vector-typed inputs are exposed as a vector of text.
             _types[pairIndex] = new VectorDataViewType(TextDataViewType.Instance);
         }
         else
         {
             // Any other input keeps its source type unchanged.
             _types[pairIndex] = sourceType;
         }
     }
 }
Exemple #2
0
        /// <summary>
        /// Rebuilds the ML context, an empty input data view, the text-normalizing pipeline,
        /// the transformer fitted on that empty view, and the prediction engine built from it.
        /// </summary>
        /// <param name="caseMode">Case mode forwarded to <c>NormalizeText</c>.</param>
        /// <param name="keepDiacritics">Forwarded to the <c>keepDiacritics</c> argument of <c>NormalizeText</c>.</param>
        /// <param name="keepPuncuations">Forwarded to the <c>keepPunctuations</c> argument of <c>NormalizeText</c>.</param>
        /// <param name="keepNumbers">Forwarded to the <c>keepNumbers</c> argument of <c>NormalizeText</c>.</param>
        private void InitializeTextNormalizer(TextNormalizingEstimator.CaseMode caseMode = TextNormalizingEstimator.CaseMode.Lower,
                                              bool keepDiacritics  = false,
                                              bool keepPuncuations = false,
                                              bool keepNumbers     = false)
        {
            // Fresh context and an empty sample list wrapped as a data view.
            _mlContext = new MLContext();
            _emptySamplesList = new List<TextData>();
            _emptyDataView = _mlContext.Data.LoadFromEnumerable(_emptySamplesList);

            // Text normalizer reading "Text" and writing "NormalizedText".
            var textTransforms = _mlContext.Transforms.Text;
            _normTextPipeline = textTransforms.NormalizeText(
                "NormalizedText",
                "Text",
                caseMode,
                keepDiacritics: keepDiacritics,
                keepPunctuations: keepPuncuations,
                keepNumbers: keepNumbers);

            // The pipeline is fitted on the empty view created above.
            _normTextTransformer = _normTextPipeline.Fit(_emptyDataView);

            var modelOperations = _mlContext.Model;
            _predictionEngine = modelOperations.CreatePredictionEngine<TextData, TransformedTextData>(_normTextTransformer);
        }