/// <summary>
/// Creates the mapper for <paramref name="parent"/> and precomputes one output
/// type per entry of the parent's column pairs.
/// </summary>
/// <param name="parent">Transformer whose <c>ColumnPairs</c> drive the mapping; its host registers this mapper.</param>
/// <param name="inputSchema">Schema in which each pair's input column name is looked up.</param>
public Mapper(TextNormalizingTransformer parent, DataViewSchema inputSchema)
    : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema)
{
    _parent = parent;
    _types = new DataViewType[_parent.ColumnPairs.Length];

    for (int pairIndex = 0; pairIndex < _types.Length; pairIndex++)
    {
        var inputName = _parent.ColumnPairs[pairIndex].inputColumnName;

        // NOTE(review): the bool result of the lookup is not checked here;
        // presumably the base constructor has already validated the columns — confirm.
        inputSchema.TryGetColumnIndex(inputName, out int inputIndex);
        var inputType = inputSchema[inputIndex].Type;

        // Vector inputs map to a vector of text; any other input type is passed through unchanged.
        if (inputType is VectorDataViewType)
        {
            _types[pairIndex] = new VectorDataViewType(TextDataViewType.Instance);
        }
        else
        {
            _types[pairIndex] = inputType;
        }
    }
}
/// <summary>
/// Creates a new <c>MLContext</c>, loads an empty sample list as a data view,
/// fits a text-normalizing pipeline on it, and builds a prediction engine
/// from the fitted transformer. All results are stored in instance fields.
/// </summary>
/// <param name="caseMode">Case mode forwarded to <c>NormalizeText</c>.</param>
/// <param name="keepDiacritics">Forwarded to <c>NormalizeText</c> as <c>keepDiacritics</c>.</param>
/// <param name="keepPuncuations">Forwarded to <c>NormalizeText</c> as <c>keepPunctuations</c>.</param>
/// <param name="keepNumbers">Forwarded to <c>NormalizeText</c> as <c>keepNumbers</c>.</param>
private void InitializeTextNormalizer(
    TextNormalizingEstimator.CaseMode caseMode = TextNormalizingEstimator.CaseMode.Lower,
    bool keepDiacritics = false,
    bool keepPuncuations = false,
    bool keepNumbers = false)
{
    const string outputColumn = "NormalizedText";
    const string inputColumn = "Text";

    var context = new MLContext();
    _mlContext = context;

    // The pipeline is fitted on a data view built from an empty list.
    var noSamples = new List<TextData>();
    _emptySamplesList = noSamples;
    _emptyDataView = context.Data.LoadFromEnumerable(noSamples);

    // text normalizer
    _normTextPipeline = context.Transforms.Text.NormalizeText(
        outputColumn,
        inputColumn,
        caseMode,
        keepDiacritics: keepDiacritics,
        keepPunctuations: keepPuncuations,
        keepNumbers: keepNumbers);
    _normTextTransformer = _normTextPipeline.Fit(_emptyDataView);

    _predictionEngine = context.Model
        .CreatePredictionEngine<TextData, TransformedTextData>(_normTextTransformer);
}