/// <summary>
/// Builds the transform that concatenates <paramref name="newCols"/> into the
/// <c>DefaultColumnNames.Features</c> column by delegating to
/// <c>InferenceHelpers.ConcatColumnsIntoOne</c>.
/// </summary>
/// <param name="newCols">
/// Names of the columns to concatenate. This list is modified in place:
/// <c>DefaultColumnNames.Features</c> is inserted at index 0 when
/// <paramref name="includeFeaturesOverride"/> is true or when an existing features column is found.
/// </param>
/// <param name="existingColumns">
/// Columns searched for one whose purpose is <c>ColumnPurpose.NumericFeature</c> and whose name
/// is <c>DefaultColumnNames.Features</c>.
/// </param>
/// <param name="currentType">Type forwarded unchanged to <c>ConcatColumnsIntoOne</c>.</param>
/// <param name="includeFeaturesOverride">
/// When true, the features column name is prepended regardless of <paramref name="existingColumns"/>.
/// </param>
/// <returns>The <see cref="SuggestedTransform"/> returned by <c>ConcatColumnsIntoOne</c>.</returns>
public static SuggestedTransform GetRemainingFeatures(List<string> newCols, IntermediateColumn[] existingColumns,
    Type currentType, bool includeFeaturesOverride)
{
    // Pick up an existing features column, if there is one. Only existence matters,
    // so Any() is used rather than counting every match.
    if (includeFeaturesOverride ||
        existingColumns.Any(col =>
            col.Purpose == ColumnPurpose.NumericFeature &&
            col.ColumnName == DefaultColumnNames.Features))
    {
        // Keep the pre-existing features column first in the concatenation order.
        newCols.Insert(0, DefaultColumnNames.Features);
    }

    return InferenceHelpers.ConcatColumnsIntoOne(newCols, DefaultColumnNames.Features, currentType, true);
}
/// <summary>
/// Lazily yields the transforms suggested for text feature columns: an optional concatenation
/// of several text columns, a text featurization produced by
/// <c>InferenceHelpers.TextTransformUnigramTriChar</c>, and (unless
/// <c>ExcludeFeaturesConcatTransforms</c> is set) a concatenation into
/// <c>DefaultColumnNames.Features</c>.
/// </summary>
/// <param name="columns">Candidate columns to inspect.</param>
/// <returns>
/// The suggested transforms in application order. Nothing is yielded when there is no text
/// column with purpose <c>ColumnPurpose.TextFeature</c>, or when the number of columns with
/// purpose <c>ColumnPurpose.Label</c> is not exactly one.
/// </returns>
public override IEnumerable<SuggestedTransform> Apply(IntermediateColumn[] columns)
{
    List<string> textNames =
        (from candidate in columns
         where candidate.Type.ItemType().IsText() && candidate.Purpose == ColumnPurpose.TextFeature
         select candidate.ColumnName).ToList();

    // Nothing to featurize.
    if (textNames.Count == 0)
    {
        yield break;
    }

    // Exactly one label column is required.
    int labelCount = columns.Count(candidate => candidate.Purpose == ColumnPurpose.Label);
    if (labelCount != 1)
    {
        yield break;
    }

    // Settle on a single text source column, concatenating first when there are several.
    string textSource;
    if (textNames.Count == 1)
    {
        textSource = textNames[0];
    }
    else
    {
        textSource = columns[0].GetTempColumnName("TextConcat");
        yield return InferenceHelpers.ConcatColumnsIntoOne(textNames, textSource, GetType(), false);
    }

    // Featurize the (possibly concatenated) text column into a temporary column.
    string featurizedText = columns[0].GetTempColumnName("FeaturesText");
    yield return InferenceHelpers.TextTransformUnigramTriChar(Env, textSource, featurizedText);

    // Fold the featurized text into the features column, together with the existing
    // features column when numeric or categorical features are present.
    var concatInputs = new List<string> { featurizedText };
    bool hasOtherFeatures = columns.Any(candidate =>
        candidate.Purpose == ColumnPurpose.NumericFeature ||
        candidate.Purpose == ColumnPurpose.CategoricalFeature);
    if (hasOtherFeatures)
    {
        concatInputs.Add(DefaultColumnNames.Features);
    }

    if (ExcludeFeaturesConcatTransforms)
    {
        yield break;
    }

    yield return InferenceHelpers.ConcatColumnsIntoOne(concatInputs, DefaultColumnNames.Features, GetType(), true);
}