/// <summary>
/// Builds a cursor over the data held by <paramref name="parent"/>, creating getter
/// delegates only for the columns the caller asked for.
/// </summary>
/// <param name="parent">The data view supplying the schema and the underlying data to enumerate.</param>
/// <param name="wantsLabel">When true, slot 0 of the getter array is bound to the label getter.</param>
/// <param name="wantsText">When true, slot 1 of the getter array is bound to the text getter.</param>
public Cursor(InputObjectDataView parent, bool wantsLabel, bool wantsText)
{
    Schema = parent.Schema;

    // The enumerator is freshly obtained and has not been advanced yet, hence position -1.
    _position = -1;
    _enumerator = parent._data.GetEnumerator();

    // One slot per column; a slot stays null when that column was not requested.
    var getters = new Delegate[2];
    if (wantsLabel)
    {
        getters[0] = new ValueGetter<bool>(LabelGetterImplementation);
    }
    if (wantsText)
    {
        getters[1] = new ValueGetter<ReadOnlyMemory<char>>(TextGetterImplementation);
    }
    _getters = getters;
}
/// <summary>
/// Demonstrates consuming the custom <c>InputObjectDataView</c> from a minimal ML.NET pipeline:
/// the Text column is tokenized into words and each row is printed alongside its tokens.
/// </summary>
public static void Example()
{
    // Build a small in-memory set of objects and "present" them through our IDataView
    // implementation, so that they can feed a simple ML.NET pipeline.
    var samples = new[]
    {
        new InputObject(false, "Hello my friend."),
        new InputObject(true, "Stay awhile and listen."),
        new InputObject(true, "Masterfully done hero!")
    };
    var dataView = new InputObjectDataView(samples);

    // A deliberately tiny pipeline: a single transformer that tokenizes Text and leaves the
    // Label column completely untouched.
    var mlContext = new MLContext();
    var tokenizer = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", "Text");
    var fittedTokenizer = tokenizer.Fit(dataView);
    var transformedDataView = fittedTokenizer.Transform(dataView);

    var textColumn = transformedDataView.Schema["Text"];
    var tokensColumn = transformedDataView.Schema["TokenizedText"];
    var activeColumns = new[] { textColumn, tokensColumn };

    using (var cursor = transformedDataView.GetRowCursor(activeColumns))
    {
        // Obtain the getters and declare the value buffers *before* iterating. This
        // facilitates buffer sharing (where applicable) and means column-type validation
        // happens once rather than on every row.
        var readText = cursor.GetGetter<ReadOnlyMemory<char>>(textColumn);
        var readTokens = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(tokensColumn);
        ReadOnlyMemory<char> text = default;
        VBuffer<ReadOnlyMemory<char>> tokens = default;

        while (cursor.MoveNext())
        {
            readText(ref text);
            readTokens(ref tokens);

            string joinedTokens = string.Join(", ", tokens.DenseValues());
            Console.WriteLine($"{text} => {joinedTokens}");
        }

        // The output to console is this:

        // Hello my friend. => Hello, my, friend.
        // Stay awhile and listen. => Stay, awhile, and, listen.
        // Masterfully done hero! => Masterfully, done, hero!

        // It can be instructive to set a breakpoint on the Console.WriteLine call and inspect
        // the cursor and the buffers. In particular, on the third iteration `tokens` is
        // logically presented as a three element array, yet the arrays internal to that
        // structure hold (at least) four items, specifically:
        // `Masterfully`, `done`, `hero!`, `listen.`. This is a simple illustration of how
        // buffer sharing from one iteration to the next actually works.
    }
}