public void MapInput(Input input, IntermediateInput intermediate) { ReadOnlyMemory <char> text = ReadOnlyMemoryUtils.TrimWhiteSpace(input.Text); // Handle comments, if any. If no comments are present, the value for that column // in this row will be empty. if (!ReadOnlyMemoryUtils.SplitOne(text, '#', out var left, out var right)) { right = ReadOnlyMemory <char> .Empty; } intermediate.Comment = right; var ator = ReadOnlyMemoryUtils.Split(left, _seps).GetEnumerator(); // Empty lines are filtered in the Input.MapComment step. var notEmpty = ator.MoveNext(); Contracts.Assert(notEmpty); ReadOnlyMemory <char> token = ator.Current; // Parse the label. if (_tryFloatParse(in token, out intermediate.Label)) { intermediate.Weight = 1; // Default weight is of course 1. }
/// <summary>
/// Lazily enumerates the indices of the columns in <paramref name="schema"/> whose
/// metadata entry of kind <paramref name="metadataKind"/> is text-typed and compares
/// equal to <paramref name="value"/> via <c>ReadOnlyMemoryUtils.EqualsStr</c>.
/// </summary>
/// <param name="schema">The schema whose columns are inspected.</param>
/// <param name="metadataKind">The name of the metadata entry to look up on each column.</param>
/// <param name="value">The string the metadata value is compared against.</param>
/// <returns>The matching column indices, in increasing order.</returns>
internal static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, string value)
{
    for (int columnIndex = 0; columnIndex < schema.Count; columnIndex++)
    {
        var metadata = schema[columnIndex].Metadata;
        var kindType = metadata.Schema.GetColumnOrNull(metadataKind)?.Type;

        // Columns that lack this metadata kind, or carry it with a non-text type, never match.
        if (kindType == null || !kindType.IsText)
        {
            continue;
        }

        ReadOnlyMemory<char> metadataText = default;
        metadata.GetValue(metadataKind, ref metadataText);
        if (!ReadOnlyMemoryUtils.EqualsStr(value, metadataText))
        {
            continue;
        }

        yield return columnIndex;
    }
}
/// <summary>
/// Lazily enumerates the indices of the columns whose annotation of the given kind
/// matches the requested value. Only annotations whose type is
/// <c>TextDataViewType</c> are considered; the comparison is done with
/// <c>ReadOnlyMemoryUtils.EqualsStr</c>.
/// </summary>
/// <param name="schema">The schema whose columns are inspected.</param>
/// <param name="annotationKind">The name of the annotation to look up on each column.</param>
/// <param name="value">The string the annotation value is compared against.</param>
/// <returns>The matching column indices, in increasing order.</returns>
public static IEnumerable<int> GetColumnSet(this DataViewSchema schema, string annotationKind, string value)
{
    for (int columnIndex = 0; columnIndex < schema.Count; columnIndex++)
    {
        var annotations = schema[columnIndex].Annotations;
        var kindType = annotations.Schema.GetColumnOrNull(annotationKind)?.Type;

        // Columns that lack this annotation, or carry it with a non-text type, never match.
        if (!(kindType is TextDataViewType))
        {
            continue;
        }

        ReadOnlyMemory<char> annotationText = default;
        annotations.GetValue(annotationKind, ref annotationText);
        if (!ReadOnlyMemoryUtils.EqualsStr(value, annotationText))
        {
            continue;
        }

        yield return columnIndex;
    }
}
#pragma warning restore 0649

/// <summary>
/// Flags whether the text of <paramref name="input"/> is a comment or blank line.
/// <c>output.IsComment</c> is set to <see cref="float.NaN"/> when the whitespace-trimmed
/// text is empty or begins with '#', and to 0 otherwise.
/// </summary>
/// <param name="input">The row whose <c>Text</c> is inspected.</param>
/// <param name="output">The row that receives the <c>IsComment</c> indicator.</param>
public static void MapComment(Input input, CommentIndicator output)
{
    // This is slightly more lenient than the SVM-light comment convention. SVM-light
    // requires '#' to be the very first character of a comment line, and treats an
    // empty or all-whitespace line as a parse error. Here the '#' is looked for only
    // after trimming whitespace, and a completely blank line is classified as a
    // "comment" as well.
    ReadOnlyMemory<char> trimmed = ReadOnlyMemoryUtils.TrimWhiteSpace(input.Text);

    // Short-circuiting guarantees Span[0] is only read when there is at least one character.
    if (!trimmed.IsEmpty && trimmed.Span[0] != '#')
    {
        output.IsComment = 0;
        return;
    }

    output.IsComment = float.NaN;
}