/// <summary>
/// Invokes <paramref name="del"/> once per non-blank row found in the slice
/// [<paramref name="ichMin"/>, <paramref name="ichLim"/>) of <paramref name="text"/> and writes
/// the printed result of every invocation to <paramref name="wrt"/>.
/// </summary>
/// <remarks>
/// A row is a comma-separated list holding exactly one value per entry of <paramref name="types"/>.
/// Values are trimmed, and a value of <c>_</c> is mapped to the empty string.
/// Rows may be terminated by CRLF, LF or CR regardless of the operating system the code runs on.
/// </remarks>
/// <param name="wrt">Writer that receives parse-failure markers and one result line per row.</param>
/// <param name="del">Delegate invoked through <see cref="Delegate.DynamicInvoke(object[])"/> with the parsed arguments.</param>
/// <param name="typeRes">Result type, used to select the printer for the delegate's return value.</param>
/// <param name="types">Argument types, one per delegate parameter; must be non-empty.</param>
/// <param name="text">Text that contains the rows to evaluate.</param>
/// <param name="ichMin">Inclusive start index of the slice of <paramref name="text"/> to read.</param>
/// <param name="ichLim">Exclusive end index of the slice of <paramref name="text"/> to read.</param>
private void Evaluate(IndentedTextWriter wrt, Delegate del, DataViewType typeRes, DataViewType[] types, string text, int ichMin, int ichLim)
{
    Contracts.AssertValue(del);
    Contracts.AssertNonEmpty(types);

    // One getter per argument. Presumably each getter parses its text and stores the value in
    // args[i], returning false on failure -- confirm against GetGetter.
    var args = new object[types.Length];
    var getters = new Func<ReadOnlyMemory<char>, bool>[types.Length];
    for (int i = 0; i < getters.Length; i++)
    {
        getters[i] = GetGetter(i, types[i], args);
    }

    // NOTE(review): sb is never cleared in this method; presumably the printer resets it
    // before writing -- confirm against GetPrinter.
    StringBuilder sb = new StringBuilder();
    Action<object> printer = GetPrinter(typeRes, sb);

    // Allocated once instead of once per row.
    var valueSeparators = new char[] { ',' };

    ReadOnlyMemory<char> chars = text.AsMemory().Slice(ichMin, ichLim - ichMin);

    // Split on LF first and then on CR inside every piece. The line ending is a property of the
    // data, not of the host OS: choosing the separator from the OS leaves an LF-only input
    // unsplit on Windows (and a CR-only input unsplit elsewhere). The empty pieces produced
    // between the CR and LF of a CRLF pair are dropped by the IsEmpty check below.
    for (bool moreSegments = true; moreSegments;)
    {
        ReadOnlyMemory<char> segment;
        moreSegments = ReadOnlyMemoryUtils.SplitOne(chars, '\x0A', out segment, out chars);

        for (bool moreLines = true; moreLines;)
        {
            ReadOnlyMemory<char> line;
            moreLines = ReadOnlyMemoryUtils.SplitOne(segment, '\x0D', out line, out segment);

            line = ReadOnlyMemoryUtils.TrimWhiteSpace(line);
            if (line.IsEmpty)
            {
                continue;
            }

            // Note this "hack" to map _ to empty. It's easier than fully handling quoting and is sufficient
            // for these tests.
            var vals = ReadOnlyMemoryUtils.Split(line, valueSeparators)
                .Select(x => ReadOnlyMemoryUtils.TrimWhiteSpace(x))
                .Select(x => ReadOnlyMemoryUtils.EqualsStr("_", x) ? ReadOnlyMemory<char>.Empty : x)
                .ToArray();

            Contracts.Assert(vals.Length == getters.Length);
            for (int i = 0; i < getters.Length; i++)
            {
                // A failed parse is reported inline; the delegate is still invoked afterwards.
                if (!getters[i](vals[i]))
                {
                    wrt.Write("*** Parsing {0} Failed *** ", vals[i]);
                }
            }

            var res = del.DynamicInvoke(args);
            printer(res);
            wrt.WriteLine(sb);
        }
    }
}
/// <summary>
/// Builds the pool of lower-cased stopwords described by <paramref name="loaderArgs"/>.
/// </summary>
/// <remarks>
/// The stopwords come from the first of these sources that is present:
/// the comma-separated <c>Stopwords</c> string, the <c>Stopword</c> array, or a text column
/// read from the data file / loader. Empty entries are skipped, with a single warning per call.
/// When an explicit list is given together with data file arguments, a warning is logged.
/// </remarks>
/// <param name="env">Host environment, forwarded to the data file loader.</param>
/// <param name="ch">Channel used for warnings and user-argument checks.</param>
/// <param name="loaderArgs">Arguments that describe where the stopwords come from.</param>
/// <param name="stopWordsMap">Receives the pool holding every accepted stopword in lower case.</param>
private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase loaderArgs, out NormStr.Pool stopWordsMap)
{
    Contracts.AssertValue(env);
    env.AssertValue(ch);
    ch.AssertValue(loaderArgs);

    // Warn when an explicit list and data file arguments are both supplied.
    if (!string.IsNullOrEmpty(loaderArgs.Stopwords) || Utils.Size(loaderArgs.Stopword) > 0)
    {
        if (!string.IsNullOrWhiteSpace(loaderArgs.DataFile) ||
            loaderArgs.Loader != null ||
            !string.IsNullOrWhiteSpace(loaderArgs.StopwordsColumn))
        {
            ch.Warning("Explicit stopwords list specified. Data file arguments will be ignored");
        }
    }

    stopWordsMap = new NormStr.Pool();
    var lowered = new StringBuilder();

    var remaining = ReadOnlyMemoryUtils.TrimSpaces(loaderArgs.Stopwords.AsMemory());
    if (!remaining.IsEmpty)
    {
        // Source 1: comma-separated string.
        bool shouldWarn = true;
        bool more;
        do
        {
            ReadOnlyMemory<char> token;
            more = ReadOnlyMemoryUtils.SplitOne(remaining, ',', out token, out remaining);
            token = ReadOnlyMemoryUtils.TrimSpaces(token);

            if (token.IsEmpty)
            {
                if (shouldWarn)
                {
                    ch.Warning("Empty strings ignored in 'stopwords' specification");
                    shouldWarn = false;
                }
            }
            else
            {
                lowered.Clear();
                ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(token.Span, lowered);
                stopWordsMap.Add(lowered);
            }
        }
        while (more);

        ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.Stopwords), "stopwords is empty");
    }
    else if (Utils.Size(loaderArgs.Stopword) > 0)
    {
        // Source 2: array of individual words.
        bool shouldWarn = true;
        foreach (string word in loaderArgs.Stopword)
        {
            var token = word.AsSpan().Trim(' ');

            if (token.IsEmpty)
            {
                if (shouldWarn)
                {
                    ch.Warning("Empty strings ignored in 'stopword' specification");
                    shouldWarn = false;
                }
            }
            else
            {
                lowered.Clear();
                ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(token, lowered);
                stopWordsMap.Add(lowered);
            }
        }
    }
    else
    {
        // Source 3: a text column of the data file / loader.
        string columnName = loaderArgs.StopwordsColumn;
        var loader = LoadStopwords(env, ch, loaderArgs.DataFile, loaderArgs.Loader, ref columnName);

        int columnIndex;
        if (!loader.Schema.TryGetColumnIndex(columnName, out columnIndex))
        {
            throw ch.ExceptUserArg(nameof(Arguments.StopwordsColumn), "Unknown column '{0}'", columnName);
        }

        var columnType = loader.Schema[columnIndex].Type;
        ch.CheckUserArg(columnType.IsText, nameof(Arguments.StopwordsColumn), "Must be a scalar text column");

        // Accumulate the stopwords, one per row.
        using (var cursor = loader.GetRowCursor(col => col == columnIndex))
        {
            var rowText = default(ReadOnlyMemory<char>);
            var readText = cursor.GetGetter<ReadOnlyMemory<char>>(columnIndex);
            bool shouldWarn = true;

            while (cursor.MoveNext())
            {
                readText(ref rowText);

                if (rowText.IsEmpty)
                {
                    if (shouldWarn)
                    {
                        ch.Warning("Empty rows ignored in data file");
                        shouldWarn = false;
                    }
                }
                else
                {
                    lowered.Clear();
                    ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(rowText.Span, lowered);
                    stopWordsMap.Add(lowered);
                }
            }
        }

        ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.DataFile), "dataFile is empty");
    }
}