/// <summary>
/// Runs <paramref name="del"/> once for every non-blank line found in the
/// <c>[ichMin, ichLim)</c> slice of <paramref name="text"/> and writes the outcome
/// of each invocation to <paramref name="wrt"/>.
/// </summary>
/// <remarks>
/// Each line is split on commas; every field is whitespace-trimmed and a field that is
/// exactly <c>_</c> is replaced by an empty value. Field <c>i</c> is handed to the parser
/// obtained from <c>GetGetter(i, types[i], args)</c> (presumably it stores the parsed value
/// into the shared <c>args</c> array - confirm against <c>GetGetter</c>). A parser that
/// returns <c>false</c> causes a "Parsing ... Failed" marker to be written, but the delegate
/// is still invoked afterwards.
/// </remarks>
/// <param name="wrt">Writer that receives the failure markers and the per-line output.</param>
/// <param name="del">Delegate invoked via <c>DynamicInvoke</c> with the shared argument array.</param>
/// <param name="typeRes">Result type, forwarded to <c>GetPrinter</c>.</param>
/// <param name="types">Argument types; must be non-empty. One parser is created per entry.</param>
/// <param name="text">Text containing the input lines.</param>
/// <param name="ichMin">Inclusive start index of the region of <paramref name="text"/> to read.</param>
/// <param name="ichLim">Exclusive end index of the region of <paramref name="text"/> to read.</param>
private void Evaluate(IndentedTextWriter wrt, Delegate del, DataViewType typeRes, DataViewType[] types,
                              string text, int ichMin, int ichLim)
        {
            Contracts.AssertValue(del);
            Contracts.AssertNonEmpty(types);

            int argCount = types.Length;
            var args     = new object[argCount];
            var parsers  = new Func<ReadOnlyMemory<char>, bool>[argCount];
            for (int slot = 0; slot < parsers.Length; slot++)
            {
                parsers[slot] = GetGetter(slot, types[slot], args);
            }

            StringBuilder output = new StringBuilder();
            Action<object> emit  = GetPrinter(typeRes, output);

            ReadOnlyMemory<char> remaining = text.AsMemory().Slice(ichMin, ichLim - ichMin);

            // Lines are cut at CR on Windows and at LF everywhere else; any leftover
            // line-break character is removed by the whitespace trim below.
            char lineBreak = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? '\x0D' : '\x0A';

            bool more;
            do
            {
                ReadOnlyMemory<char> current;
                more    = ReadOnlyMemoryUtils.SplitOne(remaining, lineBreak, out current, out remaining);
                current = ReadOnlyMemoryUtils.TrimWhiteSpace(current);
                if (current.IsEmpty)
                {
                    // Blank line: nothing to evaluate.
                    continue;
                }

                // Note this "hack" to map _ to empty. It's easier than fully handling quoting and is
                // sufficient for these tests.
                var fields = ReadOnlyMemoryUtils.Split(current, new char[] { ',' })
                             .Select(raw =>
                             {
                                 var field = ReadOnlyMemoryUtils.TrimWhiteSpace(raw);
                                 return ReadOnlyMemoryUtils.EqualsStr("_", field) ? ReadOnlyMemory<char>.Empty : field;
                             })
                             .ToArray();

                Contracts.Assert(fields.Length == parsers.Length);
                for (int slot = 0; slot < parsers.Length; slot++)
                {
                    bool parsed = parsers[slot](fields[slot]);
                    if (parsed)
                    {
                        continue;
                    }
                    wrt.Write("*** Parsing {0} Failed *** ", fields[slot]);
                }

                emit(del.DynamicInvoke(args));
                wrt.WriteLine(output);
            }
            while (more);
        }
Beispiel #2
0
        /// <summary>
        /// Fills <paramref name="stopWordsMap"/> with stop words taken from exactly one source,
        /// chosen in this order: the comma-separated <c>Stopwords</c> string, the <c>Stopword</c>
        /// array, or a text column of the data obtained through <c>LoadStopwords</c>.
        /// </summary>
        /// <remarks>
        /// Every accepted word is passed through
        /// <c>ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder</c> before being added to the pool.
        /// Empty entries are skipped, with a single warning per source. A warning is also issued
        /// when an explicit list is given together with data-file arguments.
        /// NOTE(review): unlike the other two sources, the <c>Stopword</c> array path does not
        /// verify that at least one word was added - confirm whether that is intentional.
        /// </remarks>
        /// <param name="env">Host environment; forwarded to <c>LoadStopwords</c>.</param>
        /// <param name="ch">Channel used for warnings and user-argument checks.</param>
        /// <param name="loaderArgs">Arguments describing where the stop words come from.</param>
        /// <param name="stopWordsMap">Receives the newly created pool of stop words.</param>
        private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase loaderArgs, out NormStr.Pool stopWordsMap)
        {
            Contracts.AssertValue(env);
            env.AssertValue(ch);
            ch.AssertValue(loaderArgs);

            bool hasExplicitList = !string.IsNullOrEmpty(loaderArgs.Stopwords) || Utils.Size(loaderArgs.Stopword) > 0;
            if (hasExplicitList &&
                (!string.IsNullOrWhiteSpace(loaderArgs.DataFile) || loaderArgs.Loader != null ||
                 !string.IsNullOrWhiteSpace(loaderArgs.StopwordsColumn)))
            {
                ch.Warning("Explicit stopwords list specified. Data file arguments will be ignored");
            }

            stopWordsMap = new NormStr.Pool();
            var scratch = new StringBuilder();

            // Source 1: comma-separated list in a single string.
            ReadOnlyMemory<char> pending = ReadOnlyMemoryUtils.TrimSpaces(loaderArgs.Stopwords.AsMemory());
            if (!pending.IsEmpty)
            {
                bool warned = false;
                bool more;
                do
                {
                    ReadOnlyMemory<char> item;
                    more = ReadOnlyMemoryUtils.SplitOne(pending, ',', out item, out pending);
                    item = ReadOnlyMemoryUtils.TrimSpaces(item);
                    if (item.IsEmpty)
                    {
                        // Warn only once about empty entries.
                        if (!warned)
                        {
                            ch.Warning("Empty strings ignored in 'stopwords' specification");
                            warned = true;
                        }
                        continue;
                    }

                    scratch.Clear();
                    ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(item.Span, scratch);
                    stopWordsMap.Add(scratch);
                }
                while (more);

                ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.Stopwords), "stopwords is empty");
                return;
            }

            // Source 2: array of individual words.
            if (Utils.Size(loaderArgs.Stopword) > 0)
            {
                bool warned = false;
                foreach (string word in loaderArgs.Stopword)
                {
                    ReadOnlySpan<char> item = word.AsSpan().Trim(' ');
                    if (item.IsEmpty)
                    {
                        if (!warned)
                        {
                            ch.Warning("Empty strings ignored in 'stopword' specification");
                            warned = true;
                        }
                        continue;
                    }

                    scratch.Clear();
                    ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(item, scratch);
                    stopWordsMap.Add(scratch);
                }
                return;
            }

            // Source 3: a text column read from the data file / loader.
            string columnName = loaderArgs.StopwordsColumn;
            var    data       = LoadStopwords(env, ch, loaderArgs.DataFile, loaderArgs.Loader, ref columnName);
            int    columnIndex;
            if (!data.Schema.TryGetColumnIndex(columnName, out columnIndex))
            {
                throw ch.ExceptUserArg(nameof(Arguments.StopwordsColumn), "Unknown column '{0}'", columnName);
            }
            var columnType = data.Schema[columnIndex].Type;
            ch.CheckUserArg(columnType.IsText, nameof(Arguments.StopwordsColumn), "Must be a scalar text column");

            // Accumulate the stopwords, requesting only the selected column from the cursor.
            using (var rows = data.GetRowCursor(col => col == columnIndex))
            {
                bool warned  = false;
                var  readRow = rows.GetGetter<ReadOnlyMemory<char>>(columnIndex);
                var  value   = default(ReadOnlyMemory<char>);
                while (rows.MoveNext())
                {
                    readRow(ref value);
                    if (value.IsEmpty)
                    {
                        if (!warned)
                        {
                            ch.Warning("Empty rows ignored in data file");
                            warned = true;
                        }
                        continue;
                    }

                    scratch.Clear();
                    ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(value.Span, scratch);
                    stopWordsMap.Add(scratch);
                }
            }
            ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.DataFile), "dataFile is empty");
        }