/// <summary>
/// Fills <paramref name="stopWordsMap"/> with stopwords taken from the first source that is
/// specified, in this priority order:
/// <list type="number">
/// <item><description>the comma-separated <c>Stopwords</c> string,</description></item>
/// <item><description>the <c>Stopword</c> array,</description></item>
/// <item><description>a text column read through the loader returned by <c>LoadStopwords</c>.</description></item>
/// </list>
/// Every non-empty entry is written to a scratch buffer with <c>AddLowerCaseToStringBuilder</c>
/// and then added to the pool. Empty entries are skipped, with a single warning per source.
/// </summary>
/// <remarks>
/// A user-argument error is raised through <paramref name="ch"/> when the selected source yields
/// no stopwords, when the stopwords column cannot be found in the loader schema, or when that
/// column is not a text column.
/// </remarks>
/// <param name="env">Host environment; forwarded to <c>LoadStopwords</c> for the data-file source.</param>
/// <param name="ch">Channel used for warnings and user-argument checks.</param>
/// <param name="loaderArgs">Arguments naming the stopwords source.</param>
/// <param name="stopWordsMap">Receives the newly created pool holding the collected stopwords.</param>
private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase loaderArgs, out NormStr.Pool stopWordsMap)
{
    Contracts.AssertValue(env);
    env.AssertValue(ch);
    ch.AssertValue(loaderArgs);

    stopWordsMap = new NormStr.Pool();
    var buffer = new StringBuilder();

    var stopwords = new DvText(loaderArgs.Stopwords);
    stopwords = stopwords.Trim();

    // Decide "explicit list given" with the same test that selects the branches below, so the
    // warning is only emitted when the data file arguments really are ignored (a whitespace-only
    // 'stopwords' string does not count as an explicit list).
    bool hasExplicitList = stopwords.HasChars || Utils.Size(loaderArgs.Stopword) > 0;
    if (hasExplicitList &&
        (!string.IsNullOrWhiteSpace(loaderArgs.DataFile) || loaderArgs.Loader.IsGood() ||
            !string.IsNullOrWhiteSpace(loaderArgs.StopwordsColumn)))
    {
        ch.Warning("Explicit stopwords list specified. Data file arguments will be ignored");
    }

    if (stopwords.HasChars)
    {
        // Source 1: comma-separated list.
        bool warnEmpty = true;
        for (bool more = true; more;)
        {
            DvText stopword;
            more = stopwords.SplitOne(',', out stopword, out stopwords);
            if (!TryAddStopWord(stopWordsMap, buffer, stopword.Trim()) && warnEmpty)
            {
                ch.Warning("Empty strings ignored in 'stopwords' specification");
                warnEmpty = false;
            }
        }
        ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.Stopwords), "stopwords is empty");
    }
    else if (Utils.Size(loaderArgs.Stopword) > 0)
    {
        // Source 2: array of individual stopwords.
        bool warnEmpty = true;
        foreach (string word in loaderArgs.Stopword)
        {
            var stopword = new DvText(word);
            if (!TryAddStopWord(stopWordsMap, buffer, stopword.Trim()) && warnEmpty)
            {
                ch.Warning("Empty strings ignored in 'stopword' specification");
                warnEmpty = false;
            }
        }
        // Same guarantee as the other two sources: a list made only of blank entries is a user error.
        ch.CheckUserArg(stopWordsMap.Count > 0, nameof(ArgumentsBase.Stopword), "stopword is empty");
    }
    else
    {
        // Source 3: a text column of a data file.
        string srcCol = loaderArgs.StopwordsColumn;
        // srcCol is passed by ref, so LoadStopwords may replace it before the schema lookup below.
        var loader = LoadStopwords(env, ch, loaderArgs.DataFile, loaderArgs.Loader, ref srcCol);
        int colSrc;
        if (!loader.Schema.TryGetColumnIndex(srcCol, out colSrc))
        {
            throw ch.ExceptUserArg(nameof(Arguments.StopwordsColumn), "Unknown column '{0}'", srcCol);
        }
        var typeSrc = loader.Schema.GetColumnType(colSrc);
        ch.CheckUserArg(typeSrc.IsText, nameof(Arguments.StopwordsColumn), "Must be a scalar text column");

        // Accumulate the stopwords.
        using (var cursor = loader.GetRowCursor(col => col == colSrc))
        {
            bool warnEmpty = true;
            var src = default(DvText);
            var getter = cursor.GetGetter<DvText>(colSrc);
            while (cursor.MoveNext())
            {
                getter(ref src);
                // Rows are used as read; unlike the explicit lists they are not trimmed.
                if (!TryAddStopWord(stopWordsMap, buffer, src) && warnEmpty)
                {
                    ch.Warning("Empty rows ignored in data file");
                    warnEmpty = false;
                }
            }
        }
        ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.DataFile), "dataFile is empty");
    }
}

/// <summary>
/// Adds <paramref name="word"/> to <paramref name="pool"/> via the reusable <paramref name="buffer"/>,
/// unless the word has no characters.
/// </summary>
/// <returns><c>true</c> if the word was added; <c>false</c> if it had no characters and was skipped.</returns>
private static bool TryAddStopWord(NormStr.Pool pool, StringBuilder buffer, DvText word)
{
    if (!word.HasChars)
    {
        return false;
    }

    buffer.Clear();
    word.AddLowerCaseToStringBuilder(buffer);
    pool.Add(buffer);
    return true;
}