/// <summary>
/// Initializes the processor from the passed options. The options' <c>SourceString</c> is
/// treated as a URL when <c>isUrl</c> accepts it (its content is then fetched through
/// <c>getUrlContent</c> and must pass <c>isHtml</c>); otherwise it is used directly as the
/// text. In both cases the resulting text is collapsed to a single line, lower-cased,
/// trimmed and stored in <c>currentText</c>.
/// </summary>
/// <param name="options">Report options; <c>SourceString</c> carries the URL or the raw text.</param>
/// <returns>
/// <c>true</c> when the processor was initialized; <c>false</c> when <paramref name="options"/>
/// is <c>null</c> or its <c>SourceString</c> is null, empty or whitespace.
/// </returns>
/// <exception cref="Exception">
/// The source is a URL and the fetched content is null/blank or is rejected by <c>isHtml</c>.
/// </exception>
public bool Init(ReportOptions options)
{
    const string badUrlContentMessage = "Downloaded url content is empty or is not html markup or has wrong encoding!";

    // Invalidate up front: if this call fails (false return or exception) the processor
    // must not keep reporting a successful state left over from an earlier Init.
    isInited = false;

    stopWordsList = new List<string>()
    {
        "and", "or", "a", "the", "on", "in", "from", "to", "of",
        "is", "are", "there", "it", "by", "that", "but", "this"
    };

    //1. Validate some passed options
    if (options == null)
    {
        // Previously this surfaced as a NullReferenceException; report it like any other invalid input.
        return false;
    }

    currentOptions = options;
    string sourceString = currentOptions.SourceString;

    if (String.IsNullOrWhiteSpace(sourceString))
    {
        return false;
    }

    sourceString = sourceString.Trim();

    //2. Determine text
    if (isUrl(sourceString))
    {
        this.currentUrl = sourceString;
        string urlContent = getUrlContent(sourceString);

        // Check for null/blank BEFORE touching the string; the original dereferenced it first,
        // so a null result crashed instead of producing the intended error.
        if (String.IsNullOrWhiteSpace(urlContent))
        {
            throw new Exception(badUrlContentMessage);
        }

        //Make a single line
        urlContent = collapseToSingleLine(urlContent);

        //Checking for html markup
        if (!isHtml(urlContent))
        {
            throw new Exception(badUrlContentMessage);
        }

        currentText = urlContent.ToLower().Trim();
    }
    else
    {
        //Source string is text; drop any URL remembered from a previous Init
        currentUrl = string.Empty;
        currentText = collapseToSingleLine(sourceString).ToLower().Trim();
    }

    isInited = true;
    return isInited;
}

/// <summary>
/// Replaces every line break ("\r\n", "\n", "\r") and tab with a single space so that words
/// on adjacent lines stay separated, independent of the platform's <c>Environment.NewLine</c>.
/// </summary>
/// <param name="text">Non-null text to flatten.</param>
/// <returns>The text with line breaks and tabs replaced by spaces.</returns>
private static string collapseToSingleLine(string text)
{
    // "\r\n" is handled first so a Windows line break becomes one space, not two.
    return text.Replace("\r\n", " ")
               .Replace("\n", " ")
               .Replace("\r", " ")
               .Replace("\t", " ");
}
/// <summary>
/// Creates a processor with empty text, an empty URL and a default <c>ReportOptions</c> instance.
/// </summary>
public SeoTextProcessor()
{
    // Start from blank text/URL values and default options.
    currentUrl = string.Empty;
    currentText = string.Empty;
    currentOptions = new ReportOptions();
}