TextCleaner C# (CSharp)代码示例

示例#1

0

显示文件

        /// <summary>
        /// Looks up a single Pokemon by name and renders it.
        /// </summary>
        /// <param name="pokemon">Raw search text; it is passed through <c>TextCleaner.NormalInput</c> before use.</param>
        /// <returns>
        /// The view for the Pokemon returned by <c>pk.GetPokemon</c>, or a redirect to the "Index" action
        /// (with <c>TempData["error"]</c> set) when the input normalizes to null or the lookup throws.
        /// </returns>
        public IActionResult SearchByName(string pokemon)
        {
            string poke = TextCleaner.NormalInput(pokemon);

            // Validate before doing any other work.
            if (poke == null)
            {
                TempData["error"] = "Please enter a valid entry";
                return(RedirectToAction("Index"));
            }

            PokemonRoot p;

            try
            {
                p = pk.GetPokemon(poke);
            }
            catch (Exception)
            {
                // Any lookup failure is reported to the user as an invalid entry.
                TempData["error"] = "Please enter a valid entry";
                return(RedirectToAction("Index"));
            }

            // A successful search clears error messages left by earlier requests.
            TempData.Remove("moveerror");
            TempData.Remove("error");

            return(View(p));
        }

示例#2

0

显示文件

文件： CrawlSina.cs 项目： Ailsa0910026911/Palas

        /// <summary>
        /// Extracts tweets from <paramref name="content"/> with <c>RegexContent</c>, appends each one to
        /// <c>result.Tweets</c> and returns the tweets that were added.
        /// </summary>
        /// <param name="result">The user record whose <c>Tweets</c> collection receives the parsed tweets; its <c>Url</c> is used to resolve tweet URLs.</param>
        /// <param name="content">The text to scan for tweet matches.</param>
        /// <returns>The tweets parsed from <paramref name="content"/>, in match order.</returns>
        private Tweet[] FillUserTweet(UserTweet result, string content)
        {
            var          matches   = Regex.Matches(content, RegexContent, RegexOptions.Multiline | RegexOptions.IgnoreCase);
            List <Tweet> tweetList = new List <Tweet>();

            foreach (Match match in matches)
            {
                // Handle failures per match: one malformed entry must not silently discard
                // every tweet that follows it.
                try
                {
                    // A failed TryParse leaves the counter at 0.
                    int comment;
                    int.TryParse(match.Groups["Reply"].Value, out comment);
                    int forward;
                    int.TryParse(match.Groups["Forward"].Value, out forward);

                    Tweet tweet = new Tweet();
                    tweet.Comment = comment;
                    tweet.Content = TextCleaner.FullClean(match.Groups["Content"].Value);
                    tweet.Mid     = match.Groups["Mid"].Value;
                    tweet.Forward = forward;
                    tweet.Source  = match.Groups["Source"].Value;
                    tweet.PubDate = DateTimeParser.Parser(match.Groups["PubDate"].Value) ?? DateTime.MinValue;
                    tweet.Url     = RegexParser.AbsoluteUrl(match.Groups["Url"].Value, result.Url, true);

                    result.Tweets.Add(tweet);
                    tweetList.Add(tweet);
                }
                catch (Exception ex)
                {
                    // Still best-effort, but no longer invisible.
                    System.Diagnostics.Trace.TraceWarning("FillUserTweet: skipped a tweet match that could not be parsed: " + ex.Message);
                }
            }

            return(tweetList.ToArray());
        }

示例#3

0

显示文件

文件： MainWindow.xaml.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// Converts the current source HTML to plain text, copies that text to the clipboard
        /// and then shows it as the new source value.
        /// </summary>
        /// <returns>
        /// A task that completes after the status message has been set, or right after the error
        /// has been logged and reported when conversion or the clipboard write fails.
        /// </returns>
        private async Task PlainTextAndCopy()
        {
            string sourceHtml = this.SourceValue;
            string plainText;

#pragma warning disable CA1031 // Do not catch general exception types
            // Step 1: HTML -> plain text.
            try
            {
                plainText = TextCleaner.HtmlToPlainText(sourceHtml);
            }
            catch (Exception conversionError)
            {
                Logger.Log(LogLevel.Error, nameof(PlainTextAndCopy), "Error cleaning HTML to text:" + Environment.NewLine + sourceHtml, conversionError);
                MessageBox.Show("There was an error getting text from the HTML", "error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            // Step 2: plain text -> clipboard.
            try
            {
                ClipboardHelper.CopyPlainTextToClipboard(plainText);
            }
            catch (Exception clipboardError)
            {
                Logger.Log(LogLevel.Error, nameof(PlainTextAndCopy), "Error writing TEXT to clipboard", clipboardError);
                MessageBox.Show("There was an error writing the TEXT to the clipboard", "error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }
#pragma warning restore CA1031 // Do not catch general exception types

            // Only replace the source once both steps succeeded.
            this.SourceValue = plainText;
            await this.SetStatus("The plain TEXT is on the clipboard, use Ctrl-V to paste.").ConfigureAwait(false);
        }

示例#4

0

显示文件

文件： MainWindow.xaml.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// Cleans the styling from the current source HTML using <c>CleanerSettings.Instance</c>,
        /// copies the cleaned HTML to the clipboard and stores it as the new source value.
        /// </summary>
        /// <returns>
        /// A task that completes after the status message has been set, or right after the error
        /// has been logged and reported when cleaning or the clipboard write fails.
        /// </returns>
        private async Task ClearStylingAndCopy()
        {
            string html = this.SourceValue;

#pragma warning disable CA1031 // Do not catch general exception types
            try
            {
                html = TextCleaner.ClearStylingFromHtml(
                    html,
                    CleanerSettings.Instance);
            }
            catch (Exception ex)
            {
                // 'html' is only reassigned on success, so the log still carries the original input.
                Logger.Log(LogLevel.Error, nameof(ClearStylingAndCopy), "Error cleaning HTML:" + Environment.NewLine + html, ex);
                MessageBox.Show("There was an error cleaning the HTML", "error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            try
            {
                ClipboardHelper.CopyToClipboard(html, html);
                this.SourceValue = html;
                Logger.Log(LogLevel.Debug, nameof(ClearStylingAndCopy), "Cleaned HTML and copied to clipboard");
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, nameof(ClearStylingAndCopy), "Error writing HTML to clipboard", ex);
                MessageBox.Show("There was an error writing the cleaned HTML to the clipboard", "error", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }
#pragma warning restore CA1031 // Do not catch general exception types

            await this.SetStatus("The cleaned HTML is on the clipboard, use Ctrl-V to paste.").ConfigureAwait(false);
        }

示例#5

0

显示文件

        /// <summary>
        /// Builds a file name from the last '/'-separated segment of the author text's link.
        /// </summary>
        /// <param name="authorText">The item whose <c>Link</c> supplies the name.</param>
        /// <returns>The last link segment after <c>TextCleaner.MakeFileAcceptableName</c> has been applied.</returns>
        public string GetFileName(AuthorText authorText)
        {
            var segments    = authorText.Link.Split('/');
            var lastSegment = segments[segments.Length - 1];

            return TextCleaner.MakeFileAcceptableName(lastSegment);
        }

示例#6

0

显示文件

        /// <summary>
        /// Straight single quotes directly inside parentheses are expected to become smart quotes.
        /// </summary>
        public void QuotesInBrackets_ShouldBeConverted()
        {
            // Arrange
            string input    = "<p>Something ('me') something</p>";
            string expected = "<p>Something (&lsquo;me&rsquo;) something</p>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.UpdateQuotes(doc, QuoteProcessing.ChangeToSmartQuotes));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#7

0

显示文件

        /// <summary>
        /// Determines the sentiment of a piece of text from the counts of its n-grams.
        /// </summary>
        /// <param name="text">The text to classify; punctuation is stripped, excess spaces removed and the text lower-cased first.</param>
        /// <param name="isRetweet">Forwarded to the count and vocabulary lookups.</param>
        /// <param name="ngramCardinality">The n-gram size used when generating n-grams and when looking up counts.</param>
        /// <param name="smoothingFactor">Forwarded to the class-count and n-gram-count lookups.</param>
        /// <param name="isStemmed">When true the generated n-grams are stemmed with <paramref name="dictionary"/>.</param>
        /// <param name="dictionary">Dictionary handed to <c>Processor.StemNgram</c>.</param>
        /// <param name="ngramDbSet">The n-gram set forwarded to the lookups.</param>
        /// <param name="oclumenContext">The context forwarded to the lookups.</param>
        /// <param name="ngramDictionary">Optional dictionary forwarded to <c>GetNgramCount</c>.</param>
        /// <returns>The key of the last entry returned by <c>GetNgramSentimentProbabilities</c>.</returns>
        public Sentiment GetTextSentiment <TEntity>(string text, bool isRetweet, int ngramCardinality,
                                                    decimal smoothingFactor, bool isStemmed, IDictionary <String, String> dictionary, DbSet <TEntity> ngramDbSet,
                                                    IOclumenContext oclumenContext, Dictionary <string, List <KeyValuePair <Sentiment, decimal> > > ngramDictionary = null) where TEntity : NgramBase
        {
            // Normalize the input before splitting it into n-grams.
            string normalized = new TextCleaner(text).StripPunctuation().RemoveExcessSpaces().ToLower().ToString();

            IList <string> terms = NgramGenerator.GenerateNgrams(normalized, ngramCardinality);

            if (isStemmed)
            {
                terms = Processor.StemNgram(terms, dictionary);
            }

            var countsPerTerm = new List <IList <KeyValuePair <Sentiment, decimal> > >(terms.Count);
            IList <KeyValuePair <Sentiment, decimal> > countsPerClass =
                GetClassCount(isRetweet, ngramCardinality, smoothingFactor, ngramDbSet, oclumenContext);

            // Collect the raw counts for every n-gram, in order.
            foreach (string term in terms)
            {
                IList <KeyValuePair <Sentiment, decimal> > termCounts =
                    GetNgramCount(term, isRetweet, ngramCardinality, smoothingFactor, ngramDbSet, oclumenContext, ngramDictionary);

                countsPerTerm.Add(termCounts);
            }

            int vocabularySize = GetVocabularySize(isRetweet, ngramCardinality, ngramDbSet, oclumenContext);

            // Combine the individual n-gram counts with the per-class counts into sentiment probabilities.
            var probabilities = GetNgramSentimentProbabilities(vocabularySize, countsPerTerm, countsPerClass);

            // NOTE(review): taking Last() presumably relies on the result being ordered so the
            // most probable sentiment comes last - confirm in GetNgramSentimentProbabilities.
            return probabilities.Last().Key;
        }

示例#8

0

显示文件

文件： TestOfficeMarkup.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// Runs <c>TextCleaner.RemoveOfficeMarkup</c> over an Office-style HTML fragment and checks that
        /// no &lt;o:p&gt; or &lt;u5:p&gt; opening tags are left in the output.
        /// </summary>
        public void RemoveOfficeMarkup()
        {
            // Arrange: a fragment containing mso-* styles and o:p / u5:p elements.
            var officeHtml = @"
<p class=MsoNormal><b><span lang=EN-US style='mso-ansi-language:EN-US'>HU-HU</span><u5:p></u5:p></b><span
lang=EN-US style='mso-ansi-language:EN-US'><o:p></o:p></span></p>

<ul style='margin-top:0cm' type=disc>
 <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'>text in header should be in Hungarian: Olvassa el a Wolters Kluwer
     legújabb Megfelelőségi szakértői betekintéseit – Cikk, whitepaper,
     kutatás, esettanulmány és podcast.<o:p></o:p></span><u5:p></u5:p></li>
 <li class=MsoListParagraph style='margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'>Read More button&nbsp;– see row 107 for local translation <o:p></o:p></span><u5:p></u5:p></li>
 <li class=MsoListParagraph style='color:black;margin-left:0cm;mso-list:l0 level1 lfo1'><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";color:windowtext;
     mso-ansi-language:EN-US'>add dynanic card for expert insights </span><span
     lang=EN-US style='mso-fareast-font-family:""Times New Roman"";mso-ansi-language:
     EN-US'><o:p></o:p></span></li>
</ul>

<u5:p></u5:p><u5:p>        ";

            // Act
            var cleaned = DocTester.ProcessSource(officeHtml, doc => TextCleaner.RemoveOfficeMarkup(doc));

            // Assert
            bool hasOfficeParagraph = cleaned.Contains("<o:p>");
            bool hasU5Paragraph     = cleaned.Contains("<u5:p>");

            Assert.IsFalse(hasOfficeParagraph, "Office markup should have been removed, like <o:p>");
            Assert.IsFalse(hasU5Paragraph, "Office markup should have been removed, like <u5:p>");
        }

示例#9

0

显示文件

        /// <summary>
        /// Straight double quotes in text are expected to become smart quotes, while existing
        /// smart-quote entities and quotes inside attribute values stay as they are.
        /// </summary>
        public void ToSmartQuotes_ShouldChangeSimpleQuotes()
        {
            // Arrange
            string input    = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called \"chief.\"</a>";
            string expected = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called &ldquo;chief.&rdquo;</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.UpdateQuotes(doc, QuoteProcessing.ChangeToSmartQuotes));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#10

0

显示文件

文件： TestAnchorsAndLinks.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// A remote link that already has rel="noreferrer" is expected to get "noopener" appended
        /// to the rel value and a target="_blank" attribute.
        /// </summary>
        public void LinksToRemoteWithRel2_ShouldGetTargetAndNoOpener()
        {
            // Arrange
            string input    = "<a href=\"https://www.example.com\" rel=\"noreferrer\">link</a>";
            string expected = "<a href=\"https://www.example.com\" rel=\"noreferrer noopener\" target=\"_blank\">link</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.AddBlankLinkTargets(doc, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#11

0

显示文件

文件： TestAnchorsAndLinks.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// A site-relative link is expected to come back unchanged: no target and no rel attribute.
        /// </summary>
        public void LinksToLocal_ShouldNotGetTargetOrOpener()
        {
            // Arrange
            string input    = "<a href=\"/default.html\">link</a>";
            string expected = "<a href=\"/default.html\">link</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.AddBlankLinkTargets(doc, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#12

0

显示文件

        /// <summary>
        /// With <c>QuoteProcessing.NoChange</c> the markup is expected to round-trip unmodified.
        /// </summary>
        public void NoChange_ShouldNotChange()
        {
            // Arrange: a mix of smart-quote entities and straight quotes.
            string input = "<a target=\"_blank\">&ldquo;some remark&rdquo; said the so-called \"chief\"</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.UpdateQuotes(doc, QuoteProcessing.NoChange));

            // Assert: output equals input.
            Assert.AreEqual(input, actual);
        }

示例#13

0

显示文件

文件： TestAnchorsAndLinks.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// A remote link with an explicit target is expected to keep that target and only gain rel="noopener".
        /// </summary>
        public void LinksToRemoteWithTarget_ShouldNotChangeTargetButAddOpener()
        {
            // Arrange
            string input    = "<a href=\"https://www.example.com\" target=\"_self\">link</a>";
            string expected = "<a href=\"https://www.example.com\" target=\"_self\" rel=\"noopener\">link</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.AddBlankLinkTargets(doc, true));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#14

0

显示文件

文件： TestAnchorsAndLinks.cs 项目： hdkesting/RichTextCleaner

        /// <summary>
        /// With the second argument set to false, a remote link is expected to gain target="_blank" and no rel attribute.
        /// </summary>
        public void LinksToRemote_ShouldGetTarget()
        {
            // Arrange
            string input    = "<a href=\"https://www.example.com\">link</a>";
            string expected = "<a href=\"https://www.example.com\" target=\"_blank\">link</a>";

            // Act
            string actual = DocTester.ProcessSource(input, doc => TextCleaner.AddBlankLinkTargets(doc, false));

            // Assert
            Assert.AreEqual(expected, actual);
        }

示例#15

0

显示文件

        /// <summary>
        /// Test helper: parses the source into a document, lets the supplied processor work on
        /// that document, and serializes the document back to a string.
        /// </summary>
        /// <param name="source">The (html) source to parse.</param>
        /// <param name="processor">The operation to run against the parsed document.</param>
        /// <returns>The HTML produced by <c>TextCleaner.GetHtmlSource</c> after processing.</returns>
        public static string ProcessSource(string source, Action <HtmlDocument> processor)
        {
            HtmlDocument document = TextCleaner.CreateHtmlDocument(source);

            processor(document);

            return TextCleaner.GetHtmlSource(document, false);
        }

示例#16

0

显示文件

文件： TextCleanerTests.cs 项目： st-00/EngHowToSay

        /// <summary>
        /// Cleaned text must contain no line breaks, no double spaces, no tabs and no leading space.
        /// </summary>
        public void Clean_Text_Fragment()
        {
            // Act
            string cleaned = TextCleaner.Clean(TestData.TextWithOddCharacters);

            // Assert
            bool startsWithSpace = cleaned.StartsWith(" ");

            Assert.DoesNotContain(Environment.NewLine, cleaned);
            Assert.DoesNotContain("  ", cleaned);
            Assert.DoesNotContain("\t", cleaned);
            Assert.False(startsWithSpace);
        }

示例#17

0

显示文件

        /// <summary>
        /// Downloads every URL of a category, extracts search hits with <c>baiduRegex</c> and writes one
        /// worksheet row per accepted hit, then saves the workbook.
        /// </summary>
        /// <param name="dailyWorksheet">Worksheet that receives the rows.</param>
        /// <param name="dailybook">Workbook that is saved after all URLs have been processed.</param>
        /// <param name="dailyStartRow">Index of the next row to write; incremented once per written row.</param>
        /// <param name="categoryName">Written to column 2 of the first row produced by this call.</param>
        /// <param name="categoryUrls">The pages to download and scan.</param>
        private void CrawlDailyReport(Worksheet dailyWorksheet, Workbook dailybook, ref int dailyStartRow, string categoryName,
                                      string[] categoryUrls)
        {
            bool categoryNameWritten = false;

            foreach (string categoryUrl in categoryUrls)
            {
                var pageHtml = WebRequestProcessor.DownloadHTTPString(categoryUrl);
                // Fixed two-second pause after every download.
                Thread.Sleep(2000);
                var hits = Regex.Matches(pageHtml, baiduRegex,
                                         RegexOptions.IgnoreCase | RegexOptions.Multiline);
                foreach (Match hit in hits)
                {
                    // Only hits whose publication date text contains "前" are kept
                    // (presumably relative timestamps such as "N hours ago" - confirm).
                    bool accepted = hit.Groups["PubDate"].Value.Contains("前");
                    if (!accepted)
                    {
                        continue;
                    }

                    // The category name is written only on the first accepted hit.
                    if (!categoryNameWritten)
                    {
                        dailyWorksheet.Cells[dailyStartRow, 2].PutValue(categoryName);
                        categoryNameWritten = true;
                    }

                    var hitUrl = hit.Groups["Url"].Value;
                    try
                    {
                        // Column 1: the domain derived from the hit URL's host (the media name).
                        Uri parsedUrl = new Uri(hitUrl);
                        var mediaDomain = GetUrlDomain(parsedUrl.Host);
                        dailyWorksheet.Cells[dailyStartRow, 1].PutValue(mediaDomain);
                    }
                    catch (Exception)
                    {
                        // NOTE(review): failures are swallowed silently, leaving column 1 unset for this row.
                    }

                    var cleanedTitle   = TextCleaner.FullClean(hit.Groups["Title"].Value);
                    var cleanedSummary = TextCleaner.FullClean(hit.Groups["Text"].Value);
                    var cellText       = cleanedTitle + Environment.NewLine + cleanedSummary;

                    var blueStyle = dailyWorksheet.Cells[dailyStartRow, 6].GetDisplayStyle();
                    blueStyle.Font.Color = Color.Blue;

                    // The formula refers to the row by dailyStartRow + 1.
                    var formulaRow = dailyStartRow + 1;
                    dailyWorksheet.Cells[dailyStartRow, 0].PutValue(hitUrl);
                    dailyWorksheet.Cells[dailyStartRow, 5].Formula = "=VLOOKUP(B" + formulaRow + ",Sheet2!A:B,2,FALSE)";

                    dailyWorksheet.Cells[dailyStartRow, 6].SetStyle(blueStyle);
                    dailyWorksheet.Cells[dailyStartRow, 6].PutValue(cellText);

                    dailyWorksheet.Hyperlinks.Add(dailyStartRow, 6, 1, 1, hitUrl);
                    dailyWorksheet.Cells[dailyStartRow, 7].PutValue(DateTime.Now.ToString("yyyy-MM-dd"));
                    dailyWorksheet.Cells[dailyStartRow, 8].PutValue("负面舆情");
                    dailyStartRow++;
                }
            }

            // NOTE(review): the output path is hard-coded.
            dailybook.Save(@"D:\dailyreport\日报.xlsx");
        }

示例#18

0

显示文件

        /// <summary>
        /// Extracts the cleaned inner text of one field of an item, located by an XPath that is
        /// relative to the item's base node.
        /// </summary>
        /// <param name="BaseNode">Root node of a single item.</param>
        /// <param name="RelXPath">XPath relative to <paramref name="BaseNode"/>; when blank, the base node's own text is used.</param>
        /// <param name="postion">Not used by this overload.</param>
        /// <param name="CleanConnectionMark">Selects which <c>TextCleaner.FullClean</c> call is applied: the default one when true, the explicit-flags one when false.</param>
        /// <returns>The cleaned text, or null when <paramref name="BaseNode"/> is null or no matching node with text exists.</returns>
        internal static string ExtractInnerTextFromBaseNode(HtmlNode BaseNode, string RelXPath, int postion, bool CleanConnectionMark = true)
        {
            if (BaseNode == null)
            {
                return(null);
            }

            string innerTextValue;

            if (string.IsNullOrWhiteSpace(RelXPath))
            {
                // No relative path: the field is the base node itself.
                innerTextValue = XPathUtility.InnerTextNonDescendants(BaseNode);
            }
            else
            {
                innerTextValue = "";

                // First attempt: evaluate the path through the navigator and take the node's value.
                try
                {
                    HtmlNodeNavigator navigator = (HtmlNodeNavigator)BaseNode.CreateNavigator();
                    var node = navigator.SelectSingleNode(RelXPath);
                    // Check for "no match" explicitly instead of relying on a NullReferenceException.
                    if (node != null)
                    {
                        innerTextValue = node.Value;
                    }
                }
                catch (Exception)
                {
                    // Deliberate best-effort: any failure here falls through to the node-based lookup below.
                }

                // Fallback: first matching node that has text of its own.
                if (string.IsNullOrWhiteSpace(innerTextValue))
                {
                    IEnumerable <HtmlNode> MatchNodes = BaseNode.SelectNodes(RelXPath);
                    // Single pass over the candidates; the original enumerated the filtered sequence twice.
                    HtmlNode firstWithText = MatchNodes == null
                        ? null
                        : MatchNodes.FirstOrDefault(n => !string.IsNullOrEmpty(XPathUtility.InnerTextNonDescendants(n)));

                    if (firstWithText == null)
                    {
                        return(null);
                    }

                    innerTextValue = XPathUtility.InnerTextNonDescendants(firstWithText);
                }
            }

            if (CleanConnectionMark)
            {
                return(TextCleaner.FullClean(innerTextValue));
            }

            return(TextCleaner.FullClean(innerTextValue, true, true, true, false, true, false));
        }

示例#19

0

显示文件

文件： BOT.cs 项目： RonaldoJonson/Oli-Olist

    /// <summary>
    /// Coroutine that, after an initial 5 second wait, repeatedly takes a question from the board,
    /// collects the answers whose intent key occurs in the question text, and waits 5 seconds
    /// between rounds. It never finishes on its own.
    /// </summary>
    IEnumerator GetQuestion()
    {
        WaitForSeconds pollDelay = new WaitForSeconds(5);

        yield return pollDelay;

        Item item = new Item();

        while (true)
        {
            QuestionStructure current = board.GetQuestion();

            // Start every round from a clean state.
            awnsers.Clear();
            tags.Clear();
            item = new Item();

            if (current != null)
            {
                // Normalize the question text: lower-case first, then clean.
                current.pergunta_texto = current.pergunta_texto.ToLower();
                current.pergunta_texto = TextCleaner.CleanText(current.pergunta_texto);

                Debug.Log("Respondendo: " + current.pergunta_texto);

                if (!item.LoadItem(current.produto_nome))
                {
                    Debug.Log("Item nao cadastrado");
                }
                else
                {
                    Debug.Log("Verficando Tags");

                    // Every intent key contained in the question contributes its answer.
                    foreach (string intentKey in item.intents.Keys)
                    {
                        if (!current.pergunta_texto.Contains(intentKey))
                        {
                            continue;
                        }

                        tags.Add(intentKey);
                        Debug.Log("Tag Encontrada: " + intentKey);
                        awnsers.Add(item.intents[intentKey]);
                    }

                    if (awnsers.Count == 0)
                    {
                        // Nothing matched: move the board on.
                        board.Next();
                    }
                    else
                    {
                        CreateAwnser();
                    }
                }
            }

            yield return pollDelay;
        }
    }

示例#20

0

显示文件

文件： CleanerTests.cs 项目： Matt8109/TwitterSentimentAnalyzer

        /// <summary>
        /// <c>TextCleaner.ToLower</c> must lower-case mixed-case text and leave empty and
        /// already lower-case text unchanged.
        /// </summary>
        public void TestToLower()
        {
            const string firstString = "Hello World";

            var textCleaner = new TextCleaner(firstString);

            // Assert.AreEqual takes (expected, actual); the swapped order produced misleading failure messages.
            Assert.AreEqual(firstString.ToLower(), textCleaner.ToLower().ToString());

            Assert.AreEqual("", new TextCleaner("").ToLower().ToString());
            Assert.AreEqual("hello", new TextCleaner("hello").ToLower().ToString());
        }

示例#21

0

显示文件

文件： TextCLeanerTests.cs 项目： anatolitrifonov/Opinions

        /// <summary>
        /// <c>TextCleaner.Clean</c> must return null for null, leave whitespace-only input untouched,
        /// and replace the quote, ellipsis and dash code points below with their plain ASCII forms.
        /// </summary>
        public void TextCleanerTests_IndexKey_Indexes()
        {
            Assert.Null(TextCleaner.Clean(null));
            // Assert.Equal takes (expected, actual).
            Assert.Equal("    ", TextCleaner.Clean("    "));

            // Four double-quote variants, two ellipsis variants, four single-quote variants and a dash,
            // given both as code points 133/145-148 and as their Unicode counterparts (8211-8230).
            var test = ((char)147).ToString() + ((char)148).ToString() +
                       ((char)8220).ToString() + ((char)8221).ToString() + " " +
                       ((char)133).ToString() + ((char)8230).ToString() + " " +
                       ((char)146).ToString() + ((char)8217).ToString() +
                       ((char)145).ToString() + ((char)8216).ToString() + " " +
                       ((char)8211).ToString();
            var result = TextCleaner.Clean(test);

            Assert.Equal("\"\"\"\" ...... '''' -", result);
        }

示例#22

0

显示文件

        /// <summary>
        /// Lists the Pokemon of a given type, one page at a time.
        /// </summary>
        /// <param name="type">Raw type name; it is passed through <c>TextCleaner.NormalInput</c> before use.</param>
        /// <param name="pageNumber">1-based page to show.</param>
        /// <param name="pageSize">Number of entries per page.</param>
        /// <returns>
        /// The view for the requested page, or a redirect to the "Index" action (with
        /// <c>TempData["error"]</c> set) when the input normalizes to null.
        /// </returns>
        public IActionResult SearchByType(string type, [FromQuery] int pageNumber = 1, [FromQuery] int pageSize = 10)
        {
            string t = TextCleaner.NormalInput(type);

            // Same guard as SearchByName / SearchByMove: reject input that normalizes to null
            // instead of handing null to the lookup.
            if (t == null)
            {
                TempData["error"] = "Please enter a valid entry";
                return(RedirectToAction("Index"));
            }

            TempData.Remove("error");
            TempData.Remove("moveerror");

            TempData["typeName"] = t;
            List <Pokemon> pokemon = pk.GetType(t);

            // Slice out the requested page.
            List <Pokemon> pagedPokemon = pokemon.Skip((pageNumber - 1) * pageSize).Take(pageSize).ToList();

            TempData["pageNumber"] = pageNumber;
            TempData["pageSize"]   = pageSize;

            return(View(pagedPokemon));
        }

示例#23

0

显示文件

文件： TextCleanerTests.cs 项目： propes/hackasm-csharp

        /// <summary>
        /// Blank lines and comment-only lines must be dropped; trailing comments and surrounding
        /// whitespace must be stripped from the lines that remain.
        /// </summary>
        public void RemoveCommentsAndWhitespace_WorksCorrectly()
        {
            // Arrange
            string[] input =
            {
                " foo ",
                "",
                "// some comment",
                "bar // another comment",
                ""
            };
            var cleaner = new TextCleaner();

            // Act
            var cleaned = cleaner.RemoveCommentsAndWhitespace(input);

            // Assert
            Assert.Equal(2, cleaned.Length);
            Assert.Equal("foo", cleaned[0]);
            Assert.Equal("bar", cleaned[1]);
        }

示例#24

0

显示文件

        /// <summary>
        /// Cleans the styling from the current source HTML using <c>CleanerSettings.Instance</c>,
        /// copies the cleaned HTML to the clipboard and stores it as the new source value,
        /// logging the HTML size before and after processing.
        /// </summary>
        /// <returns>
        /// A task that completes after the final status message has been set, or after the error
        /// status has been set when cleaning or the clipboard write fails.
        /// </returns>
        private async Task ClearStylingAndCopyAsync()
        {
            Logger.Log(LogLevel.Debug, nameof(MainPage), "Start clearing styling");
            string html = this.SourceValue;

            Logger.Log(LogLevel.Debug, nameof(MainPage), $"HTML size before processing: {html?.Length ?? 0}");

#pragma warning disable CA1031 // Do not catch general exception types
            try
            {
                html = TextCleaner.ClearStylingFromHtml(
                    html,
                    CleanerSettings.Instance);
            }
            catch (Exception ex)
            {
                // 'html' is only reassigned on success, so the log still carries the original input.
                Logger.Log(LogLevel.Error, nameof(ClearStylingAndCopyAsync), "Error cleaning HTML:" + Environment.NewLine + html, ex);
                await this.SetStatusAsync("There was an error cleaning the HTML");

                return;
            }

            Logger.Log(LogLevel.Debug, nameof(MainPage), $"HTML size after processing: {html?.Length ?? 0}");

            try
            {
                ClipboardHelper.CopyToClipboard(html, html);
                this.SourceValue = html;
                Logger.Log(LogLevel.Debug, nameof(ClearStylingAndCopyAsync), "Cleaned HTML and copied to clipboard");
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, nameof(ClearStylingAndCopyAsync), "Error writing HTML to clipboard", ex);
                await this.SetStatusAsync("There was an error writing the cleaned HTML to the clipboard");

                return;
            }
#pragma warning restore CA1031 // Do not catch general exception types

            await this.SetStatusAsync("The cleaned HTML is on the clipboard, use Ctrl-V to paste.").ConfigureAwait(false);

            Logger.Log(LogLevel.Debug, nameof(MainPage), "Done clearing styling");
        }

示例#25

0

显示文件

文件： RegexParser.cs 项目： mlzboy/list_discovery

        /// <summary>
        /// Copies the named groups of a regex match (Url, Title, Text, AuthorName, Source, MediaName,
        /// PubDate) into the corresponding fields of an article.
        /// </summary>
        /// <param name="m">The regex match to read the named groups from.</param>
        /// <param name="Item">The article to fill; each field's current value is passed to <c>RegexUtility.TryGetString</c> as its third argument.</param>
        /// <param name="BaseUrl">Absolute URL against which the matched item URL is resolved.</param>
        /// <param name="ItemUrlCaseSensitive">Not used inside this method body.</param>
        public static void Match2Item(Match m, ref Article Item, string BaseUrl, bool ItemUrlCaseSensitive = false)
        {
            // URL: resolve the matched URL against the base URL.
            Uri baseUri    = new Uri(BaseUrl);
            var matchedUrl = RegexUtility.TryGetString(m, "Url", Item.Url, false);
            Item.Url = new Uri(baseUri, matchedUrl).AbsoluteUri;

            // Title.
            Item.Title = RegexUtility.TryGetString(m, "Title", Item.Title);
            // "Lower the clean level" (translated from the original comment).
            // NOTE(review): as written this re-cleans a title that is already null or empty; it was
            // possibly meant to retry from the raw match with lighter cleaning - confirm intent.
            if (string.IsNullOrEmpty(Item.Title))
            {
                Item.Title = HTMLCleaner.CleanHTML(Item.Title, true);
            }

            // Body text.
            Item.HtmlContent = RegexUtility.TryGetString(m, "Text", Item.HtmlContent, false);

            // Author information.
            Item.Author = RegexUtility.TryGetString(m, "AuthorName", Item.Author);
            Item.Source = RegexUtility.TryGetString(m, "Source", Item.Source);

            bool hasSource = !String.IsNullOrWhiteSpace(Item.Source);
            if (hasSource)
            {
                Item.Source = TextCleaner.FullClean(Item.Source);
            }

            // Media information.
            Item.MediaName = RegexUtility.TryGetString(m, "MediaName", Item.MediaName);

            // Publication time: parsed from the cleaned PubDate group when that group matched.
            Group pubDateGroup = m.Groups["PubDate"];
            if (pubDateGroup.Success)
            {
                Item.PubDate = DateTimeParser.Parser(HTMLCleaner.CleanHTML(pubDateGroup.Value, true));
            }

            // A date at (or below) DateTime.MinValue is replaced by the current local time.
            if (Item.PubDate <= DateTime.MinValue)
            {
                Item.PubDate = DateTime.Now;
            }

            Match2ItemCount(m, Item.ViewDataList);
        }

示例#26

0

显示文件

    /// <summary>
    /// Builds an item from a colon-separated description of alternating intent keys and answers,
    /// then saves it.
    /// </summary>
    /// <param name="id">Identifier assigned to the new item.</param>
    /// <param name="description">Text of the form "key:answer:key:answer..."; " : " is treated the same as ":".</param>
    public void Register(string id, string description)
    {
        string[] parts = description.Replace(" : ", ":").Split(':');

        Item item = new Item();
        item.ID = id;

        // Even positions hold keys, the following odd position holds the answer.
        // A trailing key without an answer is ignored.
        for (int keyIndex = 0; keyIndex < parts.Length - 1; keyIndex += 2)
        {
            string intentKey = TextCleaner.CleanText(parts[keyIndex]);
            intentKey = TextCleaner.RemovePonctuation(intentKey);

            item.AddIntent(intentKey, parts[keyIndex + 1]);
        }

        item.SaveItem();
    }

示例#27

0

显示文件

        /// <summary>
        /// Lists the Pokemon that can learn a given move, one page at a time.
        /// </summary>
        /// <param name="move">Raw move name; it is passed through <c>TextCleaner.NormalInput</c> before use.</param>
        /// <param name="pageNumber">1-based page to show.</param>
        /// <param name="pageSize">Number of entries per page.</param>
        /// <returns>
        /// The view for the requested page, or a redirect to the "Index" action when the input
        /// normalizes to null (<c>TempData["error"]</c>) or the lookup throws (<c>TempData["moveerror"]</c>).
        /// </returns>
        public IActionResult SearchByMove(string move, [FromQuery] int pageNumber = 1, [FromQuery] int pageSize = 10)
        {
            // Normalizes search string
            string search = TextCleaner.NormalInput(move);

            TempData["moveName"] = search;

            if (search == null)
            {
                TempData["error"] = "Please enter a valid entry";
                return(RedirectToAction("Index"));
            }

            // Fetch the move.
            MoveRoot m;

            try
            {
                m = pk.GetMove(search);
            }
            catch (Exception)
            {
                // Any lookup failure is reported to the user as an invalid entry.
                TempData["moveerror"] = "Please enter a valid entry";
                return(RedirectToAction("Index"));
            }

            // Slice out the requested page of Pokemon that learn this move.
            List <Learned_By_Pokemon> pokemonByUrl =
                m.learned_by_pokemon.Skip((pageNumber - 1) * pageSize).Take(pageSize).ToList();

            TempData["pageNumber"] = pageNumber;
            TempData["pageSize"]   = pageSize;

            TempData.Remove("error");
            TempData.Remove("moveerror");

            // Passing the list into the view
            return(View(pokemonByUrl));
        }

示例#28

0

显示文件

文件： XpathParser.cs 项目： mlzboy/list_discovery

        /// <summary>
        /// Extracts the cleaned inner text of one field of an item, located by an XPath that is
        /// relative to the item's base node.
        /// </summary>
        /// <param name="BaseNode">Root node of a single item.</param>
        /// <param name="RelXPath">XPath relative to <paramref name="BaseNode"/>; when blank and <paramref name="postion"/> is 0, the base node's own text is used.</param>
        /// <param name="postion">Zero-based index into the matching nodes that have text of their own.</param>
        /// <param name="CleanConnectionMark">Selects which <c>TextCleaner.FullClean</c> call is applied: the default one when true, the explicit-flags one when false.</param>
        /// <returns>
        /// The cleaned text, or null when <paramref name="BaseNode"/> is null, nothing matches,
        /// or <paramref name="postion"/> is outside the range of matching nodes.
        /// </returns>
        internal static string ExtractInnerTextFromBaseNode(HtmlNode BaseNode, string RelXPath, int postion, bool CleanConnectionMark = true)
        {
            if (BaseNode == null)
            {
                return(null);
            }

            if (string.IsNullOrWhiteSpace(RelXPath) && postion == 0)
            {
                // No relative path: the field is the base node itself.
                if (CleanConnectionMark)
                {
                    return(TextCleaner.FullClean(XPathUtility.InnerTextNonDescendants(BaseNode)));
                }
                else
                {
                    return(TextCleaner.FullClean(XPathUtility.InnerTextNonDescendants(BaseNode), true, true, true, false, true, false));
                }
            }

            IEnumerable <HtmlNode> MatchNodes = BaseNode.SelectNodes(RelXPath);

            // No match, or an index that can never be valid: report "not found" instead of
            // failing later with a NullReferenceException / ArgumentOutOfRangeException.
            if (MatchNodes == null || postion < 0)
            {
                return(null);
            }

            // Single pass: skip to the requested node among those with text of their own.
            HtmlNode target = MatchNodes
                              .Where(n => !string.IsNullOrEmpty(XPathUtility.InnerTextNonDescendants(n)))
                              .Skip(postion)
                              .FirstOrDefault();

            if (target == null)
            {
                return(null);
            }

            string innerText = XPathUtility.InnerTextNonDescendants(target);

            if (CleanConnectionMark)
            {
                return(TextCleaner.FullClean(innerText));
            }
            else
            {
                return(TextCleaner.FullClean(innerText, true, true, true, false, true, false));
            }
        }

示例#29

0

显示文件

        /// <summary>
        /// Checks whether a title is acceptable for the configured language.
        /// </summary>
        /// <param name="Title">The raw title; it is cleaned with <c>TextCleaner.FullClean</c> before the checks.</param>
        /// <returns>
        /// False for a null or blank title. Otherwise true when the cleaned title satisfies both the
        /// length check (characters for Chinese and the default case, space-separated words for English)
        /// and the digit-ratio check. A length limit &lt;= 0 or a digit ratio &gt;= 1 disables the respective check.
        /// </returns>
        public bool ValidateTitle(string Title)
        {
            if (string.IsNullOrWhiteSpace(Title))
            {
                return(false);
            }
            string CleanTitle = TextCleaner.FullClean(Title);

            switch (Language)
            {
            default:
            case Enums.Language.CHINESE:
                // Chinese (and default): title long enough, and share of digit characters below the limit.
                return((MinLenTitle <= 0 || CleanTitle.Length >= MinLenTitle) &&
                       (MaxRateTitleDigits >= 1 || CleanTitle.Length * MaxRateTitleDigits > TextCleaner.CountDigitChars(CleanTitle)));

            case Enums.Language.ENGLISH:
                // English: enough words, and share of digit characters below the limit.
                // The word-count alternatives are parenthesized so the digit-ratio check also applies
                // when the word-count check is disabled; '&&' binds tighter than '||', which
                // previously let any title pass whenever MinWordCountTitle <= 0.
                return((MinWordCountTitle <= 0 || CleanTitle.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Length > MinWordCountTitle) &&
                       (MaxRateTitleDigits >= 1 || CleanTitle.Length * MaxRateTitleDigits > TextCleaner.CountDigitChars(CleanTitle)));
            }
        }

示例#30

0

显示文件

文件： NGramBuilder.cs 项目： starship-consulting/Starship.Language

        /// <summary>
        /// Parses one line of the form "frequency word word ..." into an n-gram item.
        /// </summary>
        /// <param name="line">The raw input line.</param>
        /// <returns>
        /// The parsed item, or null when the line contains an illegal token, has no parseable
        /// leading frequency, or its frequency is below <c>MinimumFrequency</c>.
        /// </returns>
        private NGramItem GetNGrams(string line)
        {
            // Filter first: a line that is rejected anyway does not need to be split or parsed.
            if (English.IllegalTokens.Any(line.Contains))
            {
                return(null);
            }

            var segments = TextCleaner.CleanSplit(" ", line);

            // Treat an empty or malformed line like any other rejected line instead of throwing.
            long frequency;
            if (segments == null || !segments.Any() || !long.TryParse(segments[0], out frequency))
            {
                return(null);
            }

            if (frequency < MinimumFrequency)
            {
                return(null);
            }

            // Everything after the frequency is the n-gram itself.
            var words = segments.Skip(1).ToArray();

            return(new NGramItem {
                Text = string.Join(" ", words),
                Frequency = frequency,
                Words = words
            });
        }

C# (CSharp) TextCleaner示例