public void Match_TwoWordsTextMatchesPattern()
{
    // Arrange: two different words paired with two different pattern symbols.
    const string input = "buch katze";
    const string symbols = "23";

    // Act
    var isMatch = WordPattern.TextMatchesPattern(input, symbols);

    // Assert
    Assert.IsTrue(isMatch);
}
public void Given_string_same_When_match_Then_return_false()
{
    // Arrange: four identical words checked against the pattern "abba".
    const string letters = "abba";
    const string words = "dog dog dog dog";

    // Act
    var result = WordPattern.FindWordPattern(letters, words);

    // Assert: the expected outcome is "no match".
    Assert.AreEqual(false, result);
}
/// <summary>
/// Counts how often each lower-cased match of <c>WordPattern</c> occurs in <c>_text</c>.
/// </summary>
/// <returns>A dictionary mapping each lower-cased matched value to its number of occurrences.</returns>
public Dictionary<string, int> CountWords()
{
    var countsByWord = new Dictionary<string, int>();

    foreach (Match hit in WordPattern.Matches(_text))
    {
        string key = hit.Value.ToLower();

        // A key that has not been seen yet starts from zero.
        int seenSoFar;
        countsByWord.TryGetValue(key, out seenSoFar);
        countsByWord[key] = seenSoFar + 1;
    }

    return countsByWord;
}
public void Given_pattern_str_When_match_Then_return()
{
    // Arrange: the words follow the same a-b-b-a arrangement as the pattern.
    const string letters = "abba";
    const string words = "dog cat cat dog";

    // Act
    var result = WordPattern.FindWordPattern(letters, words);

    // Assert
    Assert.AreEqual(true, result);
}
public void DoesntMatch_PatternLongerThenText()
{
    // Arrange: two words, but three pattern symbols.
    const string input = "buch hund";
    const string symbols = "123";

    // Act
    var isMatch = WordPattern.TextMatchesPattern(input, symbols);

    // Assert
    Assert.IsFalse(isMatch);
}
public void DoesntMatch_TextLongerThenPattern()
{
    // Arrange: three words, but only two pattern symbols.
    const string input = "buch hund Katze";
    const string symbols = "12";

    // Act
    var isMatch = WordPattern.TextMatchesPattern(input, symbols);

    // Assert
    Assert.IsFalse(isMatch);
}
public void DoesntMatch_TwoWordsTextMatchesPattern()
{
    // Arrange: two different words against a pattern that repeats one symbol.
    const string input = "buch hund";
    const string symbols = "22";

    // Act
    var isMatch = WordPattern.TextMatchesPattern(input, symbols);

    // Assert
    Assert.IsFalse(isMatch);
}
/// <summary>
/// Routes a regex match to the processing method that corresponds to the type of the
/// pattern whose group succeeded in the match.
/// </summary>
/// <param name="definition">Definition whose <c>Patterns</c> entries are searched; must not be null.</param>
/// <param name="match">The regex match to process; must not be null.</param>
/// <returns>
/// The result of the block, markup or word pattern processor, or the raw matched text
/// when the pattern is of none of those types.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="definition"/> or <paramref name="match"/> is null.</exception>
private string ElementMatchHandler(Definition definition, Match match)
{
    if (definition == null)
    {
        throw new ArgumentNullException(nameof(definition));
    }

    if (match == null)
    {
        throw new ArgumentNullException(nameof(match));
    }

    // Pick the first pattern entry whose key names a group that succeeded in this match.
    // NOTE(review): First(...) presumably throws when no entry qualifies — confirm callers guarantee one.
    var pattern = definition.Patterns.First ( x => match.Groups[x.Key] .Success ) .Value;

    // Dispatch on the runtime type of the pattern; unknown types fall back to the matched text.
    return(pattern switch
    {
        BlockPattern blockPattern => ProcessBlockPatternMatch(definition, blockPattern, match),
        MarkupPattern markupPattern => ProcessMarkupPatternMatch(definition, markupPattern, match),
        WordPattern wordPattern => ProcessWordPatternMatch(definition, wordPattern, match),
        _ => match.Value,
    });
public void Match_OneWordTextMatchesPattern()
{
    // Arrange: a single word and a single pattern symbol.
    const string input = "buch";
    const string symbols = "5";

    // Act
    var isMatch = WordPattern.TextMatchesPattern(input, symbols);

    // Assert
    Assert.IsTrue(isMatch);
}
/// <summary>
/// Console entry point: prints what <c>countWords</c> returns for a fixed sample string,
/// then waits for a line of input before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var counter = new WordPattern();
    var result = counter.countWords("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJ");
    Console.Out.WriteLine(result);

    // Block until a line is read from standard input.
    Console.In.ReadLine();

    // TODO: Add code to start application here
}
// END CUT HERE
// BEGIN CUT HERE
/// <summary>
/// Test entry point: runs the test driver with the argument -1 and prints any exception
/// instead of letting it escape.
/// </summary>
public static void Main()
{
    try
    {
        var harness = new WordPattern();
        harness.run_test(-1);
    }
    catch (Exception error)
    {
        // Print the exception's full string form rather than only its stack trace.
        Console.WriteLine(error.ToString());
    }
}
// END CUT HERE
// BEGIN CUT HERE
/// <summary>
/// Test entry point: invokes <c>run_test(-1)</c> on a fresh instance and writes any
/// thrown exception to the console.
/// </summary>
public static void Main()
{
    try
    {
        new WordPattern().run_test(-1);
    }
    catch (Exception failure)
    {
        // Report the failure as text; the exception is not rethrown.
        Console.WriteLine(failure.ToString());
    }
}
// Searches for the given word pattern and either processes the words found or, when the
// search returns more than 108 words and the pattern depth is at most 3, recurses into the
// child patterns of the pattern.
private void ReadWordsByWordPermutationsRecursive(IWebDriver driver, WordPattern wordPattern, WordHintDbContext db, User adminUser)
{
    // Run the search for this pattern and get the word count
    var (wordCount, node, url, page) = GetWordCountByPattern(driver, wordPattern);

    // Nothing found: nothing to process and nothing to recurse into
    if (wordCount == 0)
    {
        return;
    }

    Log.Information("Found {0} words when searching for '{1}' on page {2}", wordCount, wordPattern.Pattern, page + 1);
    writer.WriteLine("Found {0} words when searching for '{1}' on page {2}", wordCount, wordPattern.Pattern, page + 1);

    bool depthLimitReached = wordPattern.Depth > 3;

    // Too many words and still allowed to go deeper: split the pattern into its children
    if (wordCount > 108 && !depthLimitReached)
    {
        foreach (var childPattern in wordPattern.GetWordPatternChildren())
        {
            // Stop as soon as the last-word has been flagged as missed
            if (hasMissedLastWord)
            {
                return;
            }

            // While the last word has not been found yet, skip children that do not match it
            bool skipChild = !hasFoundLastWord && childPattern.LastWord != null && !childPattern.IsMatchLastWord;
            if (skipChild)
            {
                Log.Information("Skipping pattern '{0}'.", childPattern.Pattern);
                continue;
            }

            Log.Information("Processing pattern '{0}'.", childPattern.Pattern);
            ReadWordsByWordPermutationsRecursive(driver, childPattern, db, adminUser);
        }

        return;
    }

    if (depthLimitReached)
    {
        Log.Error("Warning! Pattern search depth is now {0}. Found {1} words when searching for '{2}' on page {3}", wordPattern.Depth, wordCount, wordPattern.Pattern, page + 1);
    }

    // Process each word found using the specified word pattern
    ProcessWordsUntilEmpty(wordPattern, driver, db, adminUser, page, node, url);
}
/// <summary>
/// Wraps the matched text in a span, using either an inline style or a CSS class
/// depending on <c>UseCss</c>.
/// </summary>
/// <param name="definition">Definition whose name is used for the CSS class; only read when <c>UseCss</c> is true.</param>
/// <param name="pattern">The word pattern supplying the style or the name; must not be null.</param>
/// <param name="match">The regex match whose value is wrapped; must not be null.</param>
/// <returns>The matched text formatted with <c>STYLE_SPAN_FORMAT</c> or <c>CLASS_SPAN_FORMAT</c>.</returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="pattern"/> or <paramref name="match"/> is null, or when
/// <paramref name="definition"/> is null and <c>UseCss</c> is true.
/// </exception>
protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
{
    // Fail with a descriptive exception instead of a NullReferenceException further down.
    if (pattern == null)
    {
        throw new System.ArgumentNullException(nameof(pattern));
    }

    if (match == null)
    {
        throw new System.ArgumentNullException(nameof(match));
    }

    if (!UseCss)
    {
        var patternStyle = HtmlEngineHelper.CreatePatternStyle(pattern.Style);

        return string.Format(STYLE_SPAN_FORMAT, patternStyle, match.Value);
    }

    // The definition is only needed on the CSS path, so it is only validated here;
    // callers that pass null with UseCss disabled keep working.
    if (definition == null)
    {
        throw new System.ArgumentNullException(nameof(definition));
    }

    var cssClassName = HtmlEngineHelper.CreateCssClassName(definition.Name, pattern.Name);

    return string.Format(CLASS_SPAN_FORMAT, cssClassName, match.Value);
}
/// <summary>
/// Validates the skip and word patterns as regular expressions, then saves via the base class.
/// </summary>
/// <exception cref="ModuleException">
/// Thrown when the skip pattern is set but does not parse, when the word pattern is null or
/// blank, or when the word pattern does not parse.
/// </exception>
public override void Save()
{
    const RegexOptions parseOptions = RegexOptions.IgnorePatternWhitespace;

    // The skip pattern is optional; it is only checked when it has been set.
    if (!string.IsNullOrEmpty(SkipPattern))
    {
        try
        {
            new Regex(SkipPattern, parseOptions);
        }
        catch (ArgumentException ex)
        {
            throw new ModuleException("Invalid skip pattern: " + ex.Message);
        }
    }

    // The word pattern is mandatory and must contain something other than whitespace.
    bool wordPatternIsBlank = (WordPattern ?? string.Empty).Trim().Length == 0;
    if (wordPatternIsBlank)
    {
        throw new ModuleException("Empty word pattern is invalid.");
    }

    try
    {
        new Regex(WordPattern, parseOptions);
    }
    catch (ArgumentException ex)
    {
        throw new ModuleException("Invalid word pattern: " + ex.Message);
    }

    base.Save();
}
/// <summary>
/// Generates a file from the pattern (template) file into the supplied stream.
/// </summary>
/// <param name="ms">Stream handed to the generator as its output target.</param>
/// <param name="param">Arguments forwarded to <c>GetObjInstance</c> to build the data object.</param>
/// <exception cref="ApplicationException">Thrown when the generator reports failure.</exception>
public void Generate(MemoryStream ms, params object[] param)
{
    // Build the instance needed to fill in the template file
    var templateData = GetObjInstance(param);
    var templatePath = GetPatternFilePath();

    // Configure the generation settings: mark pattern text with a red foreground color
    var setting = new Setting();
    setting.PatternTextStyle.SignWithColor = true;
    setting.PatternTextStyle.ForecolorName = System.Drawing.Color.Red.Name;

    // Generate the file
    var generator = new WordPattern(setting);
    var succeeded = generator.Generate(templatePath, templateData, ms);
    if (!succeeded)
    {
        throw new ApplicationException("Generate file failed.");
    }
}
/// <summary>
/// Creates an unknown word from a pattern in which known letters are filled in and
/// <c>UnknownToken</c> stands for each missing letter.
/// </summary>
/// <param name="pattern">The single-word pattern; surrounding whitespace is trimmed and the result is lower-cased.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="pattern"/> is empty or whitespace only, or still contains
/// whitespace after trimming.
/// </exception>
public UnknownWord(string pattern)
{
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    if (string.IsNullOrWhiteSpace(pattern))
    {
        throw new ArgumentException($"{nameof(pattern)} cannot be only whitespace");
    }

    // Whitespace left after trimming means the caller passed more than one word.
    string trimmed = pattern.Trim();
    bool containsInnerWhitespace = trimmed.Any(ch => char.IsWhiteSpace(ch));
    if (containsInnerWhitespace)
    {
        throw new ArgumentException($"The word pattern had some unexpected white space in it (ie it is several words): {trimmed}");
    }

    WordPattern = trimmed.ToLower();
    _newWordTemplate = WordPattern.ToCharArray();
    UnknownCharCount = WordPattern.Count(letter => letter == UnknownToken);
}
/// <summary>
/// Turns a word pattern match into a single span carrying the matched text and the
/// span style derived from the pattern's style.
/// </summary>
/// <param name="definition">The definition passed with the match; must not be null.</param>
/// <param name="pattern">The word pattern whose style is applied; must not be null.</param>
/// <param name="match">The regex match providing the text; must not be null.</param>
/// <returns>An array containing exactly one span.</returns>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
protected override Span[] ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
{
    if (definition == null)
    {
        throw new ArgumentNullException(nameof(definition));
    }

    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    if (match == null)
    {
        throw new ArgumentNullException(nameof(match));
    }

    var styledSpan = new Span
    {
        Text = match.Value,
        Style = GetSpanStyle(pattern.Style)
    };

    return new[] { styledSpan };
}
/// <summary>
/// Background-worker handler that clears the lexicon, splits the text passed in
/// <c>e.Argument</c> into paragraphs, lines and word/punctuation elements, records them in
/// the text model, and reports progress once per line.
/// </summary>
/// <param name="sender">Treated as the <see cref="BackgroundWorker"/> used for progress reporting.</param>
/// <param name="e">Event arguments whose <c>Argument</c> is treated as the text to parse.</param>
private void ParseText_DoWork(object sender, DoWorkEventArgs e)
{
    Lexicon.Clear();

    // Combined pattern: group 1 captures punctuation, group 2 captures a word.
    var expElementPattern = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
    var whiteSpacePattern = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

    // NOTE(review): both 'as' casts yield null on a type mismatch and neither result is checked
    // before use — confirm the caller always passes a BackgroundWorker and a string.
    var worker = sender as BackgroundWorker;
    var text = e.Argument as string;
    int progress = 0;
    string state = string.Empty;

    // TODO: handling of paragraph breaks and section headers, etc
    foreach (Match p in ParagraphPattern.Matches(text))
    {
        // Trim the paragraph and collapse every run of whitespace into a single space.
        string paragraphText = p.Value.Trim();
        paragraphText = whiteSpacePattern.Replace(paragraphText, " ");
        var paragraph = new Lx.Discourse();
        Text.Discourse.AddLast(paragraph);

        foreach (Match l in LinePattern.Matches(paragraphText))
        {
            //store line, section up into words and punctuation
            string cleanedLine = l.Value.Trim();
            //cleanedLine = whiteSpacePattern.Replace(cleanedLine, " ");
            // The line text doubles as the user state passed along with the progress report.
            state = cleanedLine;
            var expression = new Lx.Expression(cleanedLine);
            paragraph.Expressions.AddLast(expression);

            foreach (Match m in expElementPattern.Matches(expression.Graph))
            {
                if (m.Groups.Count > 0)
                {
                    // string m => List<Glyphs>
                    var glyphs = Script.AddGlyphs(m.Value.ToCharArray());
                    // List<Glyph> => List<Grapheme>
                    // Pre-analysis, graphemes are 1:1 with glyphs
                    var graphemes = Orthography.AddGraphemes(glyphs);
                    // List<Grapheme> => Morpheme
                    // An empty group 1 means the punctuation alternative did not capture,
                    // so the element is added to the lexicon as a morpheme.
                    if (string.IsNullOrEmpty(m.Groups[1].Value))
                    {
                        //var morph = Text.Lexicon.Add(m.Groups[2].Value);
                        //morph.GraphemeChain.Add(Lx.SegmentChain<Lx.Grapheme>.NewSegmentChain(graphemes));
                        Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                        expression.Sequence.AddLast(morph);
                    }
                    else
                    {
                        // Punctuation goes into the paralexicon instead of the lexicon.
                        expression.Sequence.AddLast(Text.Paralexicon.Add(m.Groups[1].Value));
                    }
                }
            }

            // One progress tick per processed line.
            worker.ReportProgress(++progress, state);
        }
    }

    UpdateLocalLexicon();
}
/// <summary>
/// Formats the matched text as an RTF group using the pattern's fore color, back color and font.
/// </summary>
/// <param name="definition">Not used by this implementation.</param>
/// <param name="pattern">The word pattern supplying colors and font; must not be null.</param>
/// <param name="match">The regex match whose value is formatted; must not be null.</param>
/// <returns>The matched text formatted with <c>RtfFormat</c> and enclosed in braces.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="pattern"/> or <paramref name="match"/> is null.</exception>
protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
{
    // Fail with a descriptive exception instead of a NullReferenceException further down.
    // The definition is deliberately not validated because this implementation never reads it.
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    if (match == null)
    {
        throw new ArgumentNullException(nameof(match));
    }

    var style = CreateRtfPatternStyle(pattern.Style.Colors.ForeColor, pattern.Style.Colors.BackColor, pattern.Style.Font);

    return "{" + String.Format(RtfFormat, style, match.Value) + "}";
}
/// <summary>
/// Produces the string result for a match of a word pattern; each concrete subclass
/// supplies the implementation.
/// </summary>
/// <param name="definition">Definition passed along with the pattern (presumably the one the pattern belongs to — confirm against callers).</param>
/// <param name="pattern">The word pattern associated with the match.</param>
/// <param name="match">The regular-expression match to process.</param>
/// <returns>A string produced for the match; its format is decided by the implementing class.</returns>
protected abstract string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match);
/// <summary>
/// Assigns a newly constructed <c>WordPattern</c> instance to the <c>WordPattern</c> member.
/// </summary>
public void BeforeEach() => WordPattern = new WordPattern();
/// <summary>
/// Records the matched text, with the pattern's fore color and bold/italic flags, in
/// <c>HighlightedSpans</c> and returns the matched text unchanged.
/// </summary>
/// <param name="definition">Not used by this implementation.</param>
/// <param name="pattern">The word pattern supplying color and font flags; must not be null.</param>
/// <param name="match">The regex match whose value is recorded; must not be null.</param>
/// <returns>The matched text.</returns>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="pattern"/> or <paramref name="match"/> is null.</exception>
protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
{
    // Fail with a descriptive exception instead of a NullReferenceException further down.
    // The definition is deliberately not validated because this implementation never reads it.
    if (pattern == null)
    {
        throw new System.ArgumentNullException(nameof(pattern));
    }

    if (match == null)
    {
        throw new System.ArgumentNullException(nameof(match));
    }

    HighlightedSpans.Add(new FormattedString(match.Value, pattern.Style.Colors.ForeColor, pattern.Style.Font.IsBold, pattern.Style.Font.IsItalic));

    return match.Value;
}
/// <summary>
/// Builds the search URL for the first result page of the given word pattern and fetches
/// the word count for it.
/// </summary>
/// <param name="driver">Web driver forwarded to <c>GetWordCountByWordPattern</c>.</param>
/// <param name="wordPattern">Pattern whose <c>Pattern</c> value is placed in the <c>b</c> query parameter.</param>
/// <returns>The word count, the returned node, the URL that was used, and the page index (always 0).</returns>
private Tuple<int, HtmlNode, string, int> GetWordCountByPattern(IWebDriver driver, WordPattern wordPattern)
{
    // The search always starts at the first page with an empty query text.
    const string searchPage = "https://www.kryssord.org/search.php";
    const string query = "";
    const int page = 0;

    string url = string.Format("{0}?a={1}&b={2}&p={3}", searchPage, query, wordPattern.Pattern, page);

    var (count, node) = GetWordCountByWordPattern(driver, url);

    return new Tuple<int, HtmlNode, string, int>(count, node, url, page);
}
// Processes every word on the given result page and on each following page until no next
// page is found. Words are only stored once the last-word has been found (hasFoundLastWord);
// the shared flags hasFoundPattern, hasFoundLastWord and hasMissedLastWord are updated here.
// NOTE(review): the flags look like resume-after-interruption state — confirm with the caller.
private void ProcessWordsUntilEmpty(WordPattern wordPattern, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
{
    while (true)
    {
        Log.Information("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);
        writer.WriteLine("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);

        // parse all words
        var words = ReadWordsAgilityPack(documentNode, adminUser);
        foreach (var word in words)
        {
            if (wordPattern.IsMatchLastWord)
            {
                Log.Information("The current pattern matches the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                hasFoundPattern = true;

                var wordRemoveDiacriticsToNorwegian = word.Value.RemoveDiacriticsToNorwegian();

                // we might have had to add question marks at the end of the string to fix the length bug at the site
                if (wordRemoveDiacriticsToNorwegian == wordPattern.LastWord.TrimEnd('?'))
                {
                    Log.Information("The current word matches the last-word: {0} = {1}", word.Value, wordPattern.LastWord);
                    hasFoundLastWord = true;
                }
            }
            else
            {
                if (!hasFoundLastWord && hasFoundPattern)
                {
                    // if the pattern not any longer match, we never found the word - has it been deleted?
                    Log.Error("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                    writer.WriteLine("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                    // Setting this flag makes the recursive caller stop processing further patterns.
                    hasMissedLastWord = true;
                    return;
                }
            }

            // Words are only stored from the point where the last-word has been found.
            if (hasFoundLastWord)
            {
                string currentValue = word.Value;

                // check if this is one of the buggy words from their site where the words found don't have the same length as the pattern says it should have
                if (wordPattern.Length != word.Value.Length)
                {
                    Log.Error("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);
                    writer.WriteLine("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);
                    if (wordPattern.Length > word.Value.Length)
                    {
                        // Too short: pad with '?' up to the pattern length.
                        currentValue = currentValue + new string('?', wordPattern.Length - word.Value.Length);
                    }
                    else
                    {
                        // Too long: cut down to the pattern length.
                        currentValue = currentValue.Substring(0, wordPattern.Length);
                    }
                }
                else
                {
                    // everything is OK
                }

                // update that we are processing this word, ignore length and comment
                WordDatabaseService.UpdateState(db, source, new Word() { Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now }, writer);

                GetWordSynonyms(word, driver, db, adminUser);
            }
        }

        // go to next page if exist
        var(hasFoundNextPage, pageNumber, pageUrl, pageNode) = NavigateToNextPageIfExist(driver, documentNode);
        if (hasFoundNextPage)
        {
            url = pageUrl;
            page = pageNumber;
            documentNode = pageNode;
        }
        else
        {
            break;
        }
    }
}
/// <summary>
/// Handles a word pattern match by delegating to <c>ProcessPatternMatch</c>.
/// </summary>
/// <param name="definition">Not used by this implementation.</param>
/// <param name="pattern">The word pattern forwarded to <c>ProcessPatternMatch</c>.</param>
/// <param name="match">The regex match forwarded to <c>ProcessPatternMatch</c>.</param>
/// <returns>Whatever <c>ProcessPatternMatch</c> returns for the pattern and match.</returns>
protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
    => ProcessPatternMatch(pattern, match);
/// <summary>
/// Looks for groups of words that occur equally often across the text blocks and, for
/// each such group, collects the runs of blocks lying between an occurrence of the
/// group's first word and the corresponding occurrence of its last word.
/// </summary>
/// <param name="blocks">The text blocks; each one is split into words on single spaces.</param>
/// <returns>
/// One entry per surviving word group, ordered by (occurrence count x number of words)
/// descending; each entry is a list of non-empty block runs.
/// </returns>
private List<List<List<string>>> AnalysePatterns(string[] blocks)
{
    // Step 1: for every word, record the index of each block it appears in.
    var blockIndicesByWord = new Dictionary<string, List<int>>();
    for (int blockIndex = 0; blockIndex < blocks.Length; blockIndex++)
    {
        foreach (string token in blocks[blockIndex].Split(' '))
        {
            List<int> indices;
            if (!blockIndicesByWord.TryGetValue(token, out indices))
            {
                indices = new List<int>();
                blockIndicesByWord.Add(token, indices);
            }

            indices.Add(blockIndex);
        }
    }

    // Step 2: group the words by how many times they occur.
    //   key   => number of occurrences of a word
    //   value => how many words share that number, and which words they are
    var groupsByOccurrenceCount = new Dictionary<int, WordPattern>();
    foreach (KeyValuePair<string, List<int>> wordEntry in blockIndicesByWord)
    {
        int occurrenceCount = wordEntry.Value.Count;

        WordPattern group;
        if (!groupsByOccurrenceCount.TryGetValue(occurrenceCount, out group))
        {
            group = new WordPattern(0, new List<string>());
            groupsByOccurrenceCount.Add(occurrenceCount, group);
        }

        group.NumOfOccurences++;
        group.Words.Add(wordEntry.Key);
    }

    // Step 3: words that occur only once cannot form a repeating pattern.
    groupsByOccurrenceCount.Remove(1);

    // Step 4: drop groups whose first and last word start more than 100 blocks apart.
    foreach (int occurrenceCount in groupsByOccurrenceCount.Keys.ToArray())
    {
        List<string> groupWords = groupsByOccurrenceCount[occurrenceCount].Words;
        int firstWordStart = blockIndicesByWord[groupWords[0]][0];
        int lastWordStart = blockIndicesByWord[groupWords[groupWords.Count - 1]][0];

        if (lastWordStart - firstWordStart > 100)
        {
            groupsByOccurrenceCount.Remove(occurrenceCount);
        }
    }

    // Step 5: rank the remaining groups, highest (count x words) first.
    List<KeyValuePair<int, WordPattern>> rankedGroups = groupsByOccurrenceCount
        .OrderByDescending(entry => entry.Key * entry.Value.NumOfOccurences)
        .ToList();

    // Step 6: for each group, cut out the blocks between its first and last word.
    var allWordBlocks = new List<List<List<string>>>();
    foreach (KeyValuePair<int, WordPattern> rankedGroup in rankedGroups)
    {
        WordPattern wordPattern = rankedGroup.Value;

        List<List<int>> indicesPerWord = wordPattern.Words
            .Select(word => blockIndicesByWord[word])
            .ToList();
        List<int> runStarts = indicesPerWord[0];
        List<int> runEnds = indicesPerWord[indicesPerWord.Count - 1];

        var wordBlocks = new List<List<string>>();
        for (int occurrence = 0; occurrence < runStarts.Count; occurrence++)
        {
            int firstBlockIndex = runStarts[occurrence];
            int lastBlockIndex = runEnds[occurrence];

            // The end index is exclusive: the block holding the last word is left out.
            var wordBlock = new List<string>();
            for (int cursor = firstBlockIndex; cursor < lastBlockIndex; cursor++)
            {
                wordBlock.Add(blocks[cursor]);
            }

            if (wordBlock.Count > 0)
            {
                wordBlocks.Add(wordBlock);
            }
        }

        if (wordBlocks.Count > 0)
        {
            allWordBlocks.Add(wordBlocks);
        }
    }

    return allWordBlocks;
}
/// <summary>
/// Returns the number of matches of <c>WordPattern</c> in the given content.
/// </summary>
/// <param name="content">The text to search.</param>
/// <returns>The number of matches found.</returns>
public static int GetWordCount(string content) => WordPattern.Matches(content).Count;
public void IsPatternMatch_EmptyStringPattern_ReturnsTrue()
{
    // Arrange: both the pattern and the text are empty.
    const string pattern = "";
    const string text = "";

    // Act
    var isMatch = WordPattern.IsPatternMatch(pattern, text);

    // Assert
    Assert.IsTrue(isMatch);
}
public void IsPatternMatch_MatchingPattern_ReturnsTrue()
{
    // Arrange: the words follow the same a-b-b-a arrangement as the pattern.
    const string pattern = "abba";
    const string text = "dog cat cat dog";

    // Act
    var isMatch = WordPattern.IsPatternMatch(pattern, text);

    // Assert
    Assert.IsTrue(isMatch);
}
public void IsPatternMatch_NoPattern_ReturnsFalse()
{
    // Arrange: the words alternate (a-b-a-b), which differs from the a-b-b-a pattern.
    const string pattern = "abba";
    const string text = "dog cat dog cat";

    // Act
    var isMatch = WordPattern.IsPatternMatch(pattern, text);

    // Assert
    Assert.IsFalse(isMatch);
}