Пример #1
0
        public void Match_TwoWordsTextMatchesPattern()
        {
            // Arrange: two different words and a two-character pattern with two different digits.
            const string words  = "buch katze";
            const string digits = "23";

            // Act
            var matched = WordPattern.TextMatchesPattern(words, digits);

            // Assert
            Assert.IsTrue(matched);
        }
Пример #2
0
        public void Given_string_same_When_match_Then_return_false()
        {
            // Arrange: the pattern uses two distinct letters, the text repeats one word four times.
            const string letters  = "abba";
            const string sentence = "dog dog dog dog";

            // Act
            var outcome = WordPattern.FindWordPattern(letters, sentence);

            // Assert
            Assert.AreEqual(false, outcome);
        }
Пример #3
0
 public Dictionary <string, int> CountWords()
 {
     // Tally every match of WordPattern in _text, keyed by its lower-cased value.
     var tally = new Dictionary<string, int>();

     foreach (Match hit in WordPattern.Matches(_text))
     {
         // NOTE(review): ToLower() uses the current culture - confirm that is intended.
         var word = hit.Value.ToLower();

         // seen stays 0 when the word has not been recorded yet.
         int seen;
         tally.TryGetValue(word, out seen);
         tally[word] = seen + 1;
     }

     return tally;
 }
Пример #4
0
        public void Given_pattern_str_When_match_Then_return()
        {
            // Arrange: the letters of "abba" line up with the word order dog/cat/cat/dog.
            const string letters  = "abba";
            const string sentence = "dog cat cat dog";

            // Act
            var outcome = WordPattern.FindWordPattern(letters, sentence);

            // Assert
            Assert.AreEqual(true, outcome);
        }
Пример #5
0
        public void DoesntMatch_PatternLongerThenText()
        {
            // Arrange: two words but a three-character pattern.
            const string words  = "buch hund";
            const string digits = "123";

            // Act
            var matched = WordPattern.TextMatchesPattern(words, digits);

            // Assert
            Assert.IsFalse(matched);
        }
Пример #6
0
        public void DoesntMatch_TextLongerThenPattern()
        {
            // Arrange: three words but only a two-character pattern.
            const string words  = "buch hund Katze";
            const string digits = "12";

            // Act
            var matched = WordPattern.TextMatchesPattern(words, digits);

            // Assert
            Assert.IsFalse(matched);
        }
Пример #7
0
        public void DoesntMatch_TwoWordsTextMatchesPattern()
        {
            // Arrange: two different words, but the pattern repeats the same digit.
            const string words  = "buch hund";
            const string digits = "22";

            // Act
            var matched = WordPattern.TextMatchesPattern(words, digits);

            // Assert
            Assert.IsFalse(matched);
        }
Пример #8
0
        /// <summary>
        /// Picks the first entry of <c>definition.Patterns</c> whose key names a successful group in
        /// <paramref name="match"/> and dispatches to the processing method for that pattern's type.
        /// </summary>
        /// <param name="definition">Definition whose patterns are searched; must not be null.</param>
        /// <param name="match">Match whose groups are inspected; must not be null.</param>
        /// <returns>
        /// The result of the block/markup/word processing method, or <c>match.Value</c> when the
        /// selected pattern is none of those three types.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        private string ElementMatchHandler(Definition definition, Match match)
        {
            if (definition == null)
            {
                throw new ArgumentNullException(nameof(definition));
            }

            if (match == null)
            {
                throw new ArgumentNullException(nameof(match));
            }

            // Select the pattern whose key corresponds to a group that took part in the match.
            // NOTE(review): First(...) presumably throws when no entry qualifies - confirm that is intended.
            var pattern = definition.Patterns.First
                          (
                x => match.Groups[x.Key]
                .Success
                          )
                          .Value;

            // Dispatch on the runtime type of the pattern; unrecognised types pass the matched text through.
            return(pattern switch
            {
                BlockPattern blockPattern => ProcessBlockPatternMatch(definition, blockPattern, match),
                MarkupPattern markupPattern => ProcessMarkupPatternMatch(definition, markupPattern, match),
                WordPattern wordPattern => ProcessWordPatternMatch(definition, wordPattern, match),
                _ => match.Value,
            });
Пример #9
0
        public void Match_OneWordTextMatchesPattern()
        {
            // Arrange: a single word and a single-digit pattern.
            const string words  = "buch";
            const string digits = "5";

            // Act
            var matched = WordPattern.TextMatchesPattern(words, digits);

            // Assert
            Assert.IsTrue(matched);
        }
Пример #10
0
		static void Main(string[] args)
		{
			// Run countWords on a fixed 36-letter sample and print whatever it returns.
			var counter = new WordPattern();
			var total   = counter.countWords("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJ");
			Console.Out.WriteLine(total);

			// Wait for a line on standard input before the method returns.
			Console.In.ReadLine();

			// TODO: Add code to start application here
		}
Пример #11
0
 // END CUT HERE
 // BEGIN CUT HERE
 public static void Main()
 {
     try
     {
         // Build the solution object and hand -1 to its test runner.
         var harness = new WordPattern();
         harness.run_test(-1);
     }
     catch (Exception failure)
     {
         // Print the full exception text to the console instead of letting it escape.
         Console.WriteLine(failure.ToString());
     }
 }
Пример #12
0
// END CUT HERE
// BEGIN CUT HERE
    public static void Main()
    {
        try
        {
            // Build the solution object and hand -1 to its test runner.
            var harness = new WordPattern();
            harness.run_test(-1);
        }
        catch (Exception failure)
        {
            // Print the full exception text to the console instead of letting it escape.
            Console.WriteLine(failure.ToString());
        }
    }
Пример #13
0
        // Looks up how many words a pattern yields. Small result sets (or patterns whose Depth is
        // already above 3) are processed directly; larger ones are split into child patterns that are
        // handled by recursive calls.
        private void ReadWordsByWordPermutationsRecursive(IWebDriver driver, WordPattern wordPattern, WordHintDbContext db, User adminUser)
        {
            var(wordCount, node, url, page) = GetWordCountByPattern(driver, wordPattern);

            // no hits for this pattern: nothing to process and nothing to refine
            if (wordCount == 0)
            {
                return;
            }

            Log.Information("Found {0} words when searching for '{1}' on page {2}", wordCount, wordPattern.Pattern, page + 1);
            writer.WriteLine("Found {0} words when searching for '{1}' on page {2}", wordCount, wordPattern.Pattern, page + 1);

            var depthLimitReached = wordPattern.Depth > 3;

            // more than 108 hits and still allowed to go deeper: refine into child patterns
            if (wordCount > 108 && !depthLimitReached)
            {
                foreach (var childPattern in wordPattern.GetWordPatternChildren())
                {
                    // stop refining as soon as the last-word has been flagged as missed
                    if (hasMissedLastWord)
                    {
                        return;
                    }

                    // a child is processed once the last word was found, when it has no last word,
                    // or when it matches the last word; every other child is skipped
                    var shouldProcess = hasFoundLastWord || childPattern.LastWord == null || childPattern.IsMatchLastWord;

                    if (!shouldProcess)
                    {
                        Log.Information("Skipping pattern '{0}'.", childPattern.Pattern);
                        continue;
                    }

                    Log.Information("Processing pattern '{0}'.", childPattern.Pattern);
                    ReadWordsByWordPermutationsRecursive(driver, childPattern, db, adminUser);
                }

                return;
            }

            // the depth cap forced us to accept this result set, so report it
            if (depthLimitReached)
            {
                Log.Error("Warning! Pattern search depth is now {0}. Found {1} words when searching for '{2}' on page {3}", wordPattern.Depth, wordCount, wordPattern.Pattern, page + 1);
            }

            // process each word found using the current pattern
            ProcessWordsUntilEmpty(wordPattern, driver, db, adminUser, page, node, url);
        }
Пример #14
0
        /// <summary>
        /// Wraps the matched text in a span, using a CSS class when <c>UseCss</c> is set and an
        /// inline style built from the pattern's style otherwise.
        /// </summary>
        protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
        {
            if (UseCss)
            {
                // class-based markup: the class name is built from the definition and pattern names
                var className = HtmlEngineHelper.CreateCssClassName(definition.Name, pattern.Name);

                return string.Format(CLASS_SPAN_FORMAT, className, match.Value);
            }

            // inline-style markup
            var inlineStyle = HtmlEngineHelper.CreatePatternStyle(pattern.Style);

            return string.Format(STYLE_SPAN_FORMAT, inlineStyle, match.Value);
        }
Пример #15
0
        /// <summary>
        /// Validates the skip and word patterns, then delegates to the base implementation.
        /// </summary>
        /// <exception cref="ModuleException">
        /// The skip pattern or the word pattern is not a valid regular expression, or the word pattern
        /// is null, empty or whitespace.
        /// </exception>
        public override void Save()
        {
            // The skip pattern is optional; when present it has to compile as a regex.
            if (!string.IsNullOrEmpty(SkipPattern))
            {
                try
                {
                    new Regex(SkipPattern, RegexOptions.IgnorePatternWhitespace);
                }
                catch (ArgumentException ex)
                {
                    throw new ModuleException("Invalid skip pattern: " + ex.Message);
                }
            }

            // The word pattern is mandatory.
            if (string.IsNullOrWhiteSpace(WordPattern))
            {
                throw new ModuleException("Empty word pattern is invalid.");
            }

            try
            {
                new Regex(WordPattern, RegexOptions.IgnorePatternWhitespace);
            }
            catch (ArgumentException ex)
            {
                throw new ModuleException("Invalid word pattern: " + ex.Message);
            }

            base.Save();
        }
Пример #16
0
        /// <summary>
        /// Generates a file into <paramref name="ms"/> from the pattern file, using an object built
        /// from <paramref name="param"/>.
        /// </summary>
        /// <exception cref="ApplicationException">The generator reported failure.</exception>
        public void Generate(MemoryStream ms, params object[] param)
        {
            // Build the instance needed to generate the template file.
            var templateData = GetObjInstance(param);

            var templatePath = GetPatternFilePath();

            // Configure the generation settings: pattern text is marked with colour, in red.
            var setting = new Setting();

            setting.PatternTextStyle.SignWithColor = true;
            setting.PatternTextStyle.ForecolorName = System.Drawing.Color.Red.Name;

            // Generate the file.
            var generator = new WordPattern(setting);
            var succeeded = generator.Generate(templatePath, templateData, ms);

            if (!succeeded)
            {
                throw new ApplicationException("Generate file failed.");
            }
        }
Пример #17
0
        /// <summary>
        /// Give a pattern for the unknown word with known letters filled in and the UnknownToken used for missing letters
        /// </summary>
        /// <param name="pattern">
        /// The single-word pattern. Surrounding whitespace is trimmed and the result is lower-cased
        /// before it is stored.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="pattern"/> is empty or whitespace only, or still contains whitespace after
        /// trimming (i.e. it is more than one word).
        /// </exception>
        public UnknownWord(string pattern)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException(nameof(pattern));
            }

            // Pass the parameter name so ArgumentException.ParamName is populated for callers.
            if (string.IsNullOrWhiteSpace(pattern))
            {
                throw new ArgumentException($"{nameof(pattern)} cannot be only whitespace", nameof(pattern));
            }

            pattern = pattern.Trim();
            if (pattern.Any(char.IsWhiteSpace))
            {
                throw new ArgumentException($"The word pattern had some unexpected white space in it (ie it is several words): {pattern}", nameof(pattern));
            }

            // NOTE(review): ToLower() is culture-sensitive; confirm whether ToLowerInvariant() is wanted here.
            WordPattern      = pattern.ToLower();
            _newWordTemplate = WordPattern.ToCharArray();
            UnknownCharCount = WordPattern.Count(c => c == UnknownToken);
        }
Пример #18
0
        /// <summary>
        /// Turns a word-pattern match into a single span carrying the matched text and the style
        /// derived from the pattern.
        /// </summary>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        protected override Span[] ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
        {
            if (definition == null)
            {
                throw new ArgumentNullException(nameof(definition));
            }

            if (pattern == null)
            {
                throw new ArgumentNullException(nameof(pattern));
            }

            if (match == null)
            {
                throw new ArgumentNullException(nameof(match));
            }

            var span = new Span();
            span.Text  = match.Value;
            span.Style = GetSpanStyle(pattern.Style);

            return new[] { span };
        }
Пример #19
0
        // Background-worker handler: splits the text passed in e.Argument into paragraphs, lines and
        // word/punctuation elements, records them on Text, and reports progress once per line.
        private void ParseText_DoWork(object sender, DoWorkEventArgs e)
        {
            Lexicon.Clear();

            // group 1 = punctuation, group 2 = word
            var elementRegex    = new Regex($"({PunctuationPattern.ToString()})|({WordPattern.ToString()})");
            var whitespaceRegex = new Regex(@"[\s\n\r]+", RegexOptions.Singleline | RegexOptions.Multiline);

            var worker     = sender as BackgroundWorker;
            var sourceText = e.Argument as string;
            int linesDone  = 0;

            // TODO: handling of paragraph breaks and section headers, etc
            foreach (Match paragraphMatch in ParagraphPattern.Matches(sourceText))
            {
                // trim the paragraph and collapse every whitespace run to a single space
                string paragraphText = whitespaceRegex.Replace(paragraphMatch.Value.Trim(), " ");

                var paragraph = new Lx.Discourse();
                Text.Discourse.AddLast(paragraph);

                foreach (Match lineMatch in LinePattern.Matches(paragraphText))
                {
                    // store the line, then section it up into words and punctuation
                    string cleanedLine = lineMatch.Value.Trim();

                    var expression = new Lx.Expression(cleanedLine);
                    paragraph.Expressions.AddLast(expression);

                    foreach (Match element in elementRegex.Matches(expression.Graph))
                    {
                        if (element.Groups.Count <= 0)
                        {
                            continue;
                        }

                        // matched characters => glyphs => graphemes
                        // (pre-analysis, graphemes are 1:1 with glyphs)
                        var glyphs    = Script.AddGlyphs(element.Value.ToCharArray());
                        var graphemes = Orthography.AddGraphemes(glyphs);

                        string punctuation = element.Groups[1].Value;

                        if (!string.IsNullOrEmpty(punctuation))
                        {
                            // the punctuation group matched
                            expression.Sequence.AddLast(Text.Paralexicon.Add(punctuation));
                        }
                        else
                        {
                            // no punctuation captured: add the graphemes to the lexicon as a morpheme
                            Lx.Morpheme morph = Text.Lexicon.Add(graphemes);
                            expression.Sequence.AddLast(morph);
                        }
                    }

                    // the cleaned line doubles as the progress state
                    worker.ReportProgress(++linesDone, cleanedLine);
                }
            }

            UpdateLocalLexicon();
        }
Пример #20
0
        /// <summary>
        /// Formats the matched text with the RTF style built from the pattern's colours and font,
        /// and wraps the result in braces.
        /// </summary>
        protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
        {
            var colors    = pattern.Style.Colors;
            var rtfStyle  = CreateRtfPatternStyle(colors.ForeColor, colors.BackColor, pattern.Style.Font);
            var formatted = String.Format(RtfFormat, rtfStyle, match.Value);

            return "{" + formatted + "}";
        }
Пример #21
0
 /// <summary>
 /// Abstract hook that derived classes implement to turn a word-pattern match into a string.
 /// </summary>
 /// <param name="definition">The definition passed along with the match.</param>
 /// <param name="pattern">The word pattern associated with the match.</param>
 /// <param name="match">The regular-expression match being processed.</param>
 /// <returns>A string produced by the implementing class.</returns>
 protected abstract string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match);
Пример #22
0
 // Replaces the WordPattern member with a newly constructed instance.
 public void BeforeEach() => WordPattern = new WordPattern();
Пример #23
0
 // Records the matched text, with the pattern's fore colour and bold/italic flags, in
 // HighlightedSpans and returns the matched text unchanged.
 protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
 {
     var matchedText = match.Value;
     var style       = pattern.Style;
     var span        = new FormattedString(matchedText, style.Colors.ForeColor, style.Font.IsBold, style.Font.IsItalic);

     HighlightedSpans.Add(span);

     return matchedText;
 }
Пример #24
0
        // Builds the search URL for the pattern (empty query, page 0), fetches the word count for it
        // and returns the count, the result node, the URL and the page number together.
        private Tuple <int, HtmlNode, string, int> GetWordCountByPattern(IWebDriver driver, WordPattern wordPattern)
        {
            const int firstPage = 0;
            var emptyQuery = "";

            // address of the search result page
            var searchUrl = string.Format("{0}?a={1}&b={2}&p={3}", "https://www.kryssord.org/search.php", emptyQuery, wordPattern.Pattern, firstPage);

            var(count, node) = GetWordCountByWordPattern(driver, searchUrl);

            return new Tuple <int, HtmlNode, string, int>(count, node, searchUrl, firstPage);
        }
Пример #25
0
        // Processes the words on the given result page and on every following page until no next
        // page is found. Besides the database/synonym work, it updates the hasFoundPattern,
        // hasFoundLastWord and hasMissedLastWord flags (declared outside this method), which decide
        // whether a word is skipped, processed, or causes an early return.
        private void ProcessWordsUntilEmpty(WordPattern wordPattern, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
        {
            while (true)
            {
                Log.Information("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);
                writer.WriteLine("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);

                // parse all words on the current page
                var words = ReadWordsAgilityPack(documentNode, adminUser);

                foreach (var word in words)
                {
                    if (wordPattern.IsMatchLastWord)
                    {
                        Log.Information("The current pattern matches the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                        hasFoundPattern = true;

                        var wordRemoveDiacriticsToNorwegian = word.Value.RemoveDiacriticsToNorwegian();

                        // we might have had to add question marks at the end of the string to fix the length bug at the site,
                        // so trailing '?' characters are stripped from the last-word before comparing
                        if (wordRemoveDiacriticsToNorwegian == wordPattern.LastWord.TrimEnd('?'))
                        {
                            Log.Information("The current word matches the last-word: {0} = {1}", word.Value, wordPattern.LastWord);
                            hasFoundLastWord = true;
                        }
                    }
                    else
                    {
                        if (!hasFoundLastWord && hasFoundPattern)
                        {
                            // the pattern matched earlier but no longer does and the last-word was never found - has it been deleted?
                            // flag the miss and abandon processing
                            Log.Error("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                            writer.WriteLine("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                            hasMissedLastWord = true;
                            return;
                        }
                    }

                    // words are only stored once the last-word has been found
                    if (hasFoundLastWord)
                    {
                        string currentValue = word.Value;

                        // check if this is one of the buggy words from their site where the words found don't have the same length as the pattern says it should have
                        if (wordPattern.Length != word.Value.Length)
                        {
                            Log.Error("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);
                            writer.WriteLine("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);

                            if (wordPattern.Length > word.Value.Length)
                            {
                                // too short: pad with '?' up to the pattern length
                                currentValue = currentValue + new string('?', wordPattern.Length - word.Value.Length);
                            }
                            else
                            {
                                // too long: cut down to the pattern length
                                currentValue = currentValue.Substring(0, wordPattern.Length);
                            }
                        }
                        else
                        {
                            // everything is OK
                        }

                        // update that we are processing this word, ignore length and comment
                        // (the stored value is the upper-cased, length-adjusted word)
                        WordDatabaseService.UpdateState(db, source, new Word()
                        {
                            Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now
                        }, writer);

                        GetWordSynonyms(word, driver, db, adminUser);
                    }
                }

                // go to the next page if one exists, otherwise stop
                var(hasFoundNextPage, pageNumber, pageUrl, pageNode) = NavigateToNextPageIfExist(driver, documentNode);
                if (hasFoundNextPage)
                {
                    url          = pageUrl;
                    page         = pageNumber;
                    documentNode = pageNode;
                }
                else
                {
                    break;
                }
            }
        }
Пример #26
0
 // Word-pattern matches are handled by the shared ProcessPatternMatch routine; the definition is not used.
 protected override string ProcessWordPatternMatch(Definition definition, WordPattern pattern, Match match)
     => ProcessPatternMatch(pattern, match);
Пример #27
0
        // Finds words that repeat the same number of times across the blocks and, for each such group
        // of words, returns the runs of blocks lying between the first and the last word of the group.
        private List <List <List <string> > > AnalysePatterns(string[] blocks)
        {
            // Map every space-separated token to the indices of the blocks it occurs in
            // (one entry per occurrence).
            var occurrencesByWord = new Dictionary<string, List<int>>();

            for (int blockIndex = 0; blockIndex < blocks.Length; blockIndex++)
            {
                foreach (string token in blocks[blockIndex].Split(' '))
                {
                    List<int> occurrences;
                    if (!occurrencesByWord.TryGetValue(token, out occurrences))
                    {
                        occurrences = new List<int>();
                        occurrencesByWord.Add(token, occurrences);
                    }
                    occurrences.Add(blockIndex);
                }
            }

            // Group the words by how often they occur:
            //   key   => number of occurrences
            //   value => how many words share that number, and which words they are
            var patternsByCount = new Dictionary<int, WordPattern>();

            foreach (var wordEntry in occurrencesByWord)
            {
                int occurrenceCount = wordEntry.Value.Count;

                WordPattern bucket;
                if (!patternsByCount.TryGetValue(occurrenceCount, out bucket))
                {
                    bucket = new WordPattern(0, new List<string>());
                    patternsByCount.Add(occurrenceCount, bucket);
                }
                bucket.NumOfOccurences++;
                bucket.Words.Add(wordEntry.Key);
            }

            // Words that occur only once cannot form a repeating pattern.
            patternsByCount.Remove(1);

            // Drop groups whose first and last word first appear more than 100 blocks apart.
            foreach (int occurrenceCount in patternsByCount.Keys.ToArray())
            {
                List<string> groupWords     = patternsByCount[occurrenceCount].Words;
                int          firstWordIndex = occurrencesByWord[groupWords[0]][0];
                int          lastWordIndex  = occurrencesByWord[groupWords[groupWords.Count - 1]][0];

                if (lastWordIndex - firstWordIndex > 100)
                {
                    patternsByCount.Remove(occurrenceCount);
                }
            }

            // Rank the remaining groups by (occurrence count * number of words), highest first.
            var rankedPatterns = patternsByCount
                                 .OrderByDescending(candidate => candidate.Key * candidate.Value.NumOfOccurences)
                                 .ToList();

            var allWordBlocks = new List<List<List<string>>>();

            foreach (var rankedEntry in rankedPatterns)
            {
                // Occurrence lists of every word in this group, in the group's word order.
                List<List<int>> wordIndices = rankedEntry.Value.Words
                                              .Select(word => occurrencesByWord[word])
                                              .ToList();

                List<int> firstWordOccurrences = wordIndices[0];
                List<int> lastWordOccurrences  = wordIndices[wordIndices.Count - 1];

                var wordBlocks = new List<List<string>>();

                // One run per repetition: the blocks from the first word's occurrence up to
                // (not including) the last word's occurrence.
                for (int repetition = 0; repetition < firstWordOccurrences.Count; repetition++)
                {
                    int startBlock = firstWordOccurrences[repetition];
                    int endBlock   = lastWordOccurrences[repetition];

                    var run = new List<string>();
                    for (int blockIndex = startBlock; blockIndex < endBlock; blockIndex++)
                    {
                        run.Add(blocks[blockIndex]);
                    }

                    if (run.Count > 0)
                    {
                        wordBlocks.Add(run);
                    }
                }

                if (wordBlocks.Count > 0)
                {
                    allWordBlocks.Add(wordBlocks);
                }
            }

            return allWordBlocks;
        }
Пример #28
0
        /// <summary>
        /// Returns the number of matches that <c>WordPattern</c> finds in <paramref name="content"/>.
        /// </summary>
        public static int GetWordCount(string content) => WordPattern.Matches(content).Count;
Пример #29
0
 public void IsPatternMatch_EmptyStringPattern_ReturnsTrue()
 {
     // Both arguments are empty strings.
     var matched = WordPattern.IsPatternMatch("", "");

     Assert.IsTrue(matched);
 }
Пример #30
0
 public void IsPatternMatch_MatchingPattern_ReturnsTrue()
 {
     // The letters of "abba" line up with the word order dog/cat/cat/dog.
     var matched = WordPattern.IsPatternMatch("abba", "dog cat cat dog");

     Assert.IsTrue(matched);
 }
Пример #31
0
 public void IsPatternMatch_NoPattern_ReturnsFalse()
 {
     // The word order dog/cat/dog/cat alternates, unlike the letters of "abba".
     var matched = WordPattern.IsPatternMatch("abba", "dog cat dog cat");

     Assert.IsFalse(matched);
 }