Esempio n. 1
0
 /// <summary>
 /// Creates the metadata wrapper for a single dictionary file.
 /// </summary>
 /// <param name="DictName">Stored as <c>DictionaryName</c>.</param>
 /// <param name="DictDescript">Stored as <c>DictionaryDescription</c>.</param>
 /// <param name="DictPrefix">Stored as <c>DictionaryCategoryPrefix</c>.</param>
 /// <param name="DictContents">Unparsed text of the dictionary file, stored as <c>DictionaryRawText</c>.</param>
 public DictionaryMetaObject(string DictName, string DictDescript, string DictPrefix, string DictContents)
 {
     // Copy the caller-supplied metadata and raw file text as-is; nothing is parsed here.
     DictionaryName = DictName;
     DictionaryDescription = DictDescript;
     DictionaryRawText = DictContents;
     DictionaryCategoryPrefix = DictPrefix;

     // A newly created dictionary starts out flagged for use.
     UseDictionary = true;

     // Start with an empty data container.
     DictData = new DictionaryData();
 }
        /// <summary>
        /// Parses the raw text of a dictionary file and fills the lookup structures of the
        /// <c>DictData</c> object attached to <paramref name="DictionaryToParse"/>.
        /// </summary>
        /// <remarks>
        /// The raw text is split on <c>'%'</c> into at most three parts: the text before the first
        /// <c>'%'</c> (ignored), the category header, and the entry list. Each header line is a
        /// category value and a category name separated by a tab. Each entry line is an entry followed by
        /// one or more tab-separated category values. Entries containing <c>'*'</c> are stored under
        /// "Wildcards" together with a precompiled regex; all other entries are stored under "Standards".
        /// Both groups are bucketed by the number of space-separated words in the entry. When the same
        /// (lower-cased) entry occurs more than once, the first occurrence is kept.
        /// </remarks>
        /// <param name="DictionaryToParse">Dictionary whose <c>DictionaryRawText</c> is parsed and whose <c>DictData</c> is populated.</param>
        /// <returns>The populated <c>DictData</c> instance of <paramref name="DictionaryToParse"/>.</returns>
        /// <exception cref="IndexOutOfRangeException">
        /// The raw text contains fewer than two <c>'%'</c> delimiters, or a header line contains no tab.
        /// </exception>
        public DictionaryData ParseDict(DictionaryMetaObject DictionaryToParse)
        {
            DictionaryData DictData = DictionaryToParse.DictData;

            // Populate the DictData object, starting from a clean slate.
            DictData.MaxWords = 0;

            // Layout: group ("Wildcards" / "Standards") -> words in entry -> entry -> category values.
            Dictionary<int, Dictionary<string, string[]>> WildcardEntries = new Dictionary<int, Dictionary<string, string[]>>();
            Dictionary<int, Dictionary<string, string[]>> StandardEntries = new Dictionary<int, Dictionary<string, string[]>>();

            DictData.FullDictionary = new Dictionary<string, Dictionary<int, Dictionary<string, string[]>>>();
            DictData.FullDictionary.Add("Wildcards", WildcardEntries);
            DictData.FullDictionary.Add("Standards", StandardEntries);

            DictData.WildCardArrays       = new Dictionary<int, string[]>();
            DictData.PrecompiledWildcards = new Dictionary<string, Regex>();

            // Every entry seen in the file (lower-cased), duplicates included, in file order.
            List<string> AllEntriesInFile = new List<string>();

            // Wildcard entries per word count; converted to arrays once parsing is done.
            Dictionary<int, List<string>> WildCardLists = new Dictionary<int, List<string>>();

            string[] LineBreaks = new[] { "\r\n", "\r", "\n" };

            string[] DicSplit    = DictionaryToParse.DictionaryRawText.Split(new char[] { '%' }, 3, StringSplitOptions.None);
            string[] HeaderLines = DicSplit[1].Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries);
            string[] EntryLines  = DicSplit[2].Split(LineBreaks, StringSplitOptions.RemoveEmptyEntries);

            // Now that the number of categories is known, the category arrays can be sized.
            DictData.NumCats   = HeaderLines.Length;
            DictData.CatNames  = new string[DictData.NumCats];
            DictData.CatValues = new string[DictData.NumCats];

            // Map out the categories: "value <tab> name".
            for (int i = 0; i < DictData.NumCats; i++)
            {
                string[] HeaderRow = HeaderLines[i].Trim().Split(new char[] { '\t' }, 2);

                DictData.CatValues[i] = HeaderRow[0];
                DictData.CatNames[i]  = HeaderRow[1];
            }

            // Map out the dictionary entries.
            for (int i = 0; i < EntryLines.Length; i++)
            {
                string EntryLine = EntryLines[i].Trim();

                // Collapse runs of spaces. Strings are immutable, so the result of Replace must be
                // assigned back; otherwise the loop condition never changes and the loop never ends.
                while (EntryLine.Contains("  "))
                {
                    EntryLine = EntryLine.Replace("  ", " ");
                }

                string[] EntryRow = EntryLine.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

                // A usable row needs a non-blank entry and at least one category value.
                if (EntryRow.Length <= 1 || String.IsNullOrWhiteSpace(EntryRow[0]))
                {
                    continue;
                }

                string   Entry           = EntryRow[0].ToLower();
                string[] EntryCategories = EntryRow.Skip(1).ToArray();

                int Words_In_Entry = EntryRow[0].Split(' ').Length;
                if (Words_In_Entry > DictData.MaxWords)
                {
                    DictData.MaxWords = Words_In_Entry;
                }

                // Keep track of each entry within the dictionary file.
                AllEntriesInFile.Add(Entry);

                if (Entry.Contains("*"))
                {
                    Dictionary<string, string[]> WildcardBucket;
                    if (!WildcardEntries.TryGetValue(Words_In_Entry, out WildcardBucket))
                    {
                        WildcardBucket = new Dictionary<string, string[]>();
                        WildcardEntries.Add(Words_In_Entry, WildcardBucket);
                        WildCardLists.Add(Words_In_Entry, new List<string>());
                    }

                    // First occurrence wins; later duplicates are ignored.
                    if (!WildcardBucket.ContainsKey(Entry))
                    {
                        WildcardBucket.Add(Entry, EntryCategories);
                        WildCardLists[Words_In_Entry].Add(Entry);

                        // Escape the entry, then turn each escaped '*' into ".*"; anchored at the start only.
                        DictData.PrecompiledWildcards.Add(Entry, new Regex("^" + Regex.Escape(Entry).Replace("\\*", ".*"), RegexOptions.Compiled));
                    }
                }
                else
                {
                    Dictionary<string, string[]> StandardBucket;
                    if (!StandardEntries.TryGetValue(Words_In_Entry, out StandardBucket))
                    {
                        StandardBucket = new Dictionary<string, string[]>();
                        StandardEntries.Add(Words_In_Entry, StandardBucket);
                    }

                    // First occurrence wins; later duplicates are ignored.
                    if (!StandardBucket.ContainsKey(Entry))
                    {
                        StandardBucket.Add(Entry, EntryCategories);
                    }
                }
            }

            // Freeze the wildcard lists into arrays, from the longest word count down to 1.
            for (int i = DictData.MaxWords; i > 0; i--)
            {
                if (WildCardLists.ContainsKey(i))
                {
                    DictData.WildCardArrays.Add(i, WildCardLists[i].ToArray());
                }
            }
            WildCardLists.Clear();

            DictData.DictionaryLoaded = true;
            DictData.AllEntries       = AllEntriesInFile;
            DictData.AllEntriesArray  = AllEntriesInFile.ToArray();

            return(DictData);
        }
        /// <summary>
        /// Scans a tokenized text for dictionary entries and counts, per matched entry, how often each
        /// of its categories was hit.
        /// </summary>
        /// <remarks>
        /// At each position the longest n-gram (up to <c>DictData.MaxWords</c> words, joined with single
        /// spaces) is tried first. For a given n-gram length an exact ("Standards") match takes precedence
        /// over wildcard matches, and wildcards are tried in the order stored in
        /// <c>DictData.WildCardArrays</c>. Once an n-gram matches, all of its words are consumed and
        /// scanning resumes at the word after it.
        /// </remarks>
        /// <param name="DictData">Parsed dictionary to match against.</param>
        /// <param name="Words">Tokens of the text, in order.</param>
        /// <returns>
        /// A map from matched dictionary entry (the wildcard pattern itself for wildcard matches) to a
        /// counter array indexed by the output position that <c>OutputDataMap</c> assigns to each category.
        /// </returns>
        private Dictionary <string, ulong[]> AnalyzeText(DictionaryData DictData, string[] Words)
        {
            // Mirrors the entry frequency tracker of the caller; built locally and returned for aggregation.
            Dictionary<string, ulong[]> WordsCaptured_Raw = new Dictionary<string, ulong[]>();

            int TotalStringLength = Words.Length;

            for (int i = 0; i < TotalStringLength; i++)
            {
                // Iterate over n-grams, starting with the largest n-gram present in the dictionary.
                for (int NumberOfWords = DictData.MaxWords; NumberOfWords > 0; NumberOfWords--)
                {
                    // Make sure that the n-gram does not extend past the end of the array.
                    if (i + NumberOfWords - 1 >= TotalStringLength)
                    {
                        continue;
                    }

                    string TargetString = NumberOfWords > 1
                        ? String.Join(" ", Words, i, NumberOfWords)
                        : Words[i];

                    // Entry (dictionary key) that captured this n-gram, and the categories it belongs to.
                    string   MatchedEntry      = null;
                    string[] MatchedCategories = null;

                    // Look for an exact match first.
                    Dictionary<string, string[]> StandardBucket;
                    if (DictData.FullDictionary["Standards"].TryGetValue(NumberOfWords, out StandardBucket)
                        && StandardBucket.TryGetValue(TargetString, out MatchedCategories))
                    {
                        MatchedEntry = TargetString;
                    }
                    else
                    {
                        // No exact match: go through the wildcards of this n-gram length, first hit wins.
                        string[] WildcardsForLength;
                        if (DictData.WildCardArrays.TryGetValue(NumberOfWords, out WildcardsForLength))
                        {
                            for (int j = 0; j < WildcardsForLength.Length; j++)
                            {
                                if (DictData.PrecompiledWildcards[WildcardsForLength[j]].IsMatch(TargetString))
                                {
                                    MatchedEntry      = WildcardsForLength[j];
                                    MatchedCategories = DictData.FullDictionary["Wildcards"][NumberOfWords][MatchedEntry];
                                    break;
                                }
                            }
                        }
                    }

                    if (MatchedEntry == null)
                    {
                        // Nothing captured at this length; try the next shorter n-gram.
                        continue;
                    }

                    // Make sure that the entry is contained in the tracking dictionary.
                    // NOTE(review): the counter array is sized from UserLoadedDictionary.DictData rather than
                    // from the DictData argument; confirm both always refer to the same dictionary.
                    ulong[] CategoryCounts;
                    if (!WordsCaptured_Raw.TryGetValue(MatchedEntry, out CategoryCounts))
                    {
                        CategoryCounts = new ulong[UserLoadedDictionary.DictData.NumCats];
                        WordsCaptured_Raw.Add(MatchedEntry, CategoryCounts);
                    }

                    // Increment every category the entry belongs to. Each hit counts as 1 regardless of
                    // how many words the entry spans.
                    for (int k = 0; k < MatchedCategories.Length; k++)
                    {
                        int CategoryOutputPosition = OutputDataMap[MatchedCategories[k]];
                        CategoryCounts[CategoryOutputPosition] += 1;
                    }

                    // Skip the words that were just captured, then leave the n-gram loop so that shorter
                    // n-grams are not tested against words that have already been picked up. This applies
                    // to exact and wildcard matches alike.
                    i += NumberOfWords - 1;
                    break;
                }
            }

            return(WordsCaptured_Raw);
        }