Esempio n. 1
0
        /// <summary>
        /// Click handler of the "Go" tool-strip button. Collects the files below the selected
        /// folder, sizes the progress bar to the file count, clears the cloud and starts a
        /// background task that extracts the words; <c>ApplyResults</c> runs as its continuation.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void ToolStripButtonGoClick(object sender, EventArgs e)
        {
            IsRunning = true;

            string   selectedFolder   = FolderTree.SelectedPath;
            Language selectedLanguage = ByLanguageFactory.GetLanguageFromString(toolStripComboBoxLanguage.Text);

            // Every collaborator is obtained from the per-language factory.
            FileIterator iterator    = ByLanguageFactory.GetFileIterator(selectedLanguage);
            IBlacklist   stopWords   = ByLanguageFactory.GetBlacklist(selectedLanguage);
            IWordStemmer wordStemmer = ByLanguageFactory.GetStemmer(selectedLanguage);

            SetCaptionText("Estimating ...");

            // The file list is materialized up front because its length sizes the progress bar.
            string[] sourceFiles = iterator.GetFiles(selectedFolder).ToArray();
            ToolStripProgressBar.Maximum = sourceFiles.Length;

            // Start from an empty cloud until the background task delivers its result.
            m_CloudControl.WeightedWords = new List <IWord>();

            // m_CancelSource is intentionally not disposed here (original author's note: it is
            // expected to be cleaned up together with the task).
            // TODO: settle the correct lifetime handling for the CancellationTokenSource, see
            // http://stackoverflow.com/questions/6960520/when-to-dispose-cancellationtokensource
            m_CancelSource = new CancellationTokenSource();

            var extraction = Task.Factory.StartNew(
                () => GetWordsParallely(sourceFiles, selectedLanguage, stopWords, wordStemmer),
                m_CancelSource.Token);

            extraction.ContinueWith(ApplyResults);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a word cloud from <c>Model.BugReportSummaryList</c> and adds it to
        /// <c>bugReportSummaryWordCloudGrid</c> through a Windows Forms interop host.
        /// </summary>
        /// <remarks>
        /// The stop-word list is read from <c>InputData\stopwords_en.txt</c> below the
        /// "MARC 3.0" folder of the user's application-data directory. Any exception is traced
        /// and reported to the user with a message box; it is not propagated to the caller.
        /// </remarks>
        private void ShowBugReportSummaryWordCloud()
        {
            try
            {
                // Create the interop host control.
                System.Windows.Forms.Integration.WindowsFormsHost host =
                    new System.Windows.Forms.Integration.WindowsFormsHost();

                // Create the word cloud control that the host will display.
                Gma.CodeCloud.Controls.CloudControl wordCloud = new Gma.CodeCloud.Controls.CloudControl();

                // Progress bar handed to the progress indicator below; it is not added to any container here.
                System.Windows.Forms.ProgressBar extractionProgressBar = new System.Windows.Forms.ProgressBar();

                string appDataFolder = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData);

                // Combine the base folder with the application-specific folder.
                string specificFolder = System.IO.Path.Combine(appDataFolder, "MARC 3.0");

                // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
                Directory.CreateDirectory(specificFolder);

                // Path.Combine instead of string concatenation with hard-coded separators.
                string stopwordsFile = System.IO.Path.Combine(specificFolder, "InputData", "stopwords_en.txt");
                IBlacklist blacklist = CommonBlacklist.CreateFromTextFile(stopwordsFile);

                var preProcessedList = ApplyStopwordsRemoval(Model.BugReportSummaryList);

                // Join the summaries once; the same text feeds both type detection and extraction.
                string inputText = String.Join(",", preProcessedList.ToArray());

                InputType            inputType = ComponentFactory.DetectInputType(inputText);
                IProgressIndicator   progress  = ComponentFactory.CreateProgressBar(inputType, extractionProgressBar);
                IEnumerable <string> terms     = ComponentFactory.CreateExtractor(inputType, inputText, progress);
                IWordStemmer         stemmer   = ComponentFactory.CreateWordStemmer(false);

                IEnumerable <IWord> words = terms
                                            .Filter(blacklist)
                                            .CountOccurences();

                wordCloud.WeightedWords =
                    words
                    .GroupByStem(stemmer)
                    .SortByOccurences()
                    .Cast <IWord>();

                // Assign the cloud control as the host control's child.
                host.Child = wordCloud;

                this.bugReportSummaryWordCloudGrid.Children.Add(host);
            }
            catch (Exception ex)
            {
                // Keep the exception details for diagnosis; the user only sees the generic message.
                System.Diagnostics.Trace.TraceError("Bug report summary word cloud failed: " + ex);
                MessageBox.Show("Something went wrong in the word cloud engine.", "Unexpected Error", MessageBoxButton.OK, MessageBoxImage.Information);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a caching stemmer that wraps <paramref name="backend"/>.
        /// </summary>
        /// <param name="backend">The stemmer to wrap; must not itself be a <see cref="CachedWordStemmer"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="backend"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="backend"/> is a <see cref="CachedWordStemmer"/>.</exception>
        public CachedWordStemmer(IWordStemmer backend)
        {
            if (backend is null)
            {
                throw new ArgumentNullException(nameof(backend));
            }

            // Wrapping a cache in another cache is rejected outright.
            bool isNested = backend is CachedWordStemmer;
            if (isNested)
            {
                throw new ArgumentException("Nesting cached word stemmers is not allowed", nameof(backend));
            }

            this.backend = backend;
            cache        = new ConcurrentDictionary <string, string>();
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a word cloud from <c>Model.UserRequirementsSummaryList</c> and adds it to
        /// <c>userRequirementsSummaryWordCloudGrid</c> through a Windows Forms interop host.
        /// </summary>
        /// <remarks>
        /// Any exception is traced and reported to the user with a message box; it is not
        /// propagated to the caller.
        /// </remarks>
        private void ShowUserRequirementsSummaryWordCloud()
        {
            try
            {
                // Create the interop host control.
                System.Windows.Forms.Integration.WindowsFormsHost host =
                    new System.Windows.Forms.Integration.WindowsFormsHost();

                // Create the word cloud control that the host will display.
                Gma.CodeCloud.Controls.CloudControl wordCloud = new Gma.CodeCloud.Controls.CloudControl();

                // Progress bar handed to the progress indicator below; it is not added to any container here.
                System.Windows.Forms.ProgressBar extractionProgressBar = new System.Windows.Forms.ProgressBar();

                IBlacklist blacklist = ComponentFactory.CreateBlacklist(false);

                var preProcessedList = ApplyStopwordsRemoval(Model.UserRequirementsSummaryList);

                // Join the summaries once; the same text feeds both type detection and extraction.
                string inputText = String.Join(",", preProcessedList.ToArray());

                InputType            inputType = ComponentFactory.DetectInputType(inputText);
                IProgressIndicator   progress  = ComponentFactory.CreateProgressBar(inputType, extractionProgressBar);
                IEnumerable <string> terms     = ComponentFactory.CreateExtractor(inputType, inputText, progress);
                IWordStemmer         stemmer   = ComponentFactory.CreateWordStemmer(false);

                IEnumerable <IWord> words = terms
                                            .Filter(blacklist)
                                            .CountOccurences();

                wordCloud.WeightedWords =
                    words
                    .GroupByStem(stemmer)
                    .SortByOccurences()
                    .Cast <IWord>();

                // Assign the cloud control as the host control's child.
                host.Child = wordCloud;

                this.userRequirementsSummaryWordCloudGrid.Children.Add(host);
            }
            catch (Exception ex)
            {
                // Keep the exception details for diagnosis; the user only sees the generic message.
                System.Diagnostics.Trace.TraceError("User requirements summary word cloud failed: " + ex);
                MessageBox.Show("Something went wrong in the word cloud engine.", "Unexpected Error", MessageBoxButton.OK, MessageBoxImage.Information);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Generates a word cloud for the text described by <paramref name="args"/> and returns
        /// a response carrying the URL produced by <c>UploadToImgurAndGetUrl</c>.
        /// </summary>
        /// <param name="args">
        /// Request data. When <c>args.Text</c> is a well-formed absolute URI it is overwritten
        /// with the result of <c>GetTextFromUrl</c>.
        /// </param>
        /// <returns>A response whose <c>ImgurUrl</c> holds the uploaded image URL.</returns>
        public GenerateWordCloudResponse GenerateWordCloud(GenerateWordCloudArgs args)
        {
            InitializePanel(args);

            // A URL as input means "use the text behind that URL".
            bool textIsUrl = Uri.IsWellFormedUriString(args.Text, UriKind.Absolute);
            if (textIsUrl)
            {
                args.Text = this.GetTextFromUrl(args.Text);
            }

            IEnumerable <string> excludedWords = this.GetCustomWordsFromString(args.WordsToExclude);

            // Two filters: the factory-provided blacklist and the caller's own exclusions.
            IBlacklist factoryBlacklist = ComponentFactory.CreateBlacklist(args.ExcludeCommonWords);
            IBlacklist callerBlacklist  = new CommonBlacklist(excludedWords);

            IEnumerable <string> extractedTerms = ComponentFactory.CreateExtractor(args.Text);

            // NOTE(review): the stemmer is created from ExcludeCommonWords as well - confirm this flag is the intended one.
            IWordStemmer wordStemmer = ComponentFactory.CreateWordStemmer(args.ExcludeCommonWords);

            IEnumerable <IWord> countedWords = extractedTerms
                                               .Filter(factoryBlacklist)
                                               .Filter(callerBlacklist)
                                               .CountOccurences();

            IEnumerable <IWord> rankedWords = countedWords
                                              .GroupByStem(wordStemmer)
                                              .SortByOccurences()
                                              .Cast <IWord>();

            cloudControl.WeightedWords = rankedWords;

            // The upload helper is called with an empty string, exactly as before.
            string uploadedUrl = UploadToImgurAndGetUrl(string.Empty);

            GenerateWordCloudResponse result = new GenerateWordCloudResponse();
            result.ImgurUrl = uploadedUrl;

            return result;
        }
 /// <summary>
 /// Groups words by the stem of their <c>Text</c>, producing one <see cref="WordGroup"/> per stem.
 /// </summary>
 /// <param name="words">The words to group.</param>
 /// <param name="stemmer">Provides the stem that is used as the grouping key.</param>
 /// <returns>A deferred query yielding one group per distinct stem.</returns>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="words"/> or <paramref name="stemmer"/> is null.
 /// </exception>
 public static IEnumerable <WordGroup> GroupByStem(this IEnumerable <IWord> words, IWordStemmer stemmer)
 {
     // Validate eagerly: the query below is deferred, so a null stemmer would otherwise only
     // surface as a NullReferenceException when the result is enumerated.
     if (words == null)
     {
         throw new System.ArgumentNullException(nameof(words));
     }

     if (stemmer == null)
     {
         throw new System.ArgumentNullException(nameof(stemmer));
     }

     return
         (words.GroupBy(
              word => stemmer.GetStem(word.Text),
              (stem, sameStemWords) => new WordGroup(stem, sameStemWords)));
 }
Esempio n. 7
0
        /// <summary>
        /// Click handler of the "Go" button. Extracts the words of the page at the URL entered
        /// in <c>txt_URL</c>, filters, counts and groups them, publishes the top words and the
        /// page's images to the UI, and colors <c>txt_URL</c> green or red depending on success.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btn_Go_Click(object sender, EventArgs e)
        {
            // Used factory pattern here instead of DI because the user can change these at will during runtime.
            // Both are created from the current state of the corresponding check boxes.
            IWordStemmer stemmer   = Factory.CreateWordStemmer(cb_grouping.Checked);
            IBlacklist   blacklist = Factory.CreateBlacklist(cb_ignoreCommonwords.Checked);

            // Perform in another thread to keep the UI active and display animation to the user.
            DoWait(() =>
            {
                // NOTE(review): txt_URL.Text is read inside the Task.Run delegate, i.e. on a
                // thread-pool thread - confirm that this cross-thread read is acceptable.
                var task = Task.Run(() =>
                {
                    using (var document = new UriExtractor(progressIndicator, webDriver.GetWebDriver())
                    {
                        URI = new Uri(Normalize(txt_URL.Text))
                    })
                    {
                        // Replace the extractor's search tags and symbol filter with the custom settings.
                        document.SearchTags.Clear();
                        document.SearchTags.AddRange(CustomSettings.SearchTagNames);
                        document.ExcludeSymbolsRegEx = CustomSettings.RegExExcludeSymbols;

                        _lastGroupOfWords = document
                                            .Filter(blacklist)
                                            .Filter(customIgnorelist)
                                            .OrderBy(w => w) // Sort alphabetically
                                            .CountOccurences()
                                            .GroupByStem(stemmer)
                                            .SortByOccurences() // Sort by occurrences
                                            .ToList();          // Materialize now: the extractor is disposed at the end of the using block, so the query must not stay deferred.


                        // Compute top n words.
                        var topWords = _lastGroupOfWords.Take(CustomSettings.TopNumberOfWords);

                        SetWordList(topWords);
                        // DoLocal presumably marshals the delegate to the UI thread (helper not shown here).
                        DoLocal(() => ClearImageList());
                        var images = document.GetImages();
                        foreach (var image in images)
                        {
                            // Item1 is used as the image key, Item2 as the list item text.
                            var imageObj = GetImage(image.Item1);
                            if (imageObj == null)
                            {
                                // Skip entries for which no image object could be obtained.
                                continue;
                            }
                            DoLocal(() => imagesFromCurrentSite.Images.Add(image.Item1, imageObj));
                            DoLocal(() => lv_images.Items.Add(new ListViewItem(image.Item2, image.Item1)
                            {
                                ToolTipText = "Click the image to display full size."
                            }));
                        }
                    }
                });

                // Set callback to check for any errors during execution
                task.GetAwaiter().OnCompleted(() =>
                {
                    if (task.Exception != null)
                    {
                        // Failure: show the error to the user and mark the URL box red.
                        DoLocal(() => MessageBox.Show(this, $"Sorry the following error occured while trying to execute your last request:\r\n {GetErrorMessage(task.Exception)}"), true);
                        DoLocal(() => txt_URL.BackColor = Color.Red);
                    }
                    else
                    {
                        // Success: mark the URL box green.
                        DoLocal(() => txt_URL.BackColor = Color.Green);
                    }
                });

                // Wait until the task is done so the progress bar doesn't go away.
                // NOTE(review): Task.Wait rethrows a faulted task's error as an AggregateException -
                // confirm that DoWait tolerates this, since the failure is already reported by the callback above.
                task.Wait();
            });
        }
Esempio n. 8
0
 /// <summary>
 /// Merges words that share the same stem into a single <see cref="WordDo"/> per stem.
 /// </summary>
 /// <param name="words">The words to merge.</param>
 /// <param name="stemmer">Provides the stem that is used as the grouping key.</param>
 /// <returns>
 /// A deferred query yielding one entry per distinct stem. Its <c>size</c> is the sum of the
 /// sizes in the group and its <c>text</c> is the text of the group member with the largest size.
 /// </returns>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="words"/> or <paramref name="stemmer"/> is null.
 /// </exception>
 public static IEnumerable <WordDo> GroupByStem(this IEnumerable <WordDo> words, IWordStemmer stemmer)
 {
     // Validate eagerly: the query below is deferred, so a null stemmer would otherwise only
     // surface as a NullReferenceException when the result is enumerated.
     if (words == null)
     {
         throw new System.ArgumentNullException(nameof(words));
     }

     if (stemmer == null)
     {
         throw new System.ArgumentNullException(nameof(stemmer));
     }

     return
         (words
          .GroupBy(word => stemmer.GetStem(word.text))
          .Select(group =>
     {
         // Materialize the group once; it is traversed twice below.
         var pairs = group.ToArray();
         return new WordDo()
         {
             // Total weight of all variants of this stem.
             size =
                 pairs
                 .Select(s => s.size).Sum(),
             // The heaviest variant represents the group.
             text =
                 pairs
                 .OrderByDescending(p => p.size)
                 .First().text
         };
     }));
 }
Esempio n. 9
0
 /// <summary>
 /// Extracts the words of all <paramref name="files"/> with a parallel query, filters them
 /// against <paramref name="blacklist"/>, counts them, groups them by stem and returns them
 /// sorted by occurrences.
 /// </summary>
 /// <param name="files">Files to read words from.</param>
 /// <param name="language">Language used to pick the word extractor for each file.</param>
 /// <param name="blacklist">Words to drop before counting.</param>
 /// <param name="stemmer">Stemmer used to group the counted words.</param>
 /// <returns>The resulting words as a materialized list.</returns>
 private List <IWord> GetWordsParallely(IEnumerable <string> files, Language language, IBlacklist blacklist, IWordStemmer stemmer)
 {
     // Stage 1: parallel extraction. The query observes the token of m_CancelSource and
     // passes through WithCallback(DoProgress); no explicit degree of parallelism is set.
     var extractedTerms = files
                          .AsParallel()
                          .WithCancellation(m_CancelSource.Token)
                          .WithCallback(DoProgress)
                          .SelectMany(file => ByLanguageFactory.GetWordExtractor(language, file));

     // Stage 2: filter, count, group by stem and rank.
     var rankedGroups = extractedTerms
                        .Filter(blacklist)
                        .CountOccurences()
                        .GroupByStem(stemmer)
                        .SortByOccurences();

     // Stage 3: materialize as a list of IWord.
     return rankedGroups
            .AsEnumerable()
            .Cast <IWord>()
            .ToList();
 }
 /// <summary>
 /// Creates a term sieve that uses the given stemmer.
 /// </summary>
 /// <param name="stemmer">The stemmer to use; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="stemmer"/> is null.</exception>
 public DefaultTermSieve(IWordStemmer stemmer)
 {
     if (stemmer is null)
     {
         throw new ArgumentNullException(nameof(stemmer));
     }

     this.stemmer = stemmer;
 }