/// <summary>
/// Click handler for the "Go" tool-strip button: collects the files under the selected folder
/// and starts a background task that extracts and counts their words.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void ToolStripButtonGoClick(object sender, EventArgs e)
{
    IsRunning = true;

    string selectedFolder = FolderTree.SelectedPath;

    // Every language-specific collaborator is obtained from the factory for the chosen language.
    Language language = ByLanguageFactory.GetLanguageFromString(toolStripComboBoxLanguage.Text);
    FileIterator iterator = ByLanguageFactory.GetFileIterator(language);
    IBlacklist blacklist = ByLanguageFactory.GetBlacklist(language);
    IWordStemmer stemmer = ByLanguageFactory.GetStemmer(language);

    SetCaptionText("Estimating ...");

    // The file list is materialized up front so its length can size the progress bar.
    string[] files = iterator.GetFiles(selectedFolder).ToArray();
    ToolStripProgressBar.Maximum = files.Length;

    // Start from an empty word list while the new results are being computed.
    m_CloudControl.WeightedWords = new List<IWord>();

    // Original author's note: do not dispose m_CancelSource here; it will be disposed by the task.
    // TODO: find the correct way to work with CancellationToken, see
    // http://stackoverflow.com/questions/6960520/when-to-dispose-cancellationtokensource
    m_CancelSource = new CancellationTokenSource();
    var token = m_CancelSource.Token;

    var extraction = Task.Factory.StartNew(
        () => GetWordsParallely(files, language, blacklist, stemmer),
        token);

    // ApplyResults receives the finished task as a continuation.
    extraction.ContinueWith(ApplyResults);
}
/// <summary>
/// Builds a word cloud from <c>Model.BugReportSummaryList</c> and adds it to
/// <c>bugReportSummaryWordCloudGrid</c> inside a <c>WindowsFormsHost</c>.
/// </summary>
/// <remarks>
/// The blacklist is read from <c>InputData\stopwords_en.txt</c> below the "MARC 3.0" folder in
/// the current user's application-data directory. Every exception is caught: its details are
/// written to the debug output and the user is shown a generic error message box.
/// </remarks>
private void ShowBugReportSummaryWordCloud()
{
    try
    {
        // Windows Forms controls: the cloud itself, and a progress bar that is only handed to
        // the progress indicator (it is never added to a parent in this method).
        Gma.CodeCloud.Controls.CloudControl cloud = new Gma.CodeCloud.Controls.CloudControl();
        System.Windows.Forms.ProgressBar progressControl = new System.Windows.Forms.ProgressBar();

        string appDataFolder = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData);
        string specificFolder = System.IO.Path.Combine(appDataFolder, "MARC 3.0");

        // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
        Directory.CreateDirectory(specificFolder);

        string stopwordsFile = System.IO.Path.Combine(specificFolder, "InputData", "stopwords_en.txt");
        IBlacklist blacklist = CommonBlacklist.CreateFromTextFile(stopwordsFile);

        // Join the pre-processed summaries once; both input-type detection and the extractor
        // consume the same text.
        var preProcessedList = ApplyStopwordsRemoval(Model.BugReportSummaryList);
        string joinedSummaries = String.Join(",", preProcessedList.ToArray());

        InputType inputType = ComponentFactory.DetectInputType(joinedSummaries);
        IProgressIndicator progress = ComponentFactory.CreateProgressBar(inputType, progressControl);
        IEnumerable<string> terms = ComponentFactory.CreateExtractor(inputType, joinedSummaries, progress);
        IWordStemmer stemmer = ComponentFactory.CreateWordStemmer(false);

        IEnumerable<IWord> words = terms
            .Filter(blacklist)
            .CountOccurences();

        cloud.WeightedWords = words
            .GroupByStem(stemmer)
            .SortByOccurences()
            .Cast<IWord>();

        // Create the interop host only once the cloud is ready, so a failure above does not
        // leave an orphaned host behind.
        System.Windows.Forms.Integration.WindowsFormsHost host = new System.Windows.Forms.Integration.WindowsFormsHost();
        host.Child = cloud;
        this.bugReportSummaryWordCloudGrid.Children.Add(host);
    }
    catch (Exception ex)
    {
        // Keep the user-facing message generic, but do not lose the failure details.
        System.Diagnostics.Debug.WriteLine(ex);
        MessageBox.Show("Something went wrong in the word cloud engine.", "Unexpected Error", MessageBoxButton.OK, MessageBoxImage.Information);
    }
}
/// <summary>
/// Initializes the stemmer with the given backend and an empty cache dictionary.
/// </summary>
/// <param name="backend">Stemmer stored as the backend; must not be null and must not be a <see cref="CachedWordStemmer"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="backend"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="backend"/> is itself a <see cref="CachedWordStemmer"/>.</exception>
public CachedWordStemmer(IWordStemmer backend)
{
    if (backend is null)
    {
        throw new ArgumentNullException(nameof(backend));
    }

    this.backend = backend;

    // Wrapping one cached stemmer in another is rejected.
    if (backend is CachedWordStemmer)
    {
        throw new ArgumentException("Nesting cached word stemmers is not allowed", nameof(backend));
    }

    cache = new ConcurrentDictionary<string, string>();
}
/// <summary>
/// Builds a word cloud from <c>Model.UserRequirementsSummaryList</c> and adds it to
/// <c>userRequirementsSummaryWordCloudGrid</c> inside a <c>WindowsFormsHost</c>.
/// </summary>
/// <remarks>
/// The blacklist comes from <c>ComponentFactory.CreateBlacklist(false)</c>. Every exception is
/// caught: its details are written to the debug output and the user is shown a generic error
/// message box.
/// </remarks>
private void ShowUserRequirementsSummaryWordCloud()
{
    try
    {
        // Windows Forms controls: the cloud itself, and a progress bar that is only handed to
        // the progress indicator (it is never added to a parent in this method).
        Gma.CodeCloud.Controls.CloudControl cloud = new Gma.CodeCloud.Controls.CloudControl();
        System.Windows.Forms.ProgressBar progressControl = new System.Windows.Forms.ProgressBar();

        IBlacklist blacklist = ComponentFactory.CreateBlacklist(false);

        // Join the pre-processed summaries once; both input-type detection and the extractor
        // consume the same text.
        var preProcessedList = ApplyStopwordsRemoval(Model.UserRequirementsSummaryList);
        string joinedSummaries = String.Join(",", preProcessedList.ToArray());

        InputType inputType = ComponentFactory.DetectInputType(joinedSummaries);
        IProgressIndicator progress = ComponentFactory.CreateProgressBar(inputType, progressControl);
        IEnumerable<string> terms = ComponentFactory.CreateExtractor(inputType, joinedSummaries, progress);
        IWordStemmer stemmer = ComponentFactory.CreateWordStemmer(false);

        IEnumerable<IWord> words = terms
            .Filter(blacklist)
            .CountOccurences();

        cloud.WeightedWords = words
            .GroupByStem(stemmer)
            .SortByOccurences()
            .Cast<IWord>();

        // Create the interop host only once the cloud is ready, so a failure above does not
        // leave an orphaned host behind.
        System.Windows.Forms.Integration.WindowsFormsHost host = new System.Windows.Forms.Integration.WindowsFormsHost();
        host.Child = cloud;
        this.userRequirementsSummaryWordCloudGrid.Children.Add(host);
    }
    catch (Exception ex)
    {
        // Keep the user-facing message generic, but do not lose the failure details.
        System.Diagnostics.Debug.WriteLine(ex);
        MessageBox.Show("Something went wrong in the word cloud engine.", "Unexpected Error", MessageBoxButton.OK, MessageBoxImage.Information);
    }
}
/// <summary>
/// Generates a word cloud for the supplied arguments and returns the URL produced by
/// <c>UploadToImgurAndGetUrl</c>.
/// </summary>
/// <param name="args">
/// Request data. When <c>args.Text</c> is a well-formed absolute URI it is overwritten with the
/// result of <c>GetTextFromUrl</c> before extraction.
/// </param>
/// <returns>A response whose <c>ImgurUrl</c> holds the upload result.</returns>
public GenerateWordCloudResponse GenerateWordCloud(GenerateWordCloudArgs args)
{
    InitializePanel(args);

    // Text that is itself a URL is replaced with the text fetched from that URL.
    bool textIsUrl = Uri.IsWellFormedUriString(args.Text, UriKind.Absolute);
    if (textIsUrl)
    {
        args.Text = this.GetTextFromUrl(args.Text);
    }

    // Two filters are applied: the factory blacklist and the caller-supplied exclusions.
    IEnumerable<string> userExcludedWords = this.GetCustomWordsFromString(args.WordsToExclude);
    IBlacklist commonBlacklist = ComponentFactory.CreateBlacklist(args.ExcludeCommonWords);
    IBlacklist userBlacklist = new CommonBlacklist(userExcludedWords);

    IEnumerable<string> extractedTerms = ComponentFactory.CreateExtractor(args.Text);
    IWordStemmer wordStemmer = ComponentFactory.CreateWordStemmer(args.ExcludeCommonWords);

    IEnumerable<IWord> countedWords = extractedTerms
        .Filter(commonBlacklist)
        .Filter(userBlacklist)
        .CountOccurences();

    cloudControl.WeightedWords = countedWords
        .GroupByStem(wordStemmer)
        .SortByOccurences()
        .Cast<IWord>();

    // The upload helper is called with an empty string, as before.
    string uploadedUrl = UploadToImgurAndGetUrl(string.Empty);

    return new GenerateWordCloudResponse { ImgurUrl = uploadedUrl };
}
/// <summary>
/// Groups words by the stem that <paramref name="stemmer"/> returns for each word's text and
/// wraps every group in a <see cref="WordGroup"/>.
/// </summary>
/// <param name="words">Words to group.</param>
/// <param name="stemmer">Stemmer whose <c>GetStem</c> result is used as the grouping key.</param>
/// <returns>A lazily evaluated sequence with one <see cref="WordGroup"/> per distinct stem.</returns>
public static IEnumerable<WordGroup> GroupByStem(this IEnumerable<IWord> words, IWordStemmer stemmer)
{
    return words
        .GroupBy(word => stemmer.GetStem(word.Text))
        .Select(sameStem => new WordGroup(sameStem.Key, sameStem));
}
/// <summary>
/// Click handler for the "Go" button: extracts words and images from the URL in
/// <c>txt_URL</c>, updates the word list and image list, and colors the URL box according to
/// whether the run succeeded.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btn_Go_Click(object sender, EventArgs e)
{
    // A factory is used instead of DI because the user can change these options at runtime.
    IWordStemmer stemmer = Factory.CreateWordStemmer(cb_grouping.Checked);
    IBlacklist blacklist = Factory.CreateBlacklist(cb_ignoreCommonwords.Checked);

    // Original author's intent: run on another thread so the UI stays responsive and the
    // wait animation is shown.
    DoWait(() =>
    {
        var task = Task.Run(() =>
        {
            using (var extractor = new UriExtractor(progressIndicator, webDriver.GetWebDriver()) { URI = new Uri(Normalize(txt_URL.Text)) })
            {
                extractor.SearchTags.Clear();
                extractor.SearchTags.AddRange(CustomSettings.SearchTagNames);
                extractor.ExcludeSymbolsRegEx = CustomSettings.RegExExcludeSymbols;

                // Drop blacklisted and custom-ignored terms first.
                var relevantTerms = extractor
                    .Filter(blacklist)
                    .Filter(customIgnorelist);

                // Sort alphabetically, then count occurrences.
                var termCounts = relevantTerms
                    .OrderBy(w => w)
                    .CountOccurences();

                // ToList forces evaluation now, because the extractor is disposed when this
                // using block ends.
                _lastGroupOfWords = termCounts
                    .GroupByStem(stemmer)
                    .SortByOccurences()
                    .ToList();

                // Show only the top N words.
                var mostFrequent = _lastGroupOfWords.Take(CustomSettings.TopNumberOfWords);
                SetWordList(mostFrequent);

                DoLocal(() => ClearImageList());

                foreach (var image in extractor.GetImages())
                {
                    var picture = GetImage(image.Item1);
                    if (picture != null)
                    {
                        // Item1 is used as the image-list key, Item2 as the list item's text.
                        DoLocal(() => imagesFromCurrentSite.Images.Add(image.Item1, picture));
                        DoLocal(() => lv_images.Items.Add(new ListViewItem(image.Item2, image.Item1) { ToolTipText = "Click the image to display full size." }));
                    }
                }
            }
        });

        // Completion callback: report any error and color the URL box accordingly.
        task.GetAwaiter().OnCompleted(() =>
        {
            if (task.Exception == null)
            {
                DoLocal(() => txt_URL.BackColor = Color.Green);
            }
            else
            {
                DoLocal(() => MessageBox.Show(this, $"Sorry the following error occured while trying to execute your last request:\r\n {GetErrorMessage(task.Exception)}"), true);
                DoLocal(() => txt_URL.BackColor = Color.Red);
            }
        });

        // Original author's intent: block until the task is done so the progress bar stays visible.
        // NOTE(review): Wait() rethrows when the task faults; confirm DoWait handles that.
        task.Wait();
    });
}
/// <summary>
/// Merges words that share a stem into a single <see cref="WordDo"/> per stem.
/// </summary>
/// <param name="words">Words to merge.</param>
/// <param name="stemmer">Stemmer whose <c>GetStem</c> result for <c>text</c> is the grouping key.</param>
/// <returns>
/// A lazily evaluated sequence with one new <see cref="WordDo"/> per stem: <c>size</c> is the
/// sum of the group's sizes and <c>text</c> is taken from the member with the largest size
/// (the earliest such member when sizes tie, since the ordering is stable).
/// </returns>
public static IEnumerable<WordDo> GroupByStem(this IEnumerable<WordDo> words, IWordStemmer stemmer)
{
    return
        from word in words
        group word by stemmer.GetStem(word.text) into sameStem
        // Materialize each group once; it is read twice below.
        let members = sameStem.ToArray()
        select new WordDo()
        {
            size = members.Sum(member => member.size),
            text = members
                .OrderByDescending(member => member.size)
                .First().text
        };
}
/// <summary>
/// Extracts words from the given files with a parallel query, then filters, counts, groups by
/// stem and sorts them.
/// </summary>
/// <param name="files">Files passed one by one to the word extractor.</param>
/// <param name="language">Language used to pick the word extractor for each file.</param>
/// <param name="blacklist">Blacklist applied to the extracted words.</param>
/// <param name="stemmer">Stemmer used to group the counted words.</param>
/// <returns>The resulting words as a materialized list.</returns>
private List<IWord> GetWordsParallely(IEnumerable<string> files, Language language, IBlacklist blacklist, IWordStemmer stemmer)
{
    // The query observes m_CancelSource's token and passes DoProgress to WithCallback.
    // (An explicit WithDegreeOfParallelism(0x8) was previously disabled here.)
    var extractedWords = files
        .AsParallel()
        .WithCancellation(m_CancelSource.Token)
        .WithCallback(DoProgress)
        .SelectMany(file => ByLanguageFactory.GetWordExtractor(language, file));

    var rankedGroups = extractedWords
        .Filter(blacklist)
        .CountOccurences()
        .GroupByStem(stemmer)
        .SortByOccurences();

    // ToList runs the query and produces the list that is returned.
    return rankedGroups
        .AsEnumerable()
        .Cast<IWord>()
        .ToList();
}
/// <summary>
/// Initializes the sieve with the stemmer it stores in its <c>stemmer</c> field.
/// </summary>
/// <param name="stemmer">Stemmer to store; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="stemmer"/> is null.</exception>
public DefaultTermSieve(IWordStemmer stemmer)
{
    if (stemmer is null)
    {
        throw new ArgumentNullException(nameof(stemmer));
    }

    this.stemmer = stemmer;
}