/// <summary>
/// Opens <paramref name="project"/> in a new tab of <c>MainTabControl</c> and selects that tab.
/// </summary>
/// <param name="project">Project to show; its <c>ProjectInfo.ProjectName</c> is used as the tab caption.</param>
private void AddProjectTab(WsdProject project)
{
    var page = new TabPage();
    page.Text = project.ProjectInfo.ProjectName;
    MainTabControl.TabPages.Add(page);

    // Only components that belong to plugins flagged as loaded are handed to the project form.
    var loadedComponents = (
        from plugin in _pluginInfos
        where plugin.Loaded
        from component in plugin.Components
        select component).ToArray();

    // The form is embedded in the tab page, so it is made non-top-level before being added as a child.
    var hostedForm = new ProjectForm(loadedComponents, project, _progressFactory);
    hostedForm.TopLevel = false;
    page.Controls.Add(hostedForm);
    hostedForm.FormBorderStyle = FormBorderStyle.None;
    hostedForm.Dock = DockStyle.Fill;
    hostedForm.Show();

    MainTabControl.SelectedTab = page;
}
/// <summary>
/// Writes a trace line to the <c>EventLogger</c> stored in the project's plugin data
/// (under <c>LoggingPlugin</c>, empty key). The other arguments are not used.
/// </summary>
public void BeforeDataWritten(
    IList<DataSetGroup> dataSetGroups,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    project.PluginData
        .GetData<LoggingPlugin, EventLogger>(string.Empty)
        .LogMessage("BeforeDataWritten() called.");
}
/// <summary>
/// Writes a trace line to the <c>EventLogger</c> stored in the project's plugin data
/// (under <c>LoggingPlugin</c>, empty key). The other arguments are not used.
/// </summary>
public void AfterDictionaryReordered(
    WordDictionary reorderedDictionary,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    project.PluginData
        .GetData<LoggingPlugin, EventLogger>(string.Empty)
        .LogMessage("AfterDictionaryReordered() called.");
}
/// <summary>
/// Writes a trace line to the <c>EventLogger</c> stored in the project's plugin data
/// (under <c>LoggingPlugin</c>, empty key). The other arguments are not used.
/// </summary>
public void AfterRecordsGenerated(
    Dictionary<DataSetName, DataSetByText> dataSets,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    project.PluginData
        .GetData<LoggingPlugin, EventLogger>(string.Empty)
        .LogMessage("AfterRecordsGenerated() called.");
}
/// <summary>
/// Writes a trace line to the <c>EventLogger</c> stored in the project's plugin data
/// (under <c>LoggingPlugin</c>, empty key). The other arguments are not used.
/// </summary>
public void AfterValidationSetExtracted(
    IList<DataSetGroup> dataSetGroups,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    project.PluginData
        .GetData<LoggingPlugin, EventLogger>(string.Empty)
        .LogMessage("AfterValidationSetExtracted() called.");
}
/// <summary>
/// Logs the start-of-generation banner (a headline, an empty line and the hook name) to the
/// <c>EventLogger</c> stored in the project's plugin data under <c>LoggingPlugin</c>.
/// </summary>
public void BeforeGenerationStarted(WsdProject project, GenerationInfo info, IProgressHandle progress)
{
    var log = project.PluginData.GetData<LoggingPlugin, EventLogger>(string.Empty);

    // Lines are emitted in exactly this order.
    var bannerLines = new[]
    {
        "Generation started.",
        "",
        "BeforeGenerationStarted() called."
    };

    foreach (var line in bannerLines)
    {
        log.LogMessage(line);
    }
}
/// <summary>
/// Creates the data-generation form for a project, fills the option combo boxes and the
/// part-of-speech list, and refreshes the UI.
/// </summary>
/// <param name="pluginComponents">Plugin components stored for later use by the form.</param>
/// <param name="project">Project whose <c>PosList</c> populates the part-of-speech list.</param>
/// <param name="progressFactory">Factory stored for creating progress handles.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is <c>null</c>.</exception>
public DataGenerationForm(
    IPluginComponent[] pluginComponents,
    WsdProject project,
    IProgressHandleFactory progressFactory)
{
    // Each guard names the parameter that was actually null (the first one used to report "project").
    _pluginComponents = pluginComponents ?? throw new ArgumentNullException(nameof(pluginComponents));
    _project = project ?? throw new ArgumentNullException(nameof(project));
    _progressFactory = progressFactory ?? throw new ArgumentNullException(nameof(progressFactory));

    InitializeComponent();

    SetStyle(
        ControlStyles.AllPaintingInWmPaint |
        ControlStyles.UserPaint |
        ControlStyles.OptimizedDoubleBuffer,
        true);

    GenerateData_SavingStrategyComboBox.Items.AddRange(new object[]
    {
        SavingStrategy.SingleFile,
        SavingStrategy.FilePerWord,
        SavingStrategy.FilePerPos,
        SavingStrategy.FilePerWordAndPos,
        SavingStrategy.OriginalFiles
    });

    GenerateData_OutputFormatComboBox.Items.AddRange(new object[]
    {
        OutputFormat.txt,
        OutputFormat.arff
    });

    GenerateData_OrderMeaningsComboBox.Items.AddRange(new object[]
    {
        OrderMeanings.None,
        OrderMeanings.ByDictionary,
        OrderMeanings.ByTrainingSet,
        OrderMeanings.ByDictionaryAndTrainingSet
    });

    GenerateData_OrderMeaningsStrategyComboBox.Items.AddRange(new object[]
    {
        OrderMeaningsStrategy.GroupByWordAndPos,
        OrderMeaningsStrategy.GroupByWord
    });

    foreach (var pos in project.PosList)
    {
        PosList_CheckedListBox.Items.Add(pos);

        // Every tag except "X" and "." is put into the generation info's filtered list by default.
        if (pos != "X" && pos != ".")
        {
            _generationInfo.FilteredPosList.Add(pos);
        }
    }

    RefreshUI(true);
}
/// <summary>
/// Creates the project-info form for <paramref name="project"/>, enables double-buffered
/// painting and refreshes the UI.
/// </summary>
/// <param name="project">Project to display.</param>
/// <exception cref="ArgumentNullException"><paramref name="project"/> is <c>null</c>.</exception>
public ProjectInfoForm(WsdProject project)
{
    // Fail fast with a descriptive exception instead of storing a null project.
    _project = project ?? throw new ArgumentNullException(nameof(project));

    InitializeComponent();

    SetStyle(
        ControlStyles.AllPaintingInWmPaint |
        ControlStyles.UserPaint |
        ControlStyles.OptimizedDoubleBuffer,
        true);

    RefreshUI();
}
/// <summary>
/// Writes one CSV row of statistics per data-set group into the destination folder, then
/// optionally aborts generation.
/// </summary>
/// <remarks>
/// Nothing happens when the stored <c>StatisticsConfig</c> has <c>PluginEnabled</c> switched off.
/// Groups that fail the configured requirements (training set, test set, minimum number of
/// train + validation examples) get no row.
/// </remarks>
/// <exception cref="OperationCanceledException">
/// Thrown after the file is written when <c>AbortGenerationAfterStatisticsAreComputed</c> is set.
/// </exception>
public void BeforeDataWritten(
    IList<DataSetGroup> dataSetGroups,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    var settings = project.PluginData.GetData<StatisticsPlugin, StatisticsConfig>(string.Empty);
    if (!settings.PluginEnabled)
    {
        return;
    }

    var storedDictionary = project.PluginData.GetData<StatisticsPlugin, WordDictionary>(string.Empty);
    var csvPath = Path.Combine(
        info.DestinationFolder,
        FileName.DataSetStatistics + FileExtension.Csv);

    using (var fileWriter = new StreamWriter(csvPath))
    using (var csv = new CsvWriter(fileWriter))
    using (var scope = progress.Scope(dataSetGroups.Count, MessageFormat.ComputingStatistics_Groups))
    {
        // Header row.
        csv.WriteLine(
            "Group",
            "Train examples",
            "Validation examples",
            "Test examples",
            "Test-only examples",
            "Majority vote",
            "Train classes",
            "Test classes",
            "Train entropy",
            "Test entropy");

        for (var index = 0; index < dataSetGroups.Count; index++)
        {
            scope.TrySet(index);

            var group = dataSetGroups[index];
            var stats = DataSetGroupStatistics.Compute(storedDictionary, group);

            // The three exclusion rules are checked in the same order as a single short-circuit "or".
            if (settings.RequireTrainingSet && stats.TrainExamples == 0)
            {
                continue;
            }

            if (settings.RequireTestSet && stats.TestExamples == 0)
            {
                continue;
            }

            if (stats.TrainExamples + stats.ValidationExamples < settings.MinimumTrainingValidationExamples)
            {
                continue;
            }

            csv.WriteLine(
                group.GroupName,
                stats.TrainExamples,
                stats.ValidationExamples,
                stats.TestExamples,
                stats.TestOnlyExamples,
                stats.MajorityVote,
                stats.TrainClasses,
                stats.TestClasses,
                stats.TrainEntropy,
                stats.TestEntropy);
        }
    }

    if (settings.AbortGenerationAfterStatisticsAreComputed)
    {
        throw new OperationCanceledException();
    }
}
/// <summary>
/// Splits the test set of every group into a test set and a test-only set.
/// </summary>
/// <remarks>
/// A test record stays in <c>DataSetName.Test</c> when the project's data analysis lists at
/// least one train encounter for its word; otherwise it moves to <c>DataSetName.TestOnly</c>.
/// A set that ends up empty is removed from the group. Groups without a test set are skipped.
/// </remarks>
/// <param name="dataSetGroups">Groups to update in place.</param>
/// <param name="project">Project whose <c>DataAnalysis</c> is consulted per word.</param>
/// <param name="info">Not used by this method.</param>
/// <param name="progress">Progress handle; one step is reported per group.</param>
public void Extract(
    IList<DataSetGroup> dataSetGroups,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    using (var scope = progress.Scope(dataSetGroups.Count, MessageFormat.ExtractingTestOnlySet_Groups))
    {
        var counter = 0;
        foreach (var dataSetGroup in dataSetGroups)
        {
            scope.TrySet(counter++);

            var oldTestSet = dataSetGroup.DataSets.GetByName(DataSetName.Test);
            if (oldTestSet == null)
            {
                continue;
            }

            // One pass over the records: the analysis lookup runs once per record instead of
            // twice. ToLookup keeps the original record order inside each partition.
            var byTrainPresence = oldTestSet.Data
                .ToLookup(x => project.DataAnalysis[x.Word].TrainEncounters.Any());

            var testExamples = byTrainPresence[true].ToArray();
            var testOnlyExamples = byTrainPresence[false].ToArray();

            if (testExamples.Length > 0)
            {
                dataSetGroup.DataSets[DataSetName.Test] =
                    new DataSet(DataSetName.Test, testExamples);
            }
            else
            {
                dataSetGroup.DataSets.Remove(DataSetName.Test);
            }

            if (testOnlyExamples.Length > 0)
            {
                dataSetGroup.DataSets[DataSetName.TestOnly] =
                    new DataSet(DataSetName.TestOnly, testOnlyExamples);
            }
            else
            {
                dataSetGroup.DataSets.Remove(DataSetName.TestOnly);
            }
        }
    }
}
/// <summary>
/// Logs the end-of-generation banner followed by the usage counters held in the
/// <c>UsageStatistics</c> object stored under <c>LoggingPlugin</c>.
/// </summary>
public void AfterGenerationCompleted(WsdProject project, GenerationInfo info, IProgressHandle progress)
{
    var pluginData = project.PluginData;
    var log = pluginData.GetData<LoggingPlugin, EventLogger>(string.Empty);
    var usage = pluginData.GetData<LoggingPlugin, UsageStatistics>(string.Empty);

    // Closing banner.
    log.LogMessage("AfterGenerationCompleted() called.");
    log.LogMessage("");
    log.LogMessage("Generation completed.");
    log.LogMessage("");

    // One line per counter.
    log.LogMessage("Usage statistics:");
    log.LogMessage($" Colocation source - {usage.ColocationSourceCounter}");
    log.LogMessage($" CosThetaUnitary function - {usage.CosThetaUnitaryCounter}");
    log.LogMessage($" String concat - {usage.StringConcatCounter}");
    log.LogMessage($" Word element - {usage.WordElementCounter}");
}
/// <summary>
/// Creates the logging form and registers a fresh <c>EventLogger</c> and
/// <c>UsageStatistics</c> in the project's plugin data under <c>LoggingPlugin</c>.
/// </summary>
/// <param name="project">Project whose plugin data receives the logger and the statistics object.</param>
public LoggingForm(WsdProject project)
{
    InitializeComponent();

    var eventLogger = new EventLogger();

    // Each logged message is appended as its own line to the rich text box; the append is
    // routed through InvokeIfRequired.
    eventLogger.OnMessageLogged += message =>
        this.InvokeIfRequired(() => Log_RichTextBox.AppendText(message + Environment.NewLine));

    project.PluginData.SetData<LoggingPlugin, EventLogger>(string.Empty, eventLogger);

    var usageStatistics = new UsageStatistics();
    project.PluginData.SetData<LoggingPlugin, UsageStatistics>(string.Empty, usageStatistics);
}
/// <summary>
/// Lets the user pick an empty folder, then creates and saves a new project there on a
/// background task and opens it in a new tab.
/// </summary>
/// <remarks>
/// A cancelled operation only refreshes the UI. Any other failure is reported in an error
/// dialog, which is shown through <c>InvokeIfRequired</c> like the other UI updates here.
/// </remarks>
private void CreateProject_UEFXML_CreateProjectButton_Click(object sender, EventArgs e)
{
    using (var dialog = DialogEx.SelectFolder())
    {
        if (dialog.ShowDialog() != CommonFileDialogResult.Ok)
        {
            return;
        }

        var projectPath = dialog.FileName;

        // EnumerateFiles is lazy, so Any() stops at the first file instead of listing the whole tree.
        if (Directory.EnumerateFiles(projectPath, "*", SearchOption.AllDirectories).Any())
        {
            MessageBox.Show("Directory must be empty.", "Error");
            return;
        }

        Task.Factory.StartNew(() =>
        {
            try
            {
                using (var progress = _progressFactory.NewInstance("Creating and saving project..."))
                {
                    var project = WsdProject.CreateAndSave(
                        _projectCreateInfoUEFXML, projectPath, progress);

                    this.InvokeIfRequired(() =>
                    {
                        RefreshUI();
                        AddProjectTab(project);
                    });
                }
            }
            catch (OperationCanceledException)
            {
                this.InvokeIfRequired(RefreshUI);
            }
            catch (Exception ex)
            {
                // Shown via InvokeIfRequired with this form as owner, not directly from the worker thread.
                this.InvokeIfRequired(() =>
                {
                    MessageBox.Show(this, ex.ToString(), "Error");
                });
            }
        }, TaskCreationOptions.LongRunning);
    }
}
/// <summary>
/// Generates records for every text in <paramref name="data"/>, reporting one progress step per text.
/// </summary>
/// <param name="data">Texts to process, in order.</param>
/// <param name="project">Project passed through to the per-text generator.</param>
/// <param name="info">Generation settings passed through to the per-text generator.</param>
/// <param name="progress">Progress handle used to open a scope sized to the number of texts.</param>
/// <returns>One <c>GeneratedTextData</c> per input text, in input order.</returns>
public IList<GeneratedTextData> GenerateRecords(
    IList<TextData> data,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    var generated = new List<GeneratedTextData>();

    using (var scope = progress.Scope(data.Count, MessageFormat.GeneratingRecords_Texts))
    {
        var position = 0;
        while (position < data.Count)
        {
            scope.TrySet(position);

            var textName = data[position].TextName;
            var textRecords = GenerateRecords(data[position].Data, project, info);
            generated.Add(new GeneratedTextData(textName, textRecords));

            position++;
        }
    }

    return generated;
}
/// <summary>
/// Lets the user pick a <c>.wsdproj</c> file, loads it on a background task and opens the
/// project in a new tab.
/// </summary>
/// <remarks>
/// A cancelled load only refreshes the UI. Any other failure is reported in an error dialog,
/// which is shown through <c>InvokeIfRequired</c> like the other UI updates here.
/// </remarks>
private void OpenProjectButton_Click(object sender, EventArgs e)
{
    using (var dialog = DialogEx.OpenFile("WsdProject files (*.wsdproj)|*.wsdproj"))
    {
        if (dialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        var projectFile = dialog.FileName;

        Task.Factory.StartNew(() =>
        {
            try
            {
                using (var progress = _progressFactory.NewInstance("Loading project..."))
                {
                    var project = WsdProject.Load(projectFile, progress);

                    this.InvokeIfRequired(() =>
                    {
                        RefreshUI();
                        AddProjectTab(project);
                    });
                }
            }
            catch (OperationCanceledException)
            {
                this.InvokeIfRequired(RefreshUI);
            }
            catch (Exception ex)
            {
                // Shown via InvokeIfRequired with this form as owner, not directly from the worker thread.
                this.InvokeIfRequired(() =>
                {
                    MessageBox.Show(this, ex.ToString(), "Error");
                });
            }
        }, TaskCreationOptions.LongRunning);
    }
}
/// <summary>
/// Creates the per-project container form: adds the built-in "Project info" and
/// "Data generation" pages, then one page per plugin component that implements
/// <c>IPluginProjectForm</c>, and selects the first page.
/// </summary>
/// <param name="pluginComponents">Plugin components; those implementing <c>IPluginProjectForm</c> contribute pages.</param>
/// <param name="project">Project shown by the hosted forms.</param>
/// <param name="progressFactory">Progress factory passed on to the hosted forms.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is <c>null</c>.</exception>
public ProjectForm(
    IPluginComponent[] pluginComponents,
    WsdProject project,
    IProgressHandleFactory progressFactory)
{
    // Validate all three arguments up front so a null is reported against the right parameter
    // before any child form is built.
    if (pluginComponents == null)
    {
        throw new ArgumentNullException(nameof(pluginComponents));
    }

    if (project == null)
    {
        throw new ArgumentNullException(nameof(project));
    }

    if (progressFactory == null)
    {
        throw new ArgumentNullException(nameof(progressFactory));
    }

    InitializeComponent();

    SetStyle(
        ControlStyles.AllPaintingInWmPaint |
        ControlStyles.UserPaint |
        ControlStyles.OptimizedDoubleBuffer,
        true);

    AddForm("Project info", new ProjectInfoForm(project));
    AddForm("Data generation", new DataGenerationForm(pluginComponents, project, progressFactory));

    // OfType filters and casts in one step.
    var projectForms = pluginComponents
        .OfType<IPluginProjectForm>()
        .ToArray();

    foreach (var projectForm in projectForms)
    {
        AddForm(
            projectForm.DisplayName,
            projectForm.CreateForm(pluginComponents, project, progressFactory));
    }

    FormsListBox.SelectedIndex = 0;
}
/// <summary>
/// Runs the whole data-generation pipeline for <paramref name="project"/> and writes the
/// result into <c>info.DestinationFolder</c>.
/// </summary>
/// <remarks>
/// Steps, in order: reorder the dictionary, generate train/test records, form groups, extract
/// the test-only set, optionally extract a validation set, optionally shuffle, write the data
/// sets, write the generation info twice (<c>WsdGenInfo</c> and <c>Text</c> extensions). After
/// each step, every registered handler is notified, in ascending execution priority.
/// </remarks>
/// <param name="project">Project that supplies the train and test data.</param>
/// <param name="info">Generation settings; validated via <c>AssertIsValid</c>.</param>
/// <param name="progress">Progress handle passed to every step and handler.</param>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The destination folder is not an existing, empty directory.</exception>
public void Generate(
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    if (info == null)
    {
        throw new ArgumentNullException(nameof(info));
    }

    // The file listing is only attempted once the path is known to be a directory.
    var destinationIsDirectory =
        PathEx.Identify(info.DestinationFolder) == PathIdentity.Directory;

    if (!destinationIsDirectory ||
        Directory.GetFiles(info.DestinationFolder, "*", SearchOption.AllDirectories).Length > 0)
    {
        throw new ArgumentException("DestinationFolder must be an empty existing directory.");
    }

    info.AssertIsValid();

    var orderedHandlers = _dataGenerationHandlers
        .OrderBy(candidate => candidate.GetExecutionPriority(project))
        .ToArray();

    foreach (var handler in orderedHandlers)
    {
        handler.BeforeGenerationStarted(project, info, progress);
    }

    // Step 1: dictionary reordering.
    var reorderedDictionary = _classDeterminator.GetReorderedDictionary(project, info, progress);

    foreach (var handler in orderedHandlers)
    {
        handler.AfterDictionaryReordered(reorderedDictionary, project, info, progress);
    }

    // Step 2: record generation, train first, then test.
    var dataSets = new Dictionary<DataSetName, DataSetByText>();

    dataSets[DataSetName.Train] = new DataSetByText(
        DataSetName.Train,
        _generationAlgorithm.GenerateRecords(project.TrainData, project, info, progress));

    dataSets[DataSetName.Test] = new DataSetByText(
        DataSetName.Test,
        _generationAlgorithm.GenerateRecords(project.TestData, project, info, progress));

    foreach (var handler in orderedHandlers)
    {
        handler.AfterRecordsGenerated(dataSets, project, info, progress);
    }

    // Step 3: grouping.
    var dataSetGroups = _dataSetGrouper.FormGroups(dataSets, project, info, progress);

    foreach (var handler in orderedHandlers)
    {
        handler.AfterGroupsFormed(dataSetGroups, project, info, progress);
    }

    // Step 4: test-only extraction.
    _testOnlySetExtractor.Extract(dataSetGroups, project, info, progress);

    foreach (var handler in orderedHandlers)
    {
        handler.AfterTestOnlySetExtracted(dataSetGroups, project, info, progress);
    }

    // Step 5 (optional): validation-set extraction.
    if (info.ExtractValidationSet)
    {
        _validationSetExtractor.Extract(dataSetGroups, info, progress);

        foreach (var handler in orderedHandlers)
        {
            handler.AfterValidationSetExtracted(dataSetGroups, project, info, progress);
        }
    }

    // Step 6 (optional): shuffling.
    if (info.ShuffleData)
    {
        _dataSetShuffler.ShuffleData(dataSetGroups, progress);

        foreach (var handler in orderedHandlers)
        {
            handler.AfterDataShuffled(dataSetGroups, project, info, progress);
        }
    }

    var selectionContext = new FeatureSelectionContext
    {
        GenerationInfo = info,
        ReorderedDictionary = reorderedDictionary,
        FilteredPosList = new WsdPosList(info.FilteredPosList),
        Project = project
    };

    foreach (var handler in orderedHandlers)
    {
        handler.BeforeDataWritten(dataSetGroups, project, info, progress);
    }

    // Step 7: output of the data sets and of the settings used to produce them.
    _dataSetWriter.WriteData(info.DestinationFolder, dataSetGroups, selectionContext, progress);

    var genInfoPath = Path.Combine(
        info.DestinationFolder,
        FileName.GenerationInfo + FileExtension.WsdGenInfo);
    SystemJsonWriter.Write(genInfoPath, info);

    var readableInfoPath = Path.Combine(
        info.DestinationFolder,
        FileName.GenerationInfo + FileExtension.Text);
    SystemJsonWriter.Write(readableInfoPath, new GenerationInfoReadable(info), null, false);

    foreach (var handler in orderedHandlers)
    {
        handler.AfterGenerationCompleted(project, info, progress);
    }
}
/// <summary>
/// Distributes the records of every data set into named groups according to
/// <c>info.SavingStrategy</c>.
/// </summary>
/// <remarks>
/// Group names per strategy: empty string (SingleFile); <c>word__id</c> (FilePerWord);
/// part of speech (FilePerPos); <c>word__pos__id</c> (FilePerWordAndPos); text name
/// (OriginalFiles). The id is the <c>Id</c> of the word's entry in <c>project.Dictionary</c>.
/// A group collects one <c>DataSet</c> per input data set that has records for it.
/// </remarks>
/// <param name="dataSets">Data sets to split; each is reported as one progress step.</param>
/// <param name="project">Project whose dictionary supplies word ids.</param>
/// <param name="info">Generation settings; only <c>SavingStrategy</c> is read.</param>
/// <param name="progress">Progress handle.</param>
/// <returns>All groups formed across the data sets.</returns>
/// <exception cref="NotSupportedException">The saving strategy is not one of the handled values.</exception>
public IList<DataSetGroup> FormGroups(
    Dictionary<DataSetName, DataSetByText> dataSets,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    var dataSetGroups = new Dictionary<string, DataSetGroup>();

    using (var scope = progress.Scope(dataSets.Count, MessageFormat.FormingGroups_DataSets))
    {
        var counter = 0;
        foreach (var dataSet in dataSets.Values)
        {
            scope.TrySet(counter++);

            // Lazy flattening of all texts; shared by every strategy except OriginalFiles.
            var allRecords = dataSet.Texts.SelectMany(x => x.Data);

            IEnumerable<(string groupName, IEnumerable<RawRecord> data)> dataByGroup;

            switch (info.SavingStrategy)
            {
                case SavingStrategy.SingleFile:
                {
                    dataByGroup = allRecords
                        .GroupBy(x => string.Empty)
                        .Select(x => (x.Key, (IEnumerable<RawRecord>)x));
                    break;
                }
                case SavingStrategy.FilePerWord:
                {
                    dataByGroup = allRecords
                        .GroupBy(x => x.Word + "__" + project.Dictionary.GetByName(x.Word).Id)
                        .Select(x => (x.Key, (IEnumerable<RawRecord>)x));
                    break;
                }
                case SavingStrategy.FilePerPos:
                {
                    // A null Pos would become a null dictionary key below and throw; treat it as
                    // an empty tag, which is how FilePerWordAndPos already concatenates it.
                    dataByGroup = allRecords
                        .GroupBy(x => x.Pos ?? string.Empty)
                        .Select(x => (x.Key, (IEnumerable<RawRecord>)x));
                    break;
                }
                case SavingStrategy.FilePerWordAndPos:
                {
                    dataByGroup = allRecords
                        .GroupBy(x => x.Word + "__" + x.Pos + "__" + project.Dictionary.GetByName(x.Word).Id)
                        .Select(x => (x.Key, (IEnumerable<RawRecord>)x));
                    break;
                }
                case SavingStrategy.OriginalFiles:
                {
                    dataByGroup = dataSet.Texts
                        .Select(x => (x.TextName, (IEnumerable<RawRecord>)x.Data));
                    break;
                }
                default:
                {
                    throw new NotSupportedException(
                        $"Saving stragegy {info.SavingStrategy} is not supported.");
                }
            }

            foreach (var (groupName, data) in dataByGroup)
            {
                // Single lookup; the group is created the first time its name is seen.
                if (!dataSetGroups.TryGetValue(groupName, out var group))
                {
                    group = new DataSetGroup(groupName);
                    dataSetGroups.Add(groupName, group);
                }

                group.DataSets[dataSet.Name] = new DataSet(dataSet.Name, data.ToArray());
            }
        }

        return dataSetGroups.Values.ToArray();
    }
}
/// <summary>
/// Intentionally empty: this implementation takes no action and ignores all of its arguments.
/// </summary>
// NOTE(review): presumably one of the generation-handler callbacks; confirm against the declaring interface.
public void AfterRecordsGenerated( Dictionary <DataSetName, DataSetByText> dataSets, WsdProject project, GenerationInfo info, IProgressHandle progress) { }
/// <summary>
/// Intentionally empty: this implementation takes no action and ignores all of its arguments.
/// </summary>
// NOTE(review): presumably one of the generation-handler callbacks; confirm against the declaring interface.
public void AfterValidationSetExtracted( IList <DataSetGroup> dataSetGroups, WsdProject project, GenerationInfo info, IProgressHandle progress) { }
/// <summary>
/// Intentionally empty: this implementation takes no action and ignores all of its arguments.
/// </summary>
// NOTE(review): presumably one of the generation-handler callbacks; confirm against the declaring interface.
public void BeforeGenerationStarted( WsdProject project, GenerationInfo info, IProgressHandle progress) { }
/// <summary>
/// Returns the execution priority stored in the project's plugin data under
/// <c>PluginDataKey_ExecutionPriority</c>, or 1 when no value is stored.
/// </summary>
/// <param name="project">Project whose plugin data is queried.</param>
public int GetExecutionPriority(WsdProject project)
{
    int? storedPriority =
        project.PluginData.GetData<LoggingPlugin, int?>(PluginDataKey_ExecutionPriority);

    return storedPriority.GetValueOrDefault(1);
}
/// <summary>
/// Returns <c>HandlerExecutionPriority</c> from the <c>StatisticsConfig</c> stored in the
/// project's plugin data (under <c>StatisticsPlugin</c>, empty key).
/// </summary>
/// <param name="project">Project whose plugin data is queried.</param>
public int GetExecutionPriority(WsdProject project) =>
    project.PluginData
        .GetData<StatisticsPlugin, StatisticsConfig>(string.Empty)
        .HandlerExecutionPriority;
/// <summary>
/// Intentionally empty: this implementation takes no action and ignores all of its arguments.
/// </summary>
// NOTE(review): presumably one of the generation-handler callbacks; confirm against the declaring interface.
public void AfterGroupsFormed( IList <DataSetGroup> dataSetGroups, WsdProject project, GenerationInfo info, IProgressHandle progress) { }
/// <summary>
/// Stores the reordered dictionary in the project's plugin data (under
/// <c>StatisticsPlugin</c>, empty key) so it can be fetched again later.
/// </summary>
/// <param name="reorderedDictionary">Dictionary to store.</param>
/// <param name="project">Project whose plugin data receives the dictionary.</param>
/// <param name="info">Not used.</param>
/// <param name="progress">Not used.</param>
public void AfterDictionaryReordered(
    WordDictionary reorderedDictionary,
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    var pluginData = project.PluginData;
    pluginData.SetData<StatisticsPlugin, WordDictionary>(string.Empty, reorderedDictionary);
}
/// <summary>
/// Slides a context window over <paramref name="input"/> and emits one record for every
/// window position whose focus word is ambiguous.
/// </summary>
/// <remarks>
/// The window holds <c>LeftContext + 1 + RightContext</c> slots; the focus word sits at index
/// <c>LeftContext</c>. Encounters with a non-empty part of speech that is not in
/// <c>info.FilteredPosList</c> never enter the window. A record is emitted only when the focus
/// encounter has a non-blank meaning and its dictionary entry has more than one meaning.
/// With <c>info.Overlap</c> off, context slots beyond an end-of-sentence marker (on either
/// side of the focus word) are replaced by the empty-word encounter.
/// NOTE(review): <c>ShiftLeft</c> and <c>IsEmpty</c> are helpers defined elsewhere; presumably
/// <c>ShiftLeft</c> drops the first slot and appends its argument (or an empty slot) — confirm.
/// </remarks>
private IList<RawRecord> GenerateRecords(
    IList<RawWordEncounter> input,
    WsdProject project,
    GenerationInfo info)
{
    var windowSize = info.LeftContext + 1 + info.RightContext;
    var focusIndex = info.LeftContext;
    var window = new RawWordEncounter[windowSize];
    var output = new List<RawRecord>();

    // The window is seeded with an end-of-sentence marker before the first real encounter.
    window.ShiftLeft(new RawWordEncounter
    {
        Word = RawWordEncounter.EndOfSentence
    });

    using (var source = input.GetEnumerator())
    {
        bool hasMore;
        do
        {
            hasMore = source.MoveNext();

            if (hasMore)
            {
                var pos = source.Current.Pos;
                if (!string.IsNullOrEmpty(pos) && !info.FilteredPosList.Contains(pos))
                {
                    // Filtered-out encounter: the window is left untouched.
                    continue;
                }

                window.ShiftLeft(source.Current);
            }
            else
            {
                // Input exhausted: keep shifting so the remaining encounters reach the focus slot.
                window.ShiftLeft();
            }

            var focus = window[focusIndex];

            if (focus == null)
            {
                continue;
            }

            if (focus.Word == RawWordEncounter.EmptyWord ||
                focus.Word == RawWordEncounter.EndOfSentence)
            {
                continue;
            }

            if (string.IsNullOrWhiteSpace(focus.Meaning))
            {
                continue;
            }

            var entry = project.Dictionary.GetByName(focus.Word);
            if (entry == null || entry.Meanings.Count <= 1)
            {
                continue;
            }

            // The context is the window without the focus slot.
            var context = new RawWordEncounter[windowSize - 1];

            if (info.Overlap)
            {
                for (var slot = 0; slot < windowSize; slot++)
                {
                    if (slot != focusIndex)
                    {
                        var target = slot < focusIndex ? slot : slot - 1;
                        context[target] = window[slot] ?? RawWordEncounter.EmptyWordEncounter;
                    }
                }
            }
            else
            {
                // Left side, walking away from the focus word.
                var sentenceEnded = false;
                for (var slot = focusIndex - 1; slot >= 0; slot--)
                {
                    if (sentenceEnded)
                    {
                        context[slot] = RawWordEncounter.EmptyWordEncounter;
                    }
                    else
                    {
                        context[slot] = window[slot] ?? RawWordEncounter.EmptyWordEncounter;
                    }

                    if (window[slot]?.Word == RawWordEncounter.EndOfSentence)
                    {
                        sentenceEnded = true;
                    }
                }

                // Right side, walking away from the focus word.
                sentenceEnded = false;
                for (var slot = focusIndex + 1; slot < windowSize; slot++)
                {
                    if (sentenceEnded)
                    {
                        context[slot - 1] = RawWordEncounter.EmptyWordEncounter;
                    }
                    else
                    {
                        context[slot - 1] = window[slot] ?? RawWordEncounter.EmptyWordEncounter;
                    }

                    if (window[slot]?.Word == RawWordEncounter.EndOfSentence)
                    {
                        sentenceEnded = true;
                    }
                }
            }

            output.Add(new RawRecord
            {
                Word = focus.Word,
                Meaning = focus.Meaning,
                Pos = focus.Pos,
                Context = context
            });
        }
        while (hasMore || !window.IsEmpty());
    }

    return output;
}
/// <summary>
/// Builds a copy of the dictionary whose meanings are re-sorted and renumbered (ids start
/// at 1) according to <c>info.OrderMeanings</c> and <c>info.OrderMeaningsStrategy</c>.
/// </summary>
/// <remarks>
/// With <c>OrderMeanings.ByTrainingSet</c> the words and meanings come from the project's data
/// analysis (only words that have train encounters); otherwise they come from
/// <c>project.Dictionary</c>. With <c>GroupByWordAndPos</c> the sorting and numbering is done
/// separately for each part of speech. Words left without meanings are dropped.
/// </remarks>
/// <param name="project">Project supplying the dictionary and the data analysis.</param>
/// <param name="info">Generation settings that select the ordering.</param>
/// <param name="progress">Progress handle; a single-step scope is opened for the operation.</param>
/// <returns>The reordered dictionary.</returns>
public WordDictionary GetReorderedDictionary(
    WsdProject project,
    GenerationInfo info,
    IProgressHandle progress)
{
    // Sorts one set of meanings as requested and assigns fresh 1-based ids in the resulting order.
    IEnumerable<DictionaryMeaning> SortAndRenumber(IEnumerable<DictionaryMeaning> meanings, string word)
    {
        var mode = info.OrderMeanings;

        if (mode == OrderMeanings.ByDictionary || mode == OrderMeanings.ByTrainingSet)
        {
            meanings = meanings.OrderByDescending(m => m.Encounters);
        }
        else if (mode == OrderMeanings.ByDictionaryAndTrainingSet)
        {
            // Ties on the dictionary count are broken by the train-set count (0 when unknown).
            meanings = meanings
                .OrderByDescending(m => m.Encounters)
                .ThenByDescending(m =>
                    project.DataAnalysis.GetByName(word)?.TrainEncounters.GetByName(m.Meaning)?.Encounters ?? 0);
        }

        return meanings.Select((m, index) => new DictionaryMeaning
        {
            Id = index + 1,
            Meaning = m.Meaning,
            PartOfSpeech = m.PartOfSpeech,
            Encounters = m.Encounters
        });
    }

    using (progress.Scope(1, MessageFormat.ReorderingDictionary))
    {
        var sourceWords = info.OrderMeanings == OrderMeanings.ByTrainingSet
            ? project.DataAnalysis
                .Values
                .Where(analysis => analysis.TrainEncounters.Any())
                .Select(analysis => new DictionaryWord
                {
                    Id = project.Dictionary[analysis.Word].Id,
                    Word = analysis.Word,
                    Meanings = analysis.TrainEncounters.Values
                        .OrderByDescending(encounter => encounter.Encounters)
                        .Select((encounter, index) => new DictionaryMeaning
                        {
                            Id = index + 1,
                            Meaning = encounter.Meaning,
                            PartOfSpeech = encounter.PartOfSpeech,
                            Encounters = encounter.Encounters
                        })
                        .ToMeaningDictionary()
                })
            : project.Dictionary.Values;

        var reordered = sourceWords
            .Select(entry =>
            {
                var allMeanings = (IEnumerable<DictionaryMeaning>)entry.Meanings.Values;

                var orderedMeanings =
                    info.OrderMeaningsStrategy == OrderMeaningsStrategy.GroupByWordAndPos
                        ? allMeanings
                            .GroupBy(m => m.PartOfSpeech)
                            .SelectMany(posGroup => SortAndRenumber(posGroup, entry.Word))
                        : SortAndRenumber(allMeanings, entry.Word);

                return new DictionaryWord
                {
                    Id = entry.Id,
                    Word = entry.Word,
                    Meanings = orderedMeanings.ToMeaningDictionary()
                };
            })
            .Where(entry => entry.Meanings.Count > 0)
            .ToWordDictionary();

        return reordered;
    }
}
/// <summary>
/// Creates the logging form for <paramref name="project"/>. The plugin components and the
/// progress factory are not used.
/// </summary>
/// <returns>A new <c>LoggingForm</c>.</returns>
public Form CreateForm(
    IPluginComponent[] pluginComponents,
    WsdProject project,
    IProgressHandleFactory factory) =>
    new LoggingForm(project);
/// <summary>
/// Intentionally empty: this implementation takes no action and ignores all of its arguments.
/// </summary>
// NOTE(review): presumably one of the generation-handler callbacks; confirm against the declaring interface.
public void AfterGenerationCompleted( WsdProject project, GenerationInfo info, IProgressHandle progress) { }