/// <summary>
/// Adds a single "Dictionary statistics" group to <paramref name="control"/> and
/// fills it with one label/value item per statistic read from <paramref name="data"/>.
/// </summary>
/// <param name="data">Statistics whose values are displayed.</param>
/// <param name="control">List view that receives the group.</param>
public static void BindToListView(DictionaryStatistics data, ListView control)
{
    const string groupTitle = "Dictionary statistics";

    control.AddGroup(groupTitle, statisticsGroup =>
    {
        // Only the average is formatted explicitly ("F2"); every other value
        // goes through the parameterless ToString().
        statisticsGroup
            .AddItem("Word count", data.WordCount.ToString())
            .AddItem("Monosemantic word count", data.MonosemanticWordCount.ToString())
            .AddItem("Polysemantic word count", data.PolysemanticWordCount.ToString())
            .AddItem("Max meanings per word", data.MaxMeaningsPerWord.ToString())
            .AddItem("Average meanings per word", data.AverageMeaningsPerWord.ToString("F2"))
            .AddItem("Unique meanings count", data.UniqueMeaningsCount.ToString());
    });
}
/// <summary>
/// Initializes a project from already loaded/computed parts. The constructor is
/// private; within the visible code it is invoked by <c>CreateAndSave</c>.
/// </summary>
/// <remarks>
/// Besides storing the supplied arguments, it builds <c>PosList</c> from
/// <paramref name="trainData"/> and starts with a fresh <c>PluginData</c> instance.
/// </remarks>
private WsdProject(
    WsdProjectInfo projectInfo,
    WordDictionary dictionary,
    TextData[] trainData,
    TextData[] testData,
    EmbeddingDictionary wordEmbeddings,
    EmbeddingDictionary meaningEmbeddings,
    WordAnalysisDictionary dataAnalysis,
    DictionaryStatistics dictionaryStatistics,
    DataStatistics dataStatistics,
    EmbeddingStatistics wordEmbeddingStatistics,
    EmbeddingStatistics meaningEmbeddingStatistics)
{
    // Project description and core data.
    this.ProjectInfo = projectInfo;
    this.Dictionary = dictionary;
    this.TrainData = trainData;
    this.TestData = testData;

    // Embeddings (CreateAndSave passes null for meaningEmbeddings when none were given).
    this.WordEmbeddings = wordEmbeddings;
    this.MeaningEmbeddings = meaningEmbeddings;

    // Analysis results and statistics.
    this.DataAnalysis = dataAnalysis;
    this.DictionaryStatistics = dictionaryStatistics;
    this.DataStatistics = dataStatistics;
    this.WordEmbeddingStatistics = wordEmbeddingStatistics;
    this.MeaningEmbeddingStatistics = meaningEmbeddingStatistics;

    // Members created here rather than passed in.
    this.PosList = new WsdPosList(trainData);
    this.PluginData = new PluginData();
}
/// <summary>
/// Builds a new project from the input files described by <paramref name="info"/>,
/// writes every project file into <paramref name="destinationPath"/> and returns
/// the in-memory project.
/// </summary>
/// <param name="info">Locations and type of the input data; checked with <c>AssertIsValid()</c>.</param>
/// <param name="destinationPath">
/// Existing directory that contains no files (sub-directories included). The
/// directory name, ignoring trailing separators, becomes the project name.
/// </param>
/// <param name="progress">Receives a message format before each stage and is handed to the readers/writers.</param>
/// <returns>The project made of the data that was just loaded, computed and saved.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="info"/> or <paramref name="progress"/> is null, or
/// <paramref name="destinationPath"/> is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="destinationPath"/> is not identified as a directory or already contains files.
/// </exception>
public static WsdProject CreateAndSave(
    WsdProjectCreateInfo info,
    string destinationPath,
    IProgressHandle progress)
{
    if (info == null)
    {
        throw new ArgumentNullException(nameof(info));
    }

    if (string.IsNullOrEmpty(destinationPath))
    {
        throw new ArgumentNullException(nameof(destinationPath));
    }

    // EnumerateFiles + Any() stops at the first file found instead of building
    // the complete file list of the directory tree just to test for emptiness.
    if (PathEx.Identify(destinationPath) != PathIdentity.Directory ||
        Directory.EnumerateFiles(destinationPath, "*", SearchOption.AllDirectories).Any())
    {
        throw new ArgumentException(ExceptionMessage.DestinationPathMustBeEmptyAndExisting);
    }

    if (progress == null)
    {
        throw new ArgumentNullException(nameof(progress));
    }

    info.AssertIsValid();

    // ---- Dictionary ----
    progress.SetMessageFormat(MessageFormat.LoadingDictionary_Bytes);
    var dictionary = InputDictionaryReader.ReadAll(info.DictionaryPath, progress);

    progress.SetMessageFormat(MessageFormat.ComputingDictionaryStatistics);
    var dictionaryStatistics = new DictionaryStatistics().Compute(dictionary, progress);

    // ---- Train / test data ----
    TextData[] trainData;
    TextData[] testData;

    if (info.DataType == InputDataType.PlainText)
    {
        progress.SetMessageFormat(MessageFormat.LoadingTrainData_Files);
        trainData = InputPlainTextDataReader.ReadAllFiles(info.TrainDataPath, progress);

        progress.SetMessageFormat(MessageFormat.LoadingTestData_Files);
        testData = InputPlainTextDataReader.ReadAllFiles(info.TestDataPath, progress);
    }
    else
    {
        // Every data type other than PlainText goes through the XML reader,
        // which additionally needs the synset mappings and the gold key files.
        progress.SetMessageFormat(MessageFormat.LoadingSynsetMappings_Bytes);
        var synsetMappings = InputSynsetMappingReader.ReadAll(info.SynsetMappingsPath, progress);

        progress.SetMessageFormat(MessageFormat.LoadingTrainData_Files);
        trainData = InputXmlDataReader.Read(
            info.TrainDataPath,
            info.TrainGoldKeyPath,
            synsetMappings,
            dictionary,
            out var trainXmlParseErrors,
            progress);

        // Parse errors do not abort creation; they are written next to the project files.
        if (trainXmlParseErrors != null && trainXmlParseErrors.Any())
        {
            XmlParseErrorWriter.WriteAll(
                Path.Combine(destinationPath, FileName.TrainXmlParseErrors + FileExtension.Text),
                trainXmlParseErrors);
        }

        progress.SetMessageFormat(MessageFormat.LoadingTestData_Files);
        testData = InputXmlDataReader.Read(
            info.TestDataPath,
            info.TestGoldKeyPath,
            synsetMappings,
            dictionary,
            out var testXmlParseErrors,
            progress);

        if (testXmlParseErrors != null && testXmlParseErrors.Any())
        {
            XmlParseErrorWriter.WriteAll(
                Path.Combine(destinationPath, FileName.TestXmlParseErrors + FileExtension.Text),
                testXmlParseErrors);
        }
    }

    // ---- Analysis and data statistics ----
    progress.SetMessageFormat(MessageFormat.AnalyzingData_Files);
    var dataAnalysis = new WordAnalysisDictionary()
        .Analyze(dictionary, trainData, testData, progress);

    progress.SetMessageFormat(MessageFormat.ComputingDataStatistics);
    var dataStatistics = new DataStatistics()
        .Compute(dictionary, dataAnalysis, progress);

    // ---- Embeddings ----
    progress.SetMessageFormat(MessageFormat.LoadingWordEmbeddings_Bytes);
    var wordEmbeddings = InputEmbeddingReader.ReadAll(
        info.WordEmbeddingsPath,
        dataAnalysis.GetAllWordOccurrences(),
        progress);

    var wordEmbeddingStatistics = new EmbeddingStatistics().Compute(wordEmbeddings);

    // Meaning embeddings are optional: without a path the dictionary stays null
    // and the statistics object stays in its freshly constructed state.
    EmbeddingDictionary meaningEmbeddings = null;
    var meaningEmbeddingStatistics = new EmbeddingStatistics();

    if (!string.IsNullOrWhiteSpace(info.MeaningEmbeddingsPath))
    {
        progress.SetMessageFormat(MessageFormat.LoadingMeaningEmbeddings_Bytes);
        meaningEmbeddings = InputEmbeddingReader.ReadAll(
            info.MeaningEmbeddingsPath,
            dataAnalysis.GetAllMeaningOccurrences(),
            progress);

        // Keep the value returned by Compute, exactly as is done for the word
        // embeddings, so this does not depend on Compute updating the instance in place.
        meaningEmbeddingStatistics = meaningEmbeddingStatistics.Compute(meaningEmbeddings);
    }

    // ---- Project description ----
    // Path.GetFileName returns an empty string when the path ends with a directory
    // separator, so trailing separators are removed before deriving the project name.
    var projectName = Path.GetFileName(
        destinationPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar));

    var projectInfo = new WsdProjectInfo
    {
        ProjectName = projectName,
        ProjectVersion = CurrentProjectVersion,
        ApplicationVersion = typeof(WsdProject).Assembly.GetName().Version.ToString(),
        Dictionary = FileName.Dictionary + FileExtension.WsdData,
        TrainData = trainData.Select(x => new WsdProjectTextDataInfo
        {
            Name = x.TextName,
            Path = Path.Combine(FolderName.Train, x.TextName + FileExtension.WsdData)
        }).ToArray(),
        TestData = testData.Select(x => new WsdProjectTextDataInfo
        {
            Name = x.TextName,
            Path = Path.Combine(FolderName.Test, x.TextName + FileExtension.WsdData)
        }).ToArray(),
        WordEmbeddings = FileName.WordEmbeddings + FileExtension.WsdData,
        MeaningEmbeddings = meaningEmbeddings != null
            ? FileName.MeaningEmbeddings + FileExtension.WsdData
            : string.Empty,
        DataAnalysis = FileName.DataAnalysis + FileExtension.WsdData,
        DictionaryStatistics = FileName.DictionaryStatistics + FileExtension.WsdData,
        DataStatistics = FileName.DataStatistics + FileExtension.WsdData,
        WordEmbeddingsStatistics = FileName.WordEmbeddingsStatistics + FileExtension.WsdData,
        MeaningEmbeddingsStatistics = FileName.MeaningEmbeddingsStatistics + FileExtension.WsdData
    };

    // ---- Save everything; all paths in projectInfo are relative to destinationPath ----
    progress.SetMessageFormat(MessageFormat.SavingDictionary_Words);
    SystemDictionaryWriter.WriteAll(
        Path.Combine(destinationPath, projectInfo.Dictionary),
        dictionary,
        progress);

    // Single(...) pairs each entry with its text by name and throws if a text
    // name is not unique within the set.
    progress.SetMessageFormat(MessageFormat.SavingTrainData_Files);
    SystemDataWriter.WriteAllFiles(
        destinationPath,
        projectInfo.TrainData
            .Select(x => (x.Path, trainData.Single(y => y.TextName == x.Name).Data))
            .ToArray(),
        progress);

    progress.SetMessageFormat(MessageFormat.SavingTestData_Files);
    SystemDataWriter.WriteAllFiles(
        destinationPath,
        projectInfo.TestData
            .Select(x => (x.Path, testData.Single(y => y.TextName == x.Name).Data))
            .ToArray(),
        progress);

    progress.SetMessageFormat(MessageFormat.SavingWordEmbeddings_Embeddings);
    SystemEmbeddingWriter.WriteAll(
        Path.Combine(destinationPath, projectInfo.WordEmbeddings),
        wordEmbeddings,
        progress);

    if (meaningEmbeddings != null)
    {
        progress.SetMessageFormat(MessageFormat.SavingMeaningEmbeddings_Embeddings);
        SystemEmbeddingWriter.WriteAll(
            Path.Combine(destinationPath, projectInfo.MeaningEmbeddings),
            meaningEmbeddings,
            progress);
    }

    progress.SetMessageFormat(MessageFormat.SavingDataAnalysis_Words);
    SystemDataAnalysisWriter.WriteAll(
        Path.Combine(destinationPath, projectInfo.DataAnalysis),
        dataAnalysis,
        progress);

    // The statistics and the project file are written without progress reporting.
    SystemJsonWriter.Write(
        Path.Combine(destinationPath, projectInfo.DictionaryStatistics),
        dictionaryStatistics);

    SystemJsonWriter.Write(
        Path.Combine(destinationPath, projectInfo.DataStatistics),
        dataStatistics);

    SystemJsonWriter.Write(
        Path.Combine(destinationPath, projectInfo.WordEmbeddingsStatistics),
        wordEmbeddingStatistics);

    // Written even when no meaning embeddings were supplied.
    SystemJsonWriter.Write(
        Path.Combine(destinationPath, projectInfo.MeaningEmbeddingsStatistics),
        meaningEmbeddingStatistics);

    SystemJsonWriter.Write(
        Path.Combine(destinationPath, projectInfo.ProjectName + FileExtension.WsdProj),
        projectInfo);

    return new WsdProject(
        projectInfo,
        dictionary,
        trainData,
        testData,
        wordEmbeddings,
        meaningEmbeddings,
        dataAnalysis,
        dictionaryStatistics,
        dataStatistics,
        wordEmbeddingStatistics,
        meaningEmbeddingStatistics);
}