/// <summary>
/// Reads the word lists and the emotion lookup table, supplements the emotion table with
/// the extended sentiments, and loads the repair rules. May be called only once.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when the data has already been loaded.</exception>
public void Load()
{
    if (loaded)
    {
        throw new InvalidOperationException();
    }

    loaded = true;

    booster = ReadTextData("BoosterWordList.txt");
    negating = ReadTextData("NegatingWordList.txt");
    question = ReadTextData("QuestionWords.txt");
    stopWords = ReadTextData("StopWords.txt");
    stopPos = ReadTextData("StopPos.txt");

    var emotionTable = ReadTextData("EmotionLookupTable.txt");

    // Extended sentiments only fill gaps; entries read from the lookup table are kept as-is.
    foreach (var entry in extended.GetSentiments())
    {
        if (emotionTable.ContainsKey(entry.Word))
        {
            continue;
        }

        emotionTable[entry.Word] = entry.Sentiment;
    }

    sentimentData = SentimentDataHolder.PopulateEmotionsData(emotionTable);
    ReadRepairRules();
}
/// <summary>
/// Copies every value exposed by <paramref name="holder"/> into this instance by passing
/// each one to <c>SetValue</c>.
/// </summary>
/// <param name="holder">Source of the values to merge in.</param>
/// <exception cref="ArgumentNullException"><paramref name="holder"/> is <c>null</c>.</exception>
public void Merge(ISentimentDataHolder holder)
{
    // Fail with a descriptive exception instead of a NullReferenceException on holder.Values.
    if (holder == null)
    {
        throw new ArgumentNullException(nameof(holder));
    }

    foreach (var value in holder.Values)
    {
        SetValue(value);
    }
}
/// <summary>
/// Merges a three-word phrase into the lexicon loaded from the adjustment file and checks
/// that the rating of "I Veto it really" is 1 star before <c>NGram</c> is set to 3 and
/// 5 stars afterwards, and that the reparsed document keeps the rating and the text.
/// </summary>
public async Task NgramSentiment()
{
    ActualWordsHandler.InstanceOpen.Container.Context.DisableFeatureSentiment = true;

    var wordsFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"Adjustment/words.csv");
    ISentimentDataHolder lexicon = SentimentDataHolder.Load(wordsFile);

    var phrases = new[] { "veto it really" };
    var phraseLexicon = SentimentDataHolder.Load(
        phrases.Select(phrase => new WordSentimentValueData(phrase, new SentimentValueData(2))));
    lexicon.Merge(phraseLexicon);

    var text = "I Veto it really";
    var parsed = await ActualWordsHandler.InstanceOpen.TextSplitter
        .Process(new ParseRequest(text))
        .ConfigureAwait(false);
    var document = parsed.Construct(ActualWordsHandler.InstanceOpen.WordFactory);

    // Resolves a fresh review manager each time so that context changes are picked up.
    Text.Data.IParsedReview BuildReview()
    {
        var managerFactory = ActualWordsHandler.InstanceOpen.Container
            .Resolve<Func<Document, IParsedReviewManager>>();
        return managerFactory(document).Create();
    }

    ActualWordsHandler.InstanceOpen.Container.Context.Lexicon = lexicon;
    Text.Data.IParsedReview review = BuildReview();
    Assert.AreEqual(1, review.CalculateRawRating().StarsRating);

    ActualWordsHandler.InstanceOpen.Container.Context.NGram = 3;
    review = BuildReview();
    Assert.AreEqual(5, review.CalculateRawRating().StarsRating);

    IRatingAdjustment adjustment = RatingAdjustment.Create(review, null);
    var dictionary = ActualWordsHandler.InstanceOpen.Container.Resolve<INRCDictionary>();
    var resultDocument = new DocumentFromReviewFactory(dictionary).ReparseDocument(adjustment);

    Assert.AreEqual(5, resultDocument.Stars);
    Assert.AreEqual("I Veto it really", resultDocument.Text);
}
/// <summary>
/// Runs the testing client over <paramref name="reviews"/>, resolves every processed item
/// and saves the collected results once the stream completes.
/// </summary>
/// <param name="reviews">Stream of parsed documents to process.</param>
/// <param name="container">Session container that supplies the testing client and context.</param>
/// <param name="sentimentAdjustment">Lexicon assigned to the session context.</param>
protected override async Task Process(
    IObservable<IParsedDocumentHolder> reviews,
    ISessionContainer container,
    ISentimentDataHolder sentimentAdjustment)
{
    var client = container.GetTesting(Config.Model);
    container.Context.Lexicon = sentimentAdjustment;

    // Logs the pipeline monitor every 30 seconds while processing is running.
    using (var monitorLog = Observable.Interval(TimeSpan.FromSeconds(30))
        .Subscribe(tick => Logger.LogInformation(client.Pipeline.Monitor.ToString())))
    {
        client.TrackArff = false;
        client.UseBagOfWords = Config.UseBagOfWords;
        client.Init();

        var processed = client.Process(reviews.ObserveOn(TaskPoolScheduler.Default));
        var resolved = processed.Select(
            document =>
            {
                Semaphore.Release();
                return Resolve(document);
            });

        var result = await resolved.ToArray();
        SaveDocuments(result);
    }
}
/// <summary>
/// Trains a model for the connected user from the stored positive and negative documents
/// (at most 2000 of each) and writes a completion message to <paramref name="target"/>.
/// </summary>
/// <param name="target">Connection that supplies the user and receives the completion message.</param>
/// <param name="request">Training request; its name, domain and clean-text flag are used.</param>
/// <param name="token">Cancellation token passed to every write on <paramref name="target"/>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> or <paramref name="request"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// Any exception raised during training is reported to the client as an error message and
/// then rethrown.
/// </remarks>
public async Task Process(IConnectionContext target, TrainMessage request, CancellationToken token)
{
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    ISentimentDataHolder loader = default;
    var completed = new CompletedMessage();
    try
    {
        if (!string.IsNullOrEmpty(request.Domain))
        {
            logger.LogInformation("Using Domain dictionary [{0}]", request.Domain);
            loader = lexiconLoader.GetLexicon(request.Domain);
        }

        var modelLocation = storage.GetLocation(target.Connection.User, request.Name, ServiceConstants.Model);
        using (var scope = provider.CreateScope())
        {
            var container = scope.ServiceProvider.GetService<ISessionContainer>();
            container.Context.NGram = 3;
            var client = container.GetTraining(modelLocation);
            var converter = scope.ServiceProvider.GetService<IDocumentConverter>();
            client.Pipeline.ResetMonitor();

            // A lexicon is applied only when a domain dictionary was requested.
            if (loader != null)
            {
                client.Lexicon = loader;
            }

            var positive = storage.Load(target.Connection.User, request.Name, true)
                .Take(2000);
            var negative = storage.Load(target.Connection.User, request.Name, false)
                .Take(2000);
            var documents = positive.Concat(negative)
                .Select(item => converter.Convert(item, request.CleanText));

            await client.Train(documents).ConfigureAwait(false);
            completed.Message = "Training Completed";
            await target.Write(completed, token).ConfigureAwait(false);
        }
    }
    catch (Exception e)
    {
        // The error flag must be set before the message is written; otherwise the client
        // receives the failure text in a message that is not marked as an error.
        completed.Message = e.Message;
        completed.IsError = true;
        await target.Write(completed, token).ConfigureAwait(false);
        throw;
    }
}
/// <summary>
/// Creates a rating adjustment for <paramref name="review"/> that uses the supplied sentiment data.
/// </summary>
/// <param name="review">Review to adjust; forwarded to the base constructor.</param>
/// <param name="sentimentData">Sentiment data stored on this instance.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="review"/> or <paramref name="sentimentData"/> is <c>null</c>.
/// </exception>
public LexiconRatingAdjustment(IParsedReview review, ISentimentDataHolder sentimentData)
    : base(review)
{
    if (review == null)
    {
        throw new ArgumentNullException(nameof(review));
    }

    if (sentimentData == null)
    {
        throw new ArgumentNullException(nameof(sentimentData));
    }

    this.sentimentData = sentimentData;
}
/// <summary>
/// Checks that, with the lexicon loaded from the adjustment file applied to the context,
/// the text "I Veto it" gets a raw rating of 1 star.
/// </summary>
public async Task Adjusted()
{
    ActualWordsHandler.InstanceOpen.Container.Context.DisableFeatureSentiment = true;

    var wordsFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"Adjustment/words.csv");
    ISentimentDataHolder lexicon = SentimentDataHolder.Load(wordsFile);

    var text = "I Veto it";
    var parsed = await ActualWordsHandler.InstanceOpen.TextSplitter
        .Process(new ParseRequest(text))
        .ConfigureAwait(false);
    var document = parsed.Construct(ActualWordsHandler.InstanceOpen.WordFactory);

    ActualWordsHandler.InstanceOpen.Container.Context.Lexicon = lexicon;

    var managerFactory = ActualWordsHandler.InstanceOpen.Container
        .Resolve<Func<Document, IParsedReviewManager>>();
    var manager = managerFactory(document);
    Text.Data.IParsedReview review = manager.Create();

    Assert.AreEqual(1, review.CalculateRawRating().StarsRating);
}
/// <summary>
/// Configures the session context, optionally loads a sentiment lexicon from
/// <c>Config.Weights</c>, and hands the merged document stream to <c>Process</c>.
/// </summary>
/// <param name="token">Cancellation token (not used by this method).</param>
/// <returns>The task returned by <c>Process</c>.</returns>
protected override Task Execute(CancellationToken token)
{
    // Returns the lexicon from the configured weights file, or the default (null) when none is set.
    ISentimentDataHolder LoadAdjustment()
    {
        if (string.IsNullOrEmpty(Config.Weights))
        {
            return default;
        }

        Logger.LogInformation("Adjusting Embeddings sentiments using [{0}] ...", Config.Weights);
        return SentimentDataHolder.Load(Config.Weights);
    }

    Logger.LogInformation("Initialize...");
    container.Context.DisableFeatureSentiment = Config.InvertOff;

    Logger.LogInformation("Processing...");
    ISentimentDataHolder sentimentAdjustment = LoadAdjustment();

    IObservable<IParsedDocumentHolder> review = GetAllDocuments();
    var synchronized = review.Select(SynchronizedReviews).Merge();
    return Process(synchronized, container, sentimentAdjustment);
}
/// <summary>
/// Runs the testing client with the category-specific sentiment lexicon and compares the
/// resulting performance description and error count with the expected test data.
/// </summary>
/// <param name="data">Test case: product category plus the expected performance and errors.</param>
/// <exception cref="NotImplementedException">The category is Medic, Games, Toys or Book.</exception>
/// <exception cref="ArgumentOutOfRangeException">The category is not a known value.</exception>
public async Task SentimentTests(SentimentTestData data)
{
    // Maps the test category to its lexicon file name.
    string ResolveLexiconFile()
    {
        switch (data.Category)
        {
            case ProductCategory.Electronics:
                return "Electronics.csv";
            case ProductCategory.Video:
                return "video.csv";
            case ProductCategory.Kitchen:
                return "kitchen.csv";
            case ProductCategory.Medic:
            case ProductCategory.Games:
            case ProductCategory.Toys:
            case ProductCategory.Book:
                throw new NotImplementedException();
            default:
                throw new ArgumentOutOfRangeException();
        }
    }

    log.LogInformation("SentimentTests: {0}", data);
    string file = ResolveLexiconFile();

    var lexiconPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Sentiments", file);
    ISentimentDataHolder holder = SentimentDataHolder.Load(lexiconPath);

    var runner = new TestRunner(TestHelper.Instance, data);
    Analysis.Processing.ITestingClient testing = runner.Active.GetTesting();
    runner.Active.Context.Lexicon = holder;

    testing.DisableAspects = true;
    testing.DisableSvm = true;
    testing.TrackArff = true;
    testing.Init();

    await testing.Process(runner.Load()).LastOrDefaultAsync();

    var outputPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Word2Vec");
    testing.Save(outputPath);

    Assert.AreEqual(data.Performance, testing.GetPerformanceDescription());
    Assert.AreEqual(data.Errors, testing.Errors);
}
/// <summary>
/// Creates a words handler with an empty sentiment data holder.
/// </summary>
/// <param name="config">Lexicon configuration stored on this instance.</param>
/// <param name="extended">Extended words source stored on this instance.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="config"/> or <paramref name="extended"/> is <c>null</c>.
/// </exception>
public WordsHandler(ILexiconConfiguration config, IExtendedWords extended)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    this.config = config;

    if (extended == null)
    {
        throw new ArgumentNullException(nameof(extended));
    }

    this.extended = extended;
    sentimentData = new SentimentDataHolder();
}
/// <summary>
/// Runs sentiment analysis over the documents in <paramref name="message"/> and streams the
/// processed documents back to <paramref name="target"/> in batches, followed by a
/// completion message.
/// </summary>
/// <param name="target">Connection that supplies the user and receives result and completion messages.</param>
/// <param name="message">Sentiment message whose request carries the documents and options.</param>
/// <param name="token">Cancellation token passed to every write on <paramref name="target"/>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="target"/> or <paramref name="message"/> is <c>null</c>.
/// </exception>
/// <exception cref="Exception">
/// The request has no documents, or contains more than 500 documents.
/// </exception>
/// <remarks>
/// Any exception raised during processing is reported to the client as an error message
/// and then rethrown.
/// </remarks>
public async Task Process(IConnectionContext target, SentimentMessage message, CancellationToken token)
{
    // target is dereferenced on every path (model lookup and all writes), so validate it up front.
    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    if (message == null)
    {
        throw new ArgumentNullException(nameof(message));
    }

    var request = message.Request;
    if (request?.Documents == null)
    {
        throw new Exception("Nothing to process");
    }

    if (request.Documents.Length > 500)
    {
        throw new Exception("Too many documents. Maximum is 500");
    }

    var completed = new CompletedMessage();
    try
    {
        var monitor = new PerformanceMonitor(request.Documents.Length);

        // Logs the performance monitor every 10 seconds while the request is being processed.
        using (Observable.Interval(TimeSpan.FromSeconds(10))
            .Subscribe(item => logger.LogInformation(monitor.ToString())))
        {
            ISentimentDataHolder lexicon = default;
            if (request.Dictionary != null && request.Dictionary.Count > 0)
            {
                logger.LogInformation("Creating custom dictionary with {0} words", request.Dictionary.Count);
                lexicon = SentimentDataHolder.Load(request.Dictionary.Select(item =>
                    new WordSentimentValueData(
                        item.Key,
                        new SentimentValueData(item.Value))));
            }

            // The domain lexicon is used when there is no custom dictionary, or when the
            // request asks to adjust the domain; in the latter case the custom dictionary
            // is merged into the domain lexicon.
            if ((lexicon == null || request.AdjustDomain) && !string.IsNullOrEmpty(request.Domain))
            {
                logger.LogInformation("Using Domain dictionary [{0}]", request.Domain);
                var previous = lexicon;
                lexicon = lexiconLoader.GetLexicon(request.Domain);
                if (previous != null)
                {
                    lexicon.Merge(previous);
                }
            }

            string modelLocation = null;
            if (!string.IsNullOrEmpty(request.Model))
            {
                logger.LogInformation("Using model path: {0}", request.Model);
                modelLocation = storage.GetLocation(target.Connection.User, request.Model, ServiceConstants.Model);
                if (!Directory.Exists(modelLocation))
                {
                    throw new ApplicationException($"Can't find model {request.Model}");
                }
            }

            using (var scope = provider.CreateScope())
            {
                var container = scope.ServiceProvider.GetService<ISessionContainer>();
                container.Context.NGram = 3;
                container.Context.ExtractAttributes = request.Emotions;
                var client = container.GetTesting(modelLocation);
                var converter = scope.ServiceProvider.GetService<IDocumentConverter>();
                client.Init();
                client.Pipeline.ResetMonitor();
                if (lexicon != null)
                {
                    client.Lexicon = lexicon;
                }

                // Results are sent in batches: every 5 seconds or every 10 documents.
                await client.Process(request.Documents.Select(item => converter.Convert(item, request.CleanText))
                        .ToObservable())
                    .Select(item =>
                    {
                        monitor.Increment();
                        return item;
                    })
                    .Buffer(TimeSpan.FromSeconds(5), 10, scheduler)
                    .Select(async item =>
                    {
                        var result = new ResultMessage<Document> { Data = item.Select(x => x.Processed).ToArray() };
                        await target.Write(result, token).ConfigureAwait(false);
                        return Unit.Default;
                    })
                    .Merge();
            }

            logger.LogInformation("Completed with final performance: {0}", monitor);
            completed.Message = "Testing Completed";
            await target.Write(completed, token).ConfigureAwait(false);
        }
    }
    catch (Exception e)
    {
        // The error flag must be set before the message is written; otherwise the client
        // receives the failure text in a message that is not marked as an error.
        completed.Message = e.Message;
        completed.IsError = true;
        await target.Write(completed, token).ConfigureAwait(false);
        throw;
    }
}
/// <summary>
/// Runs the testing client over <paramref name="reviews"/>, persisting every processed item,
/// and logs the final testing performance.
/// </summary>
/// <param name="reviews">Stream of parsed documents to process.</param>
/// <param name="container">Session container that supplies the testing client and context.</param>
/// <param name="sentimentAdjustment">Lexicon assigned to the session context.</param>
protected override async Task Process(
    IObservable<IParsedDocumentHolder> reviews,
    ISessionContainer container,
    ISentimentDataHolder sentimentAdjustment)
{
    Config.Out.EnsureDirectoryExistence();

    // Declared outside the using block because the performance is logged after disposal.
    ITestingClient client;
    using (persistency)
    {
        persistency.Start(Config.Out);
        persistency.Debug = Config.Debug;
        persistency.ExtractStyle = Config.ExtractStyle;

        client = container.GetTesting(Config.Model);
        container.Context.Lexicon = sentimentAdjustment;

        // Logs the pipeline monitor every 30 seconds while processing is running.
        using (var monitorLog = Observable.Interval(TimeSpan.FromSeconds(30))
            .Subscribe(tick => Logger.LogInformation(client.Pipeline.Monitor.ToString())))
        {
            client.TrackArff = Config.TrackArff;
            client.UseBagOfWords = Config.UseBagOfWords;
            client.Init();

            var processed = client.Process(reviews.ObserveOn(TaskPoolScheduler.Default));
            var persisted = processed.Select(
                document =>
                {
                    persistency.Save(document);
                    Semaphore.Release();
                    return document;
                });

            await persisted.LastOrDefaultAsync();
        }

        if (!Config.TrackArff)
        {
            client.Save(Config.Out);
        }
    }

    Logger.LogInformation($"Testing performance {client.GetPerformanceDescription()}");
}
/// <summary>
/// Configures a training client from the command configuration and trains it on
/// <paramref name="reviews"/>.
/// </summary>
/// <param name="reviews">Stream of parsed documents to train on.</param>
/// <param name="container">Session container that supplies the training client and context.</param>
/// <param name="sentimentAdjustment">Lexicon assigned to the session context.</param>
protected override async Task Process(
    IObservable<IParsedDocumentHolder> reviews,
    ISessionContainer container,
    ISentimentDataHolder sentimentAdjustment)
{
    Logger.LogInformation("Training Operation...");

    ITrainingClient trainer = container.GetTraining(Config.Model);
    container.Context.Lexicon = sentimentAdjustment;

    trainer.OverrideAspects = Config.Features;
    trainer.UseBagOfWords = Config.UseBagOfWords;
    trainer.UseAll = Config.UseAll;

    var scheduled = reviews.ObserveOn(TaskPoolScheduler.Default);
    await trainer.Train(scheduled).ConfigureAwait(false);
}
/// <summary>
/// Processes a stream of parsed documents; implemented by the concrete command.
/// </summary>
/// <param name="reviews">Stream of parsed documents to process.</param>
/// <param name="container">Session container to obtain clients and context from.</param>
/// <param name="sentimentAdjustment">
/// Sentiment lexicon to apply. NOTE(review): the visible <c>Execute</c> caller passes the
/// default (null) value when no weights file is configured — confirm implementations accept null.
/// </param>
/// <returns>A task that represents the processing operation.</returns>
protected abstract Task Process(IObservable <IParsedDocumentHolder> reviews, ISessionContainer container, ISentimentDataHolder sentimentAdjustment);