/// <summary>
/// Resolves the model settings to use for a request.
/// </summary>
/// <remarks>
/// When <paramref name="Id"/> is supplied the settings are fetched from the model repository and
/// <paramref name="Request"/> is not inspected at all. Otherwise the request is validated and the
/// settings are built from it, using either the inline <c>Model</c> or the
/// <c>ModelId</c>/<c>ModelName</c>/<c>ModelDetails</c> triple. The model is read through the cache
/// (and stored on a miss) with a sliding expiration taken from
/// <c>_modelrepo._models.TenMinutesCacheTimeSpan</c>, interpreted as seconds.
/// </remarks>
/// <param name="Request">The request payload; only required when <paramref name="Id"/> is null.</param>
/// <param name="Id">Optional id of stored model settings; when non-null it takes precedence over the payload.</param>
/// <returns>The stored settings for <paramref name="Id"/>, or newly built settings with a fresh GUID id.</returns>
/// <exception cref="NlpException">
/// Thrown with <see cref="HttpStatusCode.BadRequest"/> when the request is null, has no content, has
/// content longer than 100,000 characters, contains a delimiter that is not exactly one character,
/// or does not carry enough model information.
/// </exception>
public IModelSettings<T> Parse(INlpRequest<T> Request, string Id = null)
{
    if (Id != null)
    {
        return _modelrepo.GetModelSettingsByModelId(Id);
    }

    if (Request == null)
    {
        throw new NlpException(HttpStatusCode.BadRequest, "request cannot be empty. please fix your JSON payload");
    }

    if (Request.Content == null)
    {
        throw new NlpException(HttpStatusCode.BadRequest, "'content' key is required. please include it in your JSON payload");
    }

    // NOTE(review): content of exactly 100,000 characters is accepted although the message says
    // "less than" - confirm which of the two is intended.
    if (Request.Content.Length > 100000)
    {
        throw new NlpException(HttpStatusCode.BadRequest, $"'content' length={Request.Content.Length} is too big. it must be less than 100,000 characters");
    }

    var hasInlineModel = Request.Model != null;
    var hasModelFields = !hasInlineModel
                         && Request.ModelId != null
                         && Request.ModelName != null
                         && Request.ModelDetails != null;

    if (!hasInlineModel && !hasModelFields)
    {
        throw new NlpException(HttpStatusCode.BadRequest, "not enough information given to parse the JSON payload");
    }

    // A payload without delimiters is treated as "no custom delimiters" instead of failing inside
    // Select(); malformed entries are reported as a bad request instead of leaking the
    // FormatException/ArgumentNullException that char.Parse would throw.
    var rawDelimiters = (Request.Delimiters ?? Enumerable.Empty<string>()).ToList();
    if (rawDelimiters.Any(d => d == null || d.Length != 1))
    {
        throw new NlpException(HttpStatusCode.BadRequest, "each delimiter must be exactly one character. please fix your JSON payload");
    }

    var delimiters = rawDelimiters.Select(char.Parse).ToArray();

    T model;
    if (hasInlineModel)
    {
        model = _cache.GetOrCreate(Request.Model.PublicKey, e =>
        {
            e.SlidingExpiration = TimeSpan.FromSeconds(_modelrepo._models.TenMinutesCacheTimeSpan);
            return Request.Model;
        });
    }
    else
    {
        model = _cache.GetOrCreate(Request.ModelId, e =>
        {
            e.SlidingExpiration = TimeSpan.FromSeconds(_modelrepo._models.TenMinutesCacheTimeSpan);
            return new T()
            {
                Id = Request.ModelId,
                Name = Request.ModelName,
                Details = Request.ModelDetails
            };
        });
    }

    return new ModelSettings<T>()
    {
        Id = Guid.NewGuid().ToString(),
        StopWords = Request.StopWords,
        Delimiters = delimiters,
        Model = model
    };
}
/// <summary>
/// Categorizes the request content against a model and every model reachable through its children.
/// </summary>
/// <remarks>
/// The model settings are resolved via <c>Parse</c>. When the request asks for it, the content is
/// summarized first and the summarized text is what gets categorized. Models are visited with an
/// explicit stack. For each model its <c>Details</c> are split on the effective delimiters (the
/// settings' delimiters unioned with the repository defaults); every token of the content is then
/// looked up in those details, and details containing a space are additionally matched as
/// substrings of the content. Matches are recorded in <c>_categories</c>.
/// </remarks>
/// <param name="Request">The request payload; its content is required even when <paramref name="Id"/> is given.</param>
/// <param name="Id">Optional id of stored model settings, forwarded to <c>Parse</c>.</param>
/// <returns>A response built from <c>_categories</c> and the original content length, with summary information.</returns>
/// <exception cref="NlpException">
/// Thrown with <see cref="HttpStatusCode.BadRequest"/> when the request or its content is missing,
/// or when <c>Parse</c> rejects the payload.
/// </exception>
public INlpResponse Categorize(INlpRequest<T> Request, string Id = null)
{
    var modelSettings = Parse(Request, Id);

    // Parse skips payload validation when an Id is supplied, but the content is still needed
    // here; fail with the same bad-request errors instead of a NullReferenceException.
    if (Request == null)
    {
        throw new NlpException(HttpStatusCode.BadRequest, "request cannot be empty. please fix your JSON payload");
    }

    if (Request.Content == null)
    {
        throw new NlpException(HttpStatusCode.BadRequest, "'content' key is required. please include it in your JSON payload");
    }

    var content = Request.Summarize ? _txtrepo.Summarize(Request.Content) : Request.Content;

    var delimiters = modelSettings.Delimiters
                     .Union(_modelrepo._models.DefaultDelimiters)
                     .ToArray();

    var models = new Stack<T>();
    models.Push(modelSettings.Model);

    var sw = Stopwatch.StartNew();

    while (models.Count > 0)
    {
        var model = models.Pop() as IModel<T>;
        var detailsArray = model.Details?.Split(delimiters);

        if (detailsArray != null)
        {
            // The search strategy depends only on the number of details, so decide once per
            // model rather than once per token.
            var useBinarySearch = detailsArray.Length >= 3;

            foreach (var token in Tokenize(content, delimiters, modelSettings.StopWords))
            {
                if (useBinarySearch)
                {
                    BinarySearchDetails(token, detailsArray, model.Name);
                }
                else
                {
                    SearchDetails(token, detailsArray, model.Name);
                }
            }

            // Details that contain a space are matched against the whole content as substrings.
            foreach (var phrase in detailsArray.Where(x => x.Contains(' ')))
            {
                if (content.Contains(phrase))
                {
                    _categories.AddCategory(model.Name, phrase);
                }
            }
        }

        // Parse builds models from ModelId/ModelName/ModelDetails without assigning Children,
        // so a model may reach this point without a children collection.
        if (model.Children != null)
        {
            foreach (var child in model.Children)
            {
                models.Push(child);
            }
        }
    }

    sw.Stop();
    _logger.LogInformation($"categorization algorithm took {sw.Elapsed.TotalMilliseconds * 1000} µs (microseconds)");

    return new NlpResponse(_categories, Request.Content.Length)
    {
        Summarized = Request.Summarize,
        SummarizedLength = Request.Summarize ? content.Length : (int?)null
    };
}
/// <summary>
/// Stores the request's model in the cache under the model's <c>PublicKey</c>.
/// </summary>
/// <param name="Request">The request carrying the model to store.</param>
/// <returns>The model taken from <paramref name="Request"/>.</returns>
public T AddModel(INlpRequest<T> Request)
{
    var model = Request.Model;
    var cacheKey = model.PublicKey;

    // The expiration instant handed to the cache is "now" plus DefaultCacheTimeSpan seconds.
    var expiresAt = DateTimeOffset.Now.AddSeconds(_models.DefaultCacheTimeSpan);

    _cache.Set(cacheKey, model, expiresAt);
    return model;
}