/// <summary>
/// Builds Rasa NLU training data from every intent in <paramref name="dc"/> that has
/// at least one user expression.
/// </summary>
/// <remarks>
/// The text of each expression is the concatenation of its data parts ordered by
/// <c>UpdatedTime</c>. Every part with a non-empty <c>Meta</c> becomes an entity
/// annotation whose <c>Start</c>/<c>End</c> are the character offsets of that part
/// inside the concatenated text. For each distinct (alias, text) pair the synonyms are
/// looked up once in the Entity / EntityEntry / EntityEntrySynonym tables.
/// NOTE(review): <paramref name="agent"/> is not read here, so intents are not filtered
/// per agent — confirm this is intended.
/// </remarks>
/// <param name="agent">The agent the extension method is invoked on (currently unused).</param>
/// <param name="dc">Database used to load intents and entity synonyms.</param>
/// <returns>Training data with one user-say entry per expression and the collected entity synonyms.</returns>
public static RasaTrainingData GrabCorpus(this Agent agent, Database dc)
{
    var trainingData = new RasaTrainingData
    {
        Entities = new List<RasaTraningEntity>(),
        UserSays = new List<RasaIntentExpression>()
    };

    var intents = dc.Table<Intent>()
        .Include(x => x.Contexts)
        .Include(x => x.UserSays).ThenInclude(say => say.Data)
        .Where(x => x.UserSays.Count > 0)
        .ToList();

    foreach (var intent in intents)
    {
        foreach (var exp in intent.UserSays)
        {
            // Materialize the ordering once so the text and the entity offsets
            // are guaranteed to be derived from the same sequence of parts.
            var orderedParts = exp.Data.OrderBy(x => x.UpdatedTime).ToList();

            var say = new RasaIntentExpression
            {
                Intent = intent.Name,
                Text = String.Join("", orderedParts.Select(x => x.Text)),
            };

            // convert entity format
            int offset = 0;
            foreach (var x in orderedParts)
            {
                // String.Join treats a null part as empty, so do the same here.
                int length = x.Text?.Length ?? 0;
                int start = offset;
                offset += length;

                if (String.IsNullOrEmpty(x.Meta))
                {
                    continue;
                }

                // Use the running offset rather than searching the text: a search
                // returns the first occurrence, which is wrong whenever the entity
                // text also appears earlier in the sentence.
                var part = new RasaIntentExpressionPart
                {
                    Value = x.Text,
                    Entity = x.Alias,
                    Start = start,
                    End = start + length
                };

                if (say.Entities == null)
                {
                    say.Entities = new List<RasaIntentExpressionPart>();
                }

                say.Entities.Add(part);

                // assemble entity synonyms (once per distinct alias/text pair)
                if (!trainingData.Entities.Any(y => y.EntityType == x.Alias && y.EntityValue == x.Text))
                {
                    var allSynonyms = (from e in dc.Table<Entity>()
                                       join ee in dc.Table<EntityEntry>() on e.Id equals ee.EntityId
                                       join ees in dc.Table<EntityEntrySynonym>() on ee.Id equals ees.EntityEntryId
                                       where e.Name == x.Alias && ee.Value == x.Text && ees.Synonym != x.Text
                                       select ees.Synonym).ToList();

                    var te = new RasaTraningEntity
                    {
                        EntityType = x.Alias,
                        EntityValue = x.Text,
                        Synonyms = allSynonyms
                    };

                    trainingData.Entities.Add(te);
                }
            }

            trainingData.UserSays.Add(say);
        }
    }

    return trainingData;
}
/// <summary>
/// Trains one Rasa NLU model per distinct context hash found in the corpus returned by
/// <c>GetIntentExpressions()</c>.
/// </summary>
/// <remarks>
/// For every context hash the method:
/// selects the expressions of that context plus those whose hash is the empty GUID;
/// keeps only the entity synonyms whose type is used by those expressions;
/// pads the data with GUID-text examples so that there are at least two intents and at
/// least two examples per intent; and posts the training config file followed by the
/// JSON data to the "train" endpoint of the URL configured under "Rasa:Nlu".
/// A failed request is reported through <c>Console.WriteLine</c> and <c>Log()</c>; it does
/// not stop training of the remaining contexts.
/// </remarks>
public void Train()
{
    var corpus = GetIntentExpressions();

    var config = (IConfiguration)AppDomain.CurrentDomain.GetData("Configuration");
    var client = new RestClient($"{config.GetSection("Rasa:Nlu").Value}");

    var contextHashs = corpus.UserSays
        .Select(x => x.ContextHash)
        .Distinct()
        .ToList();

    foreach (var ctx in contextHashs)
    {
        // Expressions with the empty-GUID hash are included in every model.
        var common_examples = corpus.UserSays
            .Where(x => x.ContextHash == ctx || x.ContextHash == Guid.Empty.ToString("N"))
            .ToList();

        // assemble entity and synonyms
        var usedEntities = new HashSet<String>(
            common_examples
                .Where(x => x.Entities != null)
                .SelectMany(x => x.Entities.Select(y => y.Entity)));

        var entity_synonyms = corpus.Entities
            .Where(x => usedEntities.Contains(x.EntityType))
            .ToList();

        // ToObject yields separate instances, so the padding / null-ing below
        // is applied to this request's data rather than to the corpus items.
        var data = new RasaTrainingData
        {
            Entities = entity_synonyms.Select(x => x.ToObject<RasaTraningEntity>()).ToList(),
            UserSays = common_examples.Select(x => x.ToObject<RasaIntentExpression>()).ToList()
        };

        // meet minimal requirement
        // at least 2 different classes
        int count = data.UserSays
            .Select(x => x.Intent)
            .Distinct()
            .Count();

        if (count < 2)
        {
            data.UserSays.Add(new RasaIntentExpression { Intent = "Intent2", Text = Guid.NewGuid().ToString("N") });
            data.UserSays.Add(new RasaIntentExpression { Intent = "Intent2", Text = Guid.NewGuid().ToString("N") });
        }

        // at least 2 corpus per intent
        var intentNames = data.UserSays
            .Select(x => x.Intent)
            .Distinct()
            .ToList();

        foreach (var intent in intentNames)
        {
            if (data.UserSays.Count(x => x.Intent == intent) < 2)
            {
                data.UserSays.Add(new RasaIntentExpression { Intent = intent, Text = Guid.NewGuid().ToString("N") });
            }
        }

        // set empty synonym to null so the serializer (NullValueHandling.Ignore) omits it
        foreach (var entity in data.Entities.Where(x => x.Synonyms != null).ToList())
        {
            if (entity.Synonyms.Count == 0)
            {
                entity.Synonyms = null;
            }
        }

        string json = JsonConvert.SerializeObject(
            new { rasa_nlu_data = data },
            new JsonSerializerSettings
            {
                ContractResolver = new CamelCasePropertyNamesContractResolver(),
                NullValueHandling = NullValueHandling.Ignore,
            });

        var rest = new RestRequest("train", Method.POST);
        rest.AddQueryParameter("project", agent.Id);
        rest.AddQueryParameter("model", ctx);

        string trainingConfig = agent.Language == "zh" ? "config_jieba_mitie_sklearn.yml" : "config_mitie_sklearn.yml";
        var contentRootPatch = AppDomain.CurrentDomain.GetData("ContentRootPath").ToString();
        string body = File.ReadAllText(Path.Join(contentRootPatch, "Settings", trainingConfig));
        body = $"{body}\r\ndata: {json}";
        rest.AddParameter("application/x-yml", body, ParameterType.RequestBody);

        var response = client.Execute(rest);

        // Nothing from a successful response is used, so its body is not parsed:
        // an unexpected payload shape must not abort training of later contexts.
        if (response.IsSuccessful)
        {
            continue;
        }

        // The body of a failed request may be empty (transport error) or not JSON
        // (proxy/HTML error page), so parse defensively and fall back to the
        // transport-level message instead of throwing and hiding the real error.
        JToken error = null;
        try
        {
            if (!String.IsNullOrWhiteSpace(response.Content))
            {
                error = JObject.Parse(response.Content)["error"];
            }
        }
        catch (JsonException)
        {
            // Not a JSON object; use the fallback below.
        }

        if (error == null || error.Type == JTokenType.Null)
        {
            error = new JValue(response.ErrorMessage
                ?? $"Rasa NLU training request failed with status {(int)response.StatusCode}");
        }

        Console.WriteLine(error);
        error.Log();
    }
}