Ejemplo n.º 1
0
        /// <summary>
        /// Builds Rasa NLU training data from every intent in the database that has at least one user expression.
        /// </summary>
        /// <param name="agent">The agent this extension method is invoked on. The intent query below is not filtered by it.</param>
        /// <param name="dc">Database used to load intents, their expressions, and entity synonyms.</param>
        /// <returns>
        /// Training data holding one <see cref="RasaIntentExpression"/> per user expression and one
        /// <see cref="RasaTraningEntity"/> per distinct (entity alias, entity text) pair found in those expressions.
        /// </returns>
        public static RasaTrainingData GrabCorpus(this Agent agent, Database dc)
        {
            var trainingData = new RasaTrainingData
            {
                Entities = new List<RasaTraningEntity>(),
                UserSays = new List<RasaIntentExpression>()
            };

            var intents = dc.Table<Intent>()
                            .Include(x => x.Contexts)
                            .Include(x => x.UserSays).ThenInclude(say => say.Data)
                            .Where(x => x.UserSays.Count > 0)
                            .ToList();

            foreach (var intent in intents)
            {
                foreach (var exp in intent.UserSays)
                {
                    // Materialize the ordered parts once so the joined text and the
                    // entity offsets computed below are derived from the same sequence.
                    var parts = exp.Data.OrderBy(x => x.UpdatedTime).ToList();

                    var say = new RasaIntentExpression
                    {
                        Intent = intent.Name,
                        Text   = String.Join("", parts.Select(x => x.Text))
                    };

                    // convert entity format
                    // Offsets are accumulated part by part instead of searching the joined text:
                    // a text search returns the first match, which is the wrong position when the
                    // entity text also occurs earlier in the sentence.
                    int offset = 0;
                    foreach (var x in parts)
                    {
                        int start  = offset;
                        int length = x.Text == null ? 0 : x.Text.Length; // String.Join renders null as ""
                        offset += length;

                        // Only parts carrying meta information are treated as entities.
                        if (String.IsNullOrEmpty(x.Meta))
                        {
                            continue;
                        }

                        if (say.Entities == null)
                        {
                            say.Entities = new List<RasaIntentExpressionPart>();
                        }

                        say.Entities.Add(new RasaIntentExpressionPart
                        {
                            Value  = x.Text,
                            Entity = x.Alias,
                            Start  = start,
                            End    = start + length
                        });

                        // assemble entity synonyms (once per distinct alias/text pair)
                        if (trainingData.Entities.Any(y => y.EntityType == x.Alias && y.EntityValue == x.Text))
                        {
                            continue;
                        }

                        var allSynonyms = (from e in dc.Table<Entity>()
                                           join ee in dc.Table<EntityEntry>() on e.Id equals ee.EntityId
                                           join ees in dc.Table<EntityEntrySynonym>() on ee.Id equals ees.EntityEntryId
                                           where e.Name == x.Alias && ee.Value == x.Text && ees.Synonym != x.Text
                                           select ees.Synonym).ToList();

                        trainingData.Entities.Add(new RasaTraningEntity
                        {
                            EntityType  = x.Alias,
                            EntityValue = x.Text,
                            Synonyms    = allSynonyms
                        });
                    }

                    trainingData.UserSays.Add(say);
                }
            }

            return trainingData;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains one Rasa NLU model per distinct context hash found in the corpus returned by
        /// <c>GetIntentExpressions()</c>.
        /// </summary>
        /// <remarks>
        /// For each context hash the method selects the matching expressions (plus those whose hash is the
        /// all-zero GUID), pads the data so that it contains at least two intents and at least two examples
        /// per intent, serializes it as <c>rasa_nlu_data</c>, appends it to the training configuration file
        /// and posts the result to the <c>train</c> endpoint of the server configured under <c>Rasa:Nlu</c>.
        /// Failures are written to the console; they do not stop training of the remaining contexts.
        /// </remarks>
        public void Train()
        {
            var corpus = GetIntentExpressions();
            var config = (IConfiguration)AppDomain.CurrentDomain.GetData("Configuration");
            var client = new RestClient($"{config.GetSection("Rasa:Nlu").Value}");

            var contextHashs = corpus.UserSays
                                     .Select(x => x.ContextHash)
                                     .Distinct()
                                     .ToList();

            // Nothing to train; return before touching the training configuration file.
            if (contextHashs.Count == 0)
            {
                return;
            }

            // Values that do not depend on the context are computed once, not per request.
            string sharedContextHash = Guid.Empty.ToString("N");
            string trainingConfig    = agent.Language == "zh" ? "config_jieba_mitie_sklearn.yml" : "config_mitie_sklearn.yml";
            var contentRootPath      = AppDomain.CurrentDomain.GetData("ContentRootPath").ToString();
            string configYaml        = File.ReadAllText(Path.Join(contentRootPath, "Settings", trainingConfig));

            foreach (var ctx in contextHashs)
            {
                // Expressions carrying the all-zero hash are included in every context's model.
                var common_examples = corpus.UserSays
                                            .Where(x => x.ContextHash == ctx || x.ContextHash == sharedContextHash)
                                            .ToList();

                // assemble entity and synonyms
                var usedEntities = new HashSet<String>(
                    common_examples.Where(x => x.Entities != null)
                                   .SelectMany(x => x.Entities.Select(y => y.Entity)));

                var entity_synonyms = corpus.Entities.Where(x => usedEntities.Contains(x.EntityType)).ToList();

                var data = new RasaTrainingData
                {
                    Entities = entity_synonyms.Select(x => x.ToObject<RasaTraningEntity>()).ToList(),
                    UserSays = common_examples.Select(x => x.ToObject<RasaIntentExpression>()).ToList()
                };

                // meet minimal requirement
                // at least 2 different classes
                int count = data.UserSays
                                .Select(x => x.Intent)
                                .Distinct()
                                .Count();

                if (count < 2)
                {
                    // Two placeholder examples with random text for a second intent.
                    for (int i = 0; i < 2; i++)
                    {
                        data.UserSays.Add(new RasaIntentExpression
                        {
                            Intent = "Intent2",
                            Text   = Guid.NewGuid().ToString("N")
                        });
                    }
                }

                // at least 2 corpus per intent
                // The intent names are snapshotted first because UserSays grows inside the loop.
                var intentNames = data.UserSays.Select(x => x.Intent).Distinct().ToList();
                foreach (var intent in intentNames)
                {
                    if (data.UserSays.Count(x => x.Intent == intent) < 2)
                    {
                        data.UserSays.Add(new RasaIntentExpression
                        {
                            Intent = intent,
                            Text   = Guid.NewGuid().ToString("N")
                        });
                    }
                }

                // set empty synonym to null so NullValueHandling.Ignore omits it from the JSON
                foreach (var entity in data.Entities)
                {
                    if (entity.Synonyms != null && entity.Synonyms.Count == 0)
                    {
                        entity.Synonyms = null;
                    }
                }

                string json = JsonConvert.SerializeObject(new { rasa_nlu_data = data },
                                                          new JsonSerializerSettings
                {
                    ContractResolver  = new CamelCasePropertyNamesContractResolver(),
                    NullValueHandling = NullValueHandling.Ignore,
                });

                var rest = new RestRequest("train", Method.POST);
                rest.AddQueryParameter("project", agent.Id);
                rest.AddQueryParameter("model", ctx);
                string body = $"{configYaml}\r\ndata: {json}";
                rest.AddParameter("application/x-yml", body, ParameterType.RequestBody);

                var response = client.Execute(rest);

                // The response body is not used on success, so only failures need handling.
                if (response.IsSuccessful)
                {
                    continue;
                }

                // The body is parsed defensively: it may be empty or not JSON, and an unguarded
                // parse would replace the real failure with a parsing exception.
                JObject result = null;
                try
                {
                    result = JObject.Parse(response.Content ?? String.Empty);
                }
                catch (JsonException)
                {
                    // Not a JSON body; reported through the fallback message below.
                }

                if (result != null && result["error"] != null)
                {
                    Console.WriteLine(result["error"]);
                    result["error"].Log();
                }
                else
                {
                    Console.WriteLine($"Rasa training request failed for model {ctx}: {response.StatusCode} {response.ErrorMessage}");
                }
            }
        }