示例#1
0
        /// <summary>
        /// Aligns <paramref name="tokens"/> with known entity expressions and emits one
        /// <see cref="TrainingData"/> row per token: entity-covered tokens are tagged with
        /// the entity name (tag "I"), all remaining tokens are tagged "O".
        /// </summary>
        /// <param name="tokens">Tokenized sentence to label.</param>
        /// <param name="entities">Known entity expressions; may be null (treated as none).</param>
        /// <returns>One training row per token, in sentence order.</returns>
        public List <TrainingData> Merge(List <NlpToken> tokens, List <TrainingIntentExpressionPart> entities)
        {
            var trainingTuple = new List <TrainingData>();

            for (int i = 0; i < tokens.Count; i++)
            {
                // Must be reset for every token: the original carried a stale count
                // across iterations, which inflated the next match length and made
                // the loop skip tokens it never matched.
                int matchLength = 0;

                if (entities != null)
                {
                    foreach (var entity in entities)
                    {
                        string[] words = entity.Value.Split(' ');

                        // The entity cannot match here if it would run past the end
                        // of the sentence (previously caused an out-of-range access).
                        if (words.Length == 0 || i + words.Length > tokens.Count)
                        {
                            continue;
                        }

                        bool matched = true;
                        for (int j = 0; j < words.Length; j++)
                        {
                            if (tokens[i + j].Text != words[j])
                            {
                                matched = false;
                                break;
                            }
                        }

                        if (!matched)
                        {
                            continue;
                        }

                        // Strip an optional "prefix:" qualifier from the entity name.
                        string entityName = entity.Entity.Contains(":")
                            ? entity.Entity.Substring(entity.Entity.IndexOf(":") + 1)
                            : entity.Entity;

                        foreach (string s in words)
                        {
                            trainingTuple.Add(new TrainingData(entityName, s, tokens[i].Pos, "I"));
                        }

                        matchLength = words.Length;
                        break; // first matching entity wins, as before
                    }
                }

                if (matchLength == 0)
                {
                    // Token is not covered by any entity: label it as outside ("O").
                    trainingTuple.Add(new TrainingData("O", tokens[i].Text, tokens[i].Pos, "O"));
                }
                else
                {
                    // Skip the tokens consumed by the matched entity.
                    i += matchLength - 1;
                }
            }

            return trainingTuple;
        }
示例#2
0
        /// <summary>
        /// Aligns <paramref name="tokens"/> with known entity expressions and emits one
        /// <see cref="TrainingData"/> row per token using BMES-style tags:
        /// S_/B_/M_/E_ + entity name for entity tokens, plain "S" for all others.
        /// Entity values are tokenized with <paramref name="doc"/>'s tokenizer so their
        /// word boundaries match the sentence tokenization.
        /// </summary>
        /// <param name="doc">Provides the tokenizer used to split entity values.</param>
        /// <param name="tokens">Tokenized sentence to label.</param>
        /// <param name="entities">Known entity expressions; may be null (treated as none).</param>
        /// <returns>One training row per token, in sentence order.</returns>
        private List <TrainingData> Merge(NlpDoc doc, List <Token> tokens, List <TrainingIntentExpressionPart> entities)
        {
            var trainingTuple = new List <TrainingData>();

            // Tokenize each entity value exactly once up front: the result does not
            // depend on the token position, and the original re-ran the tokenizer
            // O(tokens x entities) times inside the loop.
            var entityWordList = new List <string[]>();
            if (entities != null)
            {
                foreach (var entity in entities)
                {
                    var vDoc = new NlpDoc {
                        Sentences = new List <NlpDocSentence> {
                            new NlpDocSentence {
                                Text = entity.Value
                            }
                        }
                    };
                    doc.Tokenizer.Predict(null, vDoc, null);
                    entityWordList.Add(vDoc.Sentences[0].Tokens.Select(x => x.Text).ToArray());
                }
            }

            for (int i = 0; i < tokens.Count; i++)
            {
                // Must be reset for every token: the original carried a stale count
                // across iterations, which could skip tokens it never matched.
                int matchLength = 0;

                if (entities != null)
                {
                    for (int entityIndex = 0; entityIndex < entities.Count; entityIndex++)
                    {
                        string[] words = entityWordList[entityIndex];

                        // The entity cannot match here if it would run past the end
                        // of the sentence (previously caused an out-of-range access).
                        if (words.Length == 0 || i + words.Length > tokens.Count)
                        {
                            continue;
                        }

                        bool matched = true;
                        for (int j = 0; j < words.Length; j++)
                        {
                            if (tokens[i + j].Text != words[j])
                            {
                                matched = false;
                                break;
                            }
                        }

                        if (!matched)
                        {
                            continue;
                        }

                        var entity = entities[entityIndex];

                        // Strip an optional "prefix:" qualifier from the entity name.
                        string entityName = entity.Entity.Contains(":")
                            ? entity.Entity.Substring(entity.Entity.IndexOf(":") + 1)
                            : entity.Entity;

                        // BMES tagging: S_ = single-word entity, B_ = begin,
                        // M_ = middle, E_ = end of a multi-word entity.
                        for (int wordIndex = 0; wordIndex < words.Length; wordIndex++)
                        {
                            string tag;
                            if (wordIndex == 0)
                            {
                                tag = words.Length == 1 ? "S_" + entityName : "B_" + entityName;
                            }
                            else if (wordIndex == words.Length - 1)
                            {
                                tag = "E_" + entityName;
                            }
                            else
                            {
                                tag = "M_" + entityName;
                            }

                            trainingTuple.Add(new TrainingData(tag, words[wordIndex], tokens[i].Pos));
                        }

                        matchLength = words.Length;
                        break; // first matching entity wins, as before
                    }
                }

                if (matchLength == 0)
                {
                    // Token is not covered by any entity: label it "S". The original
                    // skipped this entirely when entities was null (via continue),
                    // returning an empty list, unlike the sibling overloads.
                    trainingTuple.Add(new TrainingData("S", tokens[i].Text, tokens[i].Pos));
                }
                else
                {
                    // Skip the tokens consumed by the matched entity.
                    i += matchLength - 1;
                }
            }

            return trainingTuple;
        }
示例#3
0
        /// <summary>
        /// Aligns <paramref name="tokens"/> with known entity expressions and emits one
        /// <see cref="TrainingData"/> row per token: entity-covered tokens are tagged with
        /// the entity name (tag "I"), all remaining tokens are tagged "O". Entity values
        /// are tokenized with <paramref name="doc"/>'s tokenizer so their word boundaries
        /// match the sentence tokenization.
        /// </summary>
        /// <param name="doc">Provides the tokenizer used to split entity values.</param>
        /// <param name="tokens">Tokenized sentence to label.</param>
        /// <param name="entities">Known entity expressions; may be null (treated as none).</param>
        /// <returns>One training row per token, in sentence order.</returns>
        public List <TrainingData> Merge(NlpDoc doc, List <Token> tokens, List <TrainingIntentExpressionPart> entities)
        {
            var trainingTuple = new List <TrainingData>();

            // Tokenize each entity value exactly once up front: the result does not
            // depend on the token position, and the original re-ran the tokenizer
            // O(tokens x entities) times inside the loop.
            var entityWordList = new List <string[]>();
            if (entities != null)
            {
                foreach (var entity in entities)
                {
                    var vDoc = new NlpDoc {
                        Sentences = new List <NlpDocSentence> {
                            new NlpDocSentence {
                                Text = entity.Value
                            }
                        }
                    };
                    doc.Tokenizer.Predict(null, vDoc, null);
                    entityWordList.Add(vDoc.Sentences[0].Tokens.Select(x => x.Text).ToArray());
                }
            }

            for (int i = 0; i < tokens.Count; i++)
            {
                // Must be reset for every token: the original carried a stale count
                // across iterations, which inflated the next match length and made
                // the loop skip tokens it never matched.
                int matchLength = 0;

                if (entities != null)
                {
                    for (int entityIndex = 0; entityIndex < entities.Count; entityIndex++)
                    {
                        string[] words = entityWordList[entityIndex];

                        // The entity cannot match here if it would run past the end
                        // of the sentence (previously caused an out-of-range access).
                        if (words.Length == 0 || i + words.Length > tokens.Count)
                        {
                            continue;
                        }

                        bool matched = true;
                        for (int j = 0; j < words.Length; j++)
                        {
                            if (tokens[i + j].Text != words[j])
                            {
                                matched = false;
                                break;
                            }
                        }

                        if (!matched)
                        {
                            continue;
                        }

                        var entity = entities[entityIndex];

                        // Strip an optional "prefix:" qualifier from the entity name.
                        string entityName = entity.Entity.Contains(":")
                            ? entity.Entity.Substring(entity.Entity.IndexOf(":") + 1)
                            : entity.Entity;

                        foreach (string s in words)
                        {
                            trainingTuple.Add(new TrainingData(entityName, s, tokens[i].Pos, "I"));
                        }

                        matchLength = words.Length;
                        break; // first matching entity wins, as before
                    }
                }

                if (matchLength == 0)
                {
                    // Token is not covered by any entity: label it as outside ("O").
                    trainingTuple.Add(new TrainingData("O", tokens[i].Text, tokens[i].Pos, "O"));
                }
                else
                {
                    // Skip the tokens consumed by the matched entity.
                    i += matchLength - 1;
                }
            }

            return trainingTuple;
        }