/// <summary>
/// Adds one <c>OrderedWordEdge</c> to <paramref name="sentenceGraph"/> for every word of
/// <paramref name="sentence"/> whose source id and target id both resolve to a vertex of the graph.
/// </summary>
/// <remarks>
/// A vertex is identified by the value of its word's target-vertex attribute
/// (<c>CurrentDefinition.Edge.TargetVertexAttributeName</c>); a word's source-vertex attribute is
/// resolved against that same id. When several vertices share an id, the first one in
/// <c>sentenceGraph.Vertices</c> order is used. Words whose source id is <c>"0"</c> are skipped
/// (presumably the root marker — confirm against the data format), as are words whose ids cannot
/// be resolved to a vertex.
/// </remarks>
/// <param name="sentence">Sentence whose words describe the edges (source id, target id, label).</param>
/// <param name="sentenceGraph">Graph that already holds the vertices and receives the new edges.</param>
/// <param name="isLeft">Value assigned to <c>IsLeft</c> on every created edge.</param>
/// <param name="isRight">Value assigned to <c>IsRight</c> on every created edge.</param>
private void AddComparableEdges(SentenceWrapper sentence, SentenceGraph sentenceGraph, bool isLeft, bool isRight)
{
    if (!sentence.Words.Any())
    {
        // No words means no edges; avoid building the vertex index for nothing.
        return;
    }

    var edgeDefinition = CurrentDefinition.Edge;
    var idAttributeName = edgeDefinition.TargetVertexAttributeName;

    // Index the vertices by id once instead of scanning the whole vertex list twice per word.
    // ToLookup keeps the elements of each key in source order, so taking the first element of a
    // group yields the same vertex a FirstOrDefault scan would. Vertices without an id are left
    // out; they can never be the endpoint of an edge.
    var verticesById = sentenceGraph.Vertices
        .Select(vertex => new { Id = vertex.WordWrapper.GetAttributeByName(idAttributeName), Vertex = vertex })
        .Where(entry => entry.Id != null)
        .ToLookup(entry => entry.Id, entry => entry.Vertex);

    foreach (var word in sentence.Words)
    {
        var from = word.GetAttributeByName(edgeDefinition.SourceVertexAttributeName);
        if (from == "0")
        {
            continue;
        }

        var to = word.GetAttributeByName(idAttributeName);
        if (from == null || to == null)
        {
            // A missing id cannot match any vertex.
            continue;
        }

        // The lookup indexer returns an empty sequence for unknown ids, so FirstOrDefault
        // yields null when the id does not belong to any vertex of this graph.
        var toWordVertex = verticesById[to].FirstOrDefault();
        var fromWordVertex = verticesById[from].FirstOrDefault();
        if (toWordVertex == null || fromWordVertex == null)
        {
            continue;
        }

        sentenceGraph.AddEdge(
            new OrderedWordEdge(fromWordVertex, toWordVertex)
            {
                Text = word.GetAttributeByName(edgeDefinition.LabelAttributeName),
                SourceConnectionPointId = 1,
                TargetConnectionPointId = 1,
                IsLeft = isLeft,
                IsRight = isRight
            });
    }
}
/// <summary>
/// Splits <paramref name="text"/> into space-separated words, asks <paramref name="fastTextModel"/>
/// for candidate labels of every word, links the candidates of adjacent words whose label
/// transition is allowed, and returns the interpretations the resulting graph finds between an
/// artificial begin-of-sentence and end-of-sentence token.
/// </summary>
/// <param name="fastTextModel">Model queried with <c>GetPrediction(word, 5)</c> for every word.</param>
/// <param name="text">Sentence to analyse. <c>null</c>, empty or space-only text yields an empty list.</param>
/// <param name="numberOfPossibilities">
/// Passed unchanged to <c>SentenceGraph.GetAllPossibleSentenceInterpretations</c>
/// (presumably the maximum number of interpretations to return — confirm there).
/// </param>
/// <returns>The interpretations reported by the graph; an empty list when there are no words.</returns>
public static List<SentenceInterpretation> IdentifyEntities(fastText fastTextModel, string text, int numberOfPossibilities)
{
    var paths = new List<SentenceInterpretation>();

    if (text == null)
    {
        // Treat a missing sentence like an empty one instead of failing inside Split.
        return paths;
    }

    // TODO: improve tokenization here; only the space character separates words.
    var words = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();
    if (words.Count == 0)
    {
        return paths;
    }

    // Labels without a "_" get the single-tag prefix before they are checked or stored on an edge.
    Func<string, string> withPositionTag =
        label => label.Contains("_") ? label : SentenceGraph.SingleTag + "_" + label;

    // One candidate list per word, in sentence order.
    var tokens = new List<List<Token>>();
    foreach (var w in words)
    {
        var pred = fastTextModel.GetPrediction(w, 5);
        if (pred.Count == 0)
        {
            // Guarantee at least one candidate per word so every position can be linked.
            pred.Add(new prediction() { label = "S_O", intensity = 1 });
        }

        // NOTE(review): the literal "S_" here and SentenceGraph.SingleTag used for edges
        // presumably denote the same tag — confirm before unifying them.
        tokens.Add(pred.Select(p => new Token(w, (p.label.Contains("_") ? p.label : "S_" + p.label), p.intensity)).ToList());
    }

    var graph = new SentenceGraph();
    tokens.ForEach(t => graph.AddVertexRange(t));

    // Connect every candidate of word i with every candidate of word i + 1 when allowed.
    for (int i = 0; i < (words.Count - 1); i++)
    {
        double maxIntensityC = tokens[i].Max(p => p.Intensity);
        double maxIntensityN = tokens[i + 1].Max(p => p.Intensity);

        foreach (var source in tokens[i])
        {
            foreach (var dest in tokens[i + 1])
            {
                string clabel = withPositionTag(source.Label);
                string nlabel = withPositionTag(dest.Label);

                if (!SentenceGraph.IsTransitionAllowed(clabel, nlabel))
                {
                    continue;
                }

                // Intensities are scaled by the best candidate of their word. A negative
                // intensity, or a non-positive maximum (which would divide by zero and
                // produce NaN), makes the edge impossible instead.
                bool impossible = maxIntensityC <= 0 || maxIntensityN <= 0
                                  || source.Intensity < 0 || dest.Intensity < 0;
                double probability = impossible
                    ? 0
                    : (source.Intensity / maxIntensityC) * (dest.Intensity / maxIntensityN);

                graph.AddEdge(new SentenceEdge(source, dest, clabel, nlabel, probability));
            }
        }
    }

    // Artificial sentence boundaries give the path search a single start and a single end.
    var beginOfSentence = new Token("__BEGIN__", "", 1);
    var endOfSentence = new Token("__END__", "", 1);
    graph.AddVertex(beginOfSentence);
    graph.AddVertex(endOfSentence);

    // The boundaries are treated as "S_O" when checking which candidates may start or end a sentence.
    foreach (var t in tokens[0])
    {
        string tlabel = withPositionTag(t.Label);
        if (SentenceGraph.IsTransitionAllowed("S_O", tlabel))
        {
            graph.AddEdge(new SentenceEdge(beginOfSentence, t, "S_O", tlabel, 1.0));
        }
    }

    foreach (var t in tokens[tokens.Count - 1])
    {
        string tlabel = withPositionTag(t.Label);
        if (SentenceGraph.IsTransitionAllowed(tlabel, "S_O"))
        {
            graph.AddEdge(new SentenceEdge(t, endOfSentence, tlabel, "S_O", 1.0));
        }
    }

    paths = graph.GetAllPossibleSentenceInterpretations(beginOfSentence, endOfSentence, numberOfPossibilities);
    return paths;
}
/// <summary>
/// Creates a graph with one <c>WordVertex</c> per word of <paramref name="sentence"/> and then adds
/// the edges described by both sentences: those of <paramref name="sentence"/> flagged as left,
/// those of <paramref name="rightSentence"/> flagged as right.
/// </summary>
/// <remarks>
/// <c>sentence.Words</c> is sorted in place by the integer value of each word's target-vertex
/// attribute before the vertices are created; the attribute must therefore parse as an integer.
/// </remarks>
/// <param name="sentence">Sentence supplying the vertices and the left edges; its word list is reordered.</param>
/// <param name="rightSentence">Sentence supplying the right edges only.</param>
/// <returns>The populated graph.</returns>
private SentenceGraph BuildSentenceGraph(SentenceWrapper sentence, SentenceWrapper rightSentence)
{
    var graph = new SentenceGraph();

    // Order the words by their numeric id so vertices are added in sentence order.
    sentence.Words.Sort(
        (first, second) =>
        {
            int firstId = int.Parse(first.GetAttributeByName(CurrentDefinition.Edge.TargetVertexAttributeName));
            int secondId = int.Parse(second.GetAttributeByName(CurrentDefinition.Edge.TargetVertexAttributeName));
            return firstId.CompareTo(secondId);
        });

    foreach (var currentWord in sentence.Words)
    {
        var vertex = new WordVertex(currentWord, CurrentDefinition.Vertex.LabelAttributeName);
        graph.AddVertex(vertex);
    }

    // Left edges come from the primary sentence, right edges from the sentence it is compared with.
    AddComparableEdges(sentence, graph, true, false);
    AddComparableEdges(rightSentence, graph, false, true);

    return graph;
}