/// <summary>
/// Collects substitution candidates for target words at the sentence level.
/// A token qualifies when it is a content word, its POS tag does not start with "C",
/// it carries no named-entity label, its text does not contain a compass direction
/// ("north", "west", "south", "east"), its relative information content is above
/// <paramref name="icTreshold"/>, and it is not capitalized (unless it is the first
/// token of the sentence). For each qualifying token the most similar words from the
/// embedding space are fetched for the surface form and, when it differs, for the lemma;
/// the merged list is sorted by descending similarity and appended to
/// <paramref name="substCandidates"/>.
/// </summary>
/// <param name="sentence">Sentence to be simplified</param>
/// <param name="substCandidates">Output list; one (token, candidates) entry is appended per qualifying token</param>
/// <param name="icTreshold">The information content threshold; only tokens scoring strictly above it are considered</param>
/// <param name="word">Target word to be replaced, compared case-insensitively (if not provided, all content words are considered for replacing)</param>
public void CollectCandidates(SentenceAnnotation sentence, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, double icTreshold, string word = null)
{
    // Number of nearest neighbours requested from the embedding space per query.
    const int neighbourCount = 30;
    string[] sides = { "north", "west", "south", "east" };

    // Lower-case the optional target once instead of once per token. Invariant casing keeps
    // the comparison independent of the current culture (e.g. the Turkish dotless i).
    string targetWord = word == null ? null : word.ToLowerInvariant();

    var contentTokens = sentence.Tokens
        .Where(t => t.IsContent()
            && !t.POSTag.StartsWith("C", StringComparison.Ordinal)
            && string.IsNullOrEmpty(t.NamedEntity)
            && !sides.Any(si => t.Text.IndexOf(si, StringComparison.OrdinalIgnoreCase) >= 0))
        .ToList();

    foreach (var token in contentTokens)
    {
        // An empty surface form has no first character to inspect and nothing to look up.
        if (token.Text.Length == 0)
        {
            continue;
        }

        string textLower = token.Text.ToLowerInvariant();
        bool hasLemma = !string.IsNullOrEmpty(token.Lemma);
        string lemmaLower = hasLemma ? token.Lemma.ToLowerInvariant() : null;

        // The lemma is preferred as the information-content key; the surface form is the fallback.
        var ic = InformationContent.GetRelativeInformationContent(hasLemma ? lemmaLower : textLower);
        if (!(ic > icTreshold))
        {
            continue;
        }

        // Capitalized tokens are skipped unless they open the sentence.
        if (char.IsUpper(token.Text[0]) && sentence.Tokens.IndexOf(token) != 0)
        {
            continue;
        }

        if (targetWord != null && !string.Equals(textLower, targetWord, StringComparison.Ordinal))
        {
            continue;
        }

        var candidates = new List<Tuple<string, double>>();

        var textNeighbours = VectorSpace.GetMostSimilar(textLower, neighbourCount);
        if (textNeighbours != null)
        {
            candidates.AddRange(textNeighbours);
        }

        // Query the lemma only when it differs from the surface form; repeating the same
        // query would just add every candidate a second time.
        if (hasLemma && !string.Equals(lemmaLower, textLower, StringComparison.Ordinal))
        {
            var lemmaNeighbours = VectorSpace.GetMostSimilar(lemmaLower, neighbourCount);
            if (lemmaNeighbours != null)
            {
                candidates.AddRange(lemmaNeighbours);
            }
        }

        candidates = candidates.OrderByDescending(c => c.Item2).ToList();
        substCandidates.Add(new Tuple<TokenAnnotation, List<Tuple<string, double>>>(token, candidates));
    }
}
/// <summary>
/// Gathers, for every eligible token of a sentence, the words that may replace it.
/// Eligible tokens are content words whose POS tag does not begin with "C", that have no
/// named-entity label, whose text contains none of "north", "west", "south", "east",
/// whose relative information content exceeds <paramref name="icTreshold"/>, and that
/// are either not capitalized or are the first token of the sentence. Replacement
/// candidates come from <c>VectorSpace.GetMostSimilar</c>, queried with the lower-cased
/// surface form and, if different, the lower-cased lemma; they are ordered by descending
/// similarity score before being stored.
/// </summary>
/// <param name="sentence">Sentence to be simplified</param>
/// <param name="substCandidates">List of all substitution candidates; this method appends to it</param>
/// <param name="icTreshold">The information content threshold (exclusive lower bound)</param>
/// <param name="word">Target word to be replaced, matched ignoring case (if not provided, all content words are considered for replacing)</param>
public void CollectCandidates(SentenceAnnotation sentence, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, double icTreshold, string word = null)
{
    // How many nearest neighbours to request per embedding query.
    const int topN = 30;
    string[] compassWords = { "north", "west", "south", "east" };

    // Hoisted out of the loop; invariant casing avoids culture-dependent results
    // (for example "I" lower-casing to a dotless i under a Turkish culture).
    string wantedLower = word == null ? null : word.ToLowerInvariant();

    var eligibleTokens = sentence.Tokens
        .Where(t => t.IsContent()
            && !t.POSTag.StartsWith("C", StringComparison.Ordinal)
            && string.IsNullOrEmpty(t.NamedEntity)
            && !compassWords.Any(cw => t.Text.IndexOf(cw, StringComparison.OrdinalIgnoreCase) >= 0))
        .ToList();

    foreach (var current in eligibleTokens)
    {
        // Nothing to inspect or look up for an empty surface form.
        if (current.Text.Length == 0)
        {
            continue;
        }

        string surfaceLower = current.Text.ToLowerInvariant();
        string lemmaLower = string.IsNullOrEmpty(current.Lemma) ? null : current.Lemma.ToLowerInvariant();

        // Information content is looked up by lemma when one exists, otherwise by surface form.
        var ic = InformationContent.GetRelativeInformationContent(lemmaLower ?? surfaceLower);

        // A capitalized token is only acceptable in sentence-initial position.
        bool acceptableCasing = !char.IsUpper(current.Text[0]) || sentence.Tokens.IndexOf(current) == 0;
        bool matchesTarget = wantedLower == null || string.Equals(surfaceLower, wantedLower, StringComparison.Ordinal);

        if (!(ic > icTreshold) || !acceptableCasing || !matchesTarget)
        {
            continue;
        }

        var merged = new List<Tuple<string, double>>();

        var bySurface = VectorSpace.GetMostSimilar(surfaceLower, topN);
        if (bySurface != null)
        {
            merged.AddRange(bySurface);
        }

        // Skip the lemma query when it would repeat the surface-form query, which would
        // otherwise list every candidate twice.
        if (lemmaLower != null && !string.Equals(lemmaLower, surfaceLower, StringComparison.Ordinal))
        {
            var byLemma = VectorSpace.GetMostSimilar(lemmaLower, topN);
            if (byLemma != null)
            {
                merged.AddRange(byLemma);
            }
        }

        var ranked = merged.OrderByDescending(pair => pair.Item2).ToList();
        substCandidates.Add(new Tuple<TokenAnnotation, List<Tuple<string, double>>>(current, ranked));
    }
}