コード例 #1
0
ファイル: LexicalSimplifier.cs プロジェクト: gglavas/light-ls
        /// <summary>
        /// Collects substitution candidates for the replaceable words of a sentence. Candidates are retrieved as the most similar
        /// words obtained via word embeddings (cosine similarity between embedding vectors).
        /// </summary>
        /// <remarks>
        /// A token is considered only if it has non-empty text, is a content word, has a POS tag that does not start with "C",
        /// carries no named-entity label, does not contain "north", "west", "south" or "east" in its lowercased text, has a relative
        /// information content strictly greater than <paramref name="icTreshold"/>, and either does not start with an upper-case
        /// letter or is the first token of the sentence. All casing is normalized with the invariant culture so that lookup keys
        /// do not depend on the culture of the current thread.
        /// </remarks>
        /// <param name="sentence">Sentence to be simplified</param>
        /// <param name="substCandidates">List that receives one entry per accepted token: the token paired with its candidates, ordered by descending similarity</param>
        /// <param name="icTreshold">The information content threshold</param>
        /// <param name="word">Target word to be replaced (if not provided, all content words are considered for replacing)</param>
        public void CollectCandidates(SentenceAnnotation sentence, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, double icTreshold, string word = null)
        {
            // Number of most similar words requested from the vector space per lookup.
            const int NeighbourCount = 30;

            string[] sides = { "north", "west", "south", "east" };

            // Normalize the optional target word once instead of once per token.
            string targetWord = word == null ? null : word.ToLowerInvariant();

            foreach (var token in sentence.Tokens)
            {
                // An empty surface form cannot be looked up and would make the capitalization check below throw.
                if (string.IsNullOrEmpty(token.Text)
                    || !token.IsContent()
                    || token.POSTag.StartsWith("C", StringComparison.Ordinal)
                    || !string.IsNullOrEmpty(token.NamedEntity))
                {
                    continue;
                }

                string lowerText = token.Text.ToLowerInvariant();
                if (sides.Any(side => lowerText.Contains(side)))
                {
                    continue;
                }

                // Checked before the information-content lookup so that tokens other than the requested word cost nothing more.
                if (targetWord != null && lowerText != targetWord)
                {
                    continue;
                }

                string lowerLemma = string.IsNullOrEmpty(token.Lemma) ? null : token.Lemma.ToLowerInvariant();

                // The lemma is preferred over the surface form when it is available.
                var ic = InformationContent.GetRelativeInformationContent(lowerLemma ?? lowerText);
                if (!(ic > icTreshold))
                {
                    continue;
                }

                // Tokens starting with an upper-case letter are accepted only in sentence-initial position
                // (presumably to leave out proper nouns that were not tagged as named entities; confirm with the authors).
                if (char.IsUpper(token.Text[0]) && sentence.Tokens.IndexOf(token) != 0)
                {
                    continue;
                }

                var candidates = new List<Tuple<string, double>>();

                var textNeighbours = VectorSpace.GetMostSimilar(lowerText, NeighbourCount);
                if (textNeighbours != null)
                {
                    candidates.AddRange(textNeighbours);
                }

                // NOTE(review): when the lemma equals the lowercased text this repeats the same query, so (assuming the lookup is
                // deterministic) every candidate is listed twice. Kept unchanged because callers may rely on the current output.
                if (lowerLemma != null)
                {
                    var lemmaNeighbours = VectorSpace.GetMostSimilar(lowerLemma, NeighbourCount);
                    if (lemmaNeighbours != null)
                    {
                        candidates.AddRange(lemmaNeighbours);
                    }
                }

                var ranked = candidates.OrderByDescending(c => c.Item2).ToList();
                substCandidates.Add(new Tuple<TokenAnnotation, List<Tuple<string, double>>>(token, ranked));
            }
        }
コード例 #2
0
ファイル: LexicalSimplifier.cs プロジェクト: gglavas/light-ls
 /// <summary>
 /// Collects substitution candidates for the replaceable words of a sentence. Candidates are retrieved as the most similar
 /// words obtained via word embeddings (cosine similarity between embedding vectors).
 /// </summary>
 /// <remarks>
 /// A token is considered only if it has non-empty text, is a content word, has a POS tag that does not start with "C",
 /// carries no named-entity label, does not contain "north", "west", "south" or "east" in its lowercased text, has a relative
 /// information content strictly greater than <paramref name="icTreshold"/>, and either does not start with an upper-case
 /// letter or is the first token of the sentence. All casing is normalized with the invariant culture so that lookup keys
 /// do not depend on the culture of the current thread.
 /// </remarks>
 /// <param name="sentence">Sentence to be simplified</param>
 /// <param name="substCandidates">List that receives one entry per accepted token: the token paired with its candidates, ordered by descending similarity</param>
 /// <param name="icTreshold">The information content threshold</param>
 /// <param name="word">Target word to be replaced (if not provided, all content words are considered for replacing)</param>
 public void CollectCandidates(SentenceAnnotation sentence, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, double icTreshold, string word = null)
 {
     // Number of most similar words requested from the vector space per lookup.
     const int NeighbourCount = 30;

     string[] sides = { "north", "west", "south", "east" };

     // Normalize the optional target word once instead of once per token.
     string targetWord = word == null ? null : word.ToLowerInvariant();

     foreach (var token in sentence.Tokens)
     {
         // An empty surface form cannot be looked up and would make the capitalization check below throw.
         if (string.IsNullOrEmpty(token.Text)
             || !token.IsContent()
             || token.POSTag.StartsWith("C", StringComparison.Ordinal)
             || !string.IsNullOrEmpty(token.NamedEntity))
         {
             continue;
         }

         string lowerText = token.Text.ToLowerInvariant();
         if (sides.Any(side => lowerText.Contains(side)))
         {
             continue;
         }

         // Checked before the information-content lookup so that tokens other than the requested word cost nothing more.
         if (targetWord != null && lowerText != targetWord)
         {
             continue;
         }

         string lowerLemma = string.IsNullOrEmpty(token.Lemma) ? null : token.Lemma.ToLowerInvariant();

         // The lemma is preferred over the surface form when it is available.
         var ic = InformationContent.GetRelativeInformationContent(lowerLemma ?? lowerText);
         if (!(ic > icTreshold))
         {
             continue;
         }

         // Tokens starting with an upper-case letter are accepted only in sentence-initial position
         // (presumably to leave out proper nouns that were not tagged as named entities; confirm with the authors).
         if (char.IsUpper(token.Text[0]) && sentence.Tokens.IndexOf(token) != 0)
         {
             continue;
         }

         var candidates = new List<Tuple<string, double>>();

         var textNeighbours = VectorSpace.GetMostSimilar(lowerText, NeighbourCount);
         if (textNeighbours != null)
         {
             candidates.AddRange(textNeighbours);
         }

         // NOTE(review): when the lemma equals the lowercased text this repeats the same query, so (assuming the lookup is
         // deterministic) every candidate is listed twice. Kept unchanged because callers may rely on the current output.
         if (lowerLemma != null)
         {
             var lemmaNeighbours = VectorSpace.GetMostSimilar(lowerLemma, NeighbourCount);
             if (lemmaNeighbours != null)
             {
                 candidates.AddRange(lemmaNeighbours);
             }
         }

         var ranked = candidates.OrderByDescending(c => c.Item2).ToList();
         substCandidates.Add(new Tuple<TokenAnnotation, List<Tuple<string, double>>>(token, ranked));
     }
 }