예제 #1
0
        // Add WordNet search paths to this as the 'object' parameter?
        /// <summary>
        /// Oxymoron: A terse paradox; the yoking of two contradictory terms.
        /// </summary>
        /// <remarks>
        /// For each sentence that has at least one phrase, the collapsed typed dependencies of its parse tree are
        /// filtered to the relations amod, advmod, acomp, dobj, nsubj and prep. The first and last tokens of every
        /// resulting candidate are run through the analyzer's WordNet search-path tree (in both orders); a candidate
        /// is reported when the traversal leaves a non-zero overlap count.
        /// </remarks>
        /// <param name="a">Analyzer supplying the document sentences and the WordNet search-path tree; detected figures are appended to its <c>Figures</c> list.</param>
        /// <param name="windowSize">Not used. The window size is one sentence.</param>
        /// <param name="greedy">Boxed <c>bool</c>, or <c>null</c> for <c>false</c>; forwarded as <c>useAllForms</c> to the derivational-form lookups and as <c>greedy</c> to <c>OxymoronData</c>.</param>
        public static void FindOxymoron(Analyzer a, int? windowSize, object greedy)
        {
            bool greedySearch = (bool?)greedy ?? false;

            // The pattern expects the node's string form to end in "-N" (optionally followed by apostrophes);
            // N is converted to a zero-based index.
            GetDependencyIndexDelegate GetDependencyIndex = delegate(TreeGraphNode t)
            {
                return Convert.ToInt32(Regex.Match(t.toString(), "^.*?-(\\d+)\\'*$").Result("$1")) - 1;
            };

            // Fills in the word list of one search-path node from its parent's words (or from W1 directly under
            // the root) and, once an antonym step lies on the path, records the overlap with W2's derived forms.
            Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationVisitor =
                (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
                {
                    if (n.IsRoot())
                        return;

                    var oxymoronData = (OxymoronData)o;

                    // An overlap was already recorded by an earlier node; nothing more to do.
                    if (oxymoronData.Overlap.Value != 0)
                        return;

                    var w1 = oxymoronData.W1;
                    var derivedFormsW2 = oxymoronData.GetDerivedFormsW2();

                    // Has any proper ancestor (below the root) already applied the antonym relation?
                    bool checkedAntonyms = false;
                    var currentNode = n;
                    while (!currentNode.Parent.IsRoot()) {
                        currentNode = currentNode.Parent;
                        if (currentNode.Value.Relation == WordNetEngine.SynSetRelation.Antonym) {
                            checkedAntonyms = true;
                            break;
                        }
                    }

                    var p = n.Parent;

                    var candidates = new List<string> { w1 };
                    if (!p.IsRoot())
                        candidates = p.Value.Words;

                    var relation = n.Value.Relation;

                    // NOTE(review): a relation other than the three handled here leaves 'Words' as it was before
                    // this visit (null after the clearing traversal), which would throw below -- confirm the
                    // search-path tree only ever contains these relations.
                    switch (relation) {
                        case WordNetEngine.SynSetRelation.SimilarTo:
                            n.Value.Words = Token.FindSynonyms(candidates);
                            break;

                        case WordNetEngine.SynSetRelation.Antonym:
                            n.Value.Words = Token.FindAntonyms(candidates);
                            checkedAntonyms = true;
                            break;

                        case WordNetEngine.SynSetRelation.DerivationallyRelated:
                            n.Value.Words = Token.FindDerivationalForms(candidates, Analyzer.SimilarityPrefixes, Analyzer.MostCommonSimilaritySuffixes, useAllForms: greedySearch);
                            if (checkedAntonyms) {
                                // Also try W1 with each negation prefix attached.
                                var negations = new List<string>(Analyzer.NegationPrefixes.Select(x => x + w1));

                                n.Value.Words.AddRange(Token.FindDerivationalForms(negations, null, null, useAllForms: greedySearch));
                            }
                            break;
                    }

                    // Before the antonym step, carry the incoming words forward alongside the new ones.
                    if (!checkedAntonyms)
                        n.Value.Words.AddRange(candidates);

                    n.Value.Words = n.Value.Words.Distinct().ToList(); // Remove duplicates.

                    if (oxymoronData.Debug) {
                        Console.WriteLine("===================================================");
                        Console.WriteLine("Relation: " + relation.ToString());
                        Console.WriteLine("Child count: " + n.Children.Count());
                        Console.WriteLine("Node candidates:");
                        if (n.Value.Words.Count == 0) {
                            Console.WriteLine("  None");
                        }
                        else {
                            foreach (var w in n.Value.Words)
                                Console.WriteLine("  " + w.ToString());
                        }
                        if (n.IsLeaf()) Console.WriteLine("LEAF NODE");
                        Console.WriteLine("===================================================");
                    }

                    if (checkedAntonyms)
                        oxymoronData.Overlap.Value = n.Value.Words.Intersect(derivedFormsW2).Count();
                };

            // Resets a node's word list so the tree can be reused for the next candidate pair.
            Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationNullVisitor =
                (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
                {
                    n.Value.Words = null;
                };

            string dependencySymbols = @"^(amod|advmod|acomp|dobj|nsubj|prep)$";

            var allSubsequences = new List<List<Subsequence>>();

            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var sentence = a.Document.Sentences[i];
                var subsequenceTokens = new List<SubsequenceToken>();
                foreach (var token in sentence.Tokens)
                    subsequenceTokens.Add(new SubsequenceToken(token, sentence));

                var phrases = sentence.Phrases;
                if (phrases.Count == 0)
                    continue;

                var subsequence = new Subsequence(subsequenceTokens, sentence, phrases[0].Subsequences[0].ContainingSubsequence, i);

                var tree = sentence.Tree;
                GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
                java.util.Collection tdc = gs.typedDependenciesCollapsed();

                var candidates = new List<Subsequence>();
                for (java.util.Iterator j = tdc.iterator(); j.hasNext(); ) {
                    var td = (TypedDependency)j.next();
                    var relation = td.reln().getShortName();
                    if (!Regex.IsMatch(relation, dependencySymbols))
                        continue;

                    var governorIndex = GetDependencyIndex(td.gov());
                    var dependentIndex = GetDependencyIndex(td.dep());

                    // "prep" takes the whole span between the two words; every other relation takes just the pair.
                    var index = Math.Min(governorIndex, dependentIndex);
                    var count = Math.Abs(dependentIndex - governorIndex) + 1;
                    var ss = relation == "prep"
                        ? subsequence.GetRange(index, count)
                        : subsequence.Where((tok, pos) => pos == governorIndex || pos == dependentIndex).ToList();

                    // Remove any leftover punctuation from the candidate subsequences.
                    ss.RemoveAll(tok => Regex.IsMatch(tok.Tag, Analyzer.PunctuationPatterns));

                    // A candidate left empty by the punctuation filter has no first/last token to compare.
                    if (ss.Count == 0)
                        continue;

                    candidates.Add(new Subsequence(ss, sentence, subsequence.ContainingSubsequence, i));
                }

                // Determine whether the candidate pairs are oxymorons.
                for (int k = 0; k < candidates.Count; ++k) {
                    Token[] pair = { candidates[k][0], candidates[k][candidates[k].Count - 1] };

                    // Clear (i.e. null) all the word lists in the WordNet search-path tree.
                    a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);

                    var overlap = new OxymoronData.IntClass(0);
                    a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair, overlap, greedy: greedySearch, debug: false));
                    if (overlap.Value == 0) {
                        // Nothing found from the first word to the second; retry in the opposite direction.
                        a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
                        a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair.Reverse().ToArray(), overlap, greedy: greedySearch, debug: false));
                    }

                    if (overlap.Value != 0)
                        allSubsequences.Add(new List<Subsequence> { candidates[k] });
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Oxymoron, multiWindow: true);

            a.Figures.AddRange(figures);
        }
예제 #2
0
        /// <summary>
        /// Symploce: Repetition of a word or phrase at the beginning, and of another at the end, of successive clauses; the combination of Anaphora and Epistrophe.
        /// </summary>
        /// <remarks>
        /// For each sentence index a window of up to <paramref name="windowSize"/> sentences is built from the first
        /// entry of every clause's <c>SubsequencesNoBoundaryConjunctions</c>. Each pair of window entries is then
        /// compared on every combination of leading-slice and trailing-slice length; a pair whose leading slices
        /// and trailing slices are both equivalent is recorded as start+end for each of the two clauses.
        /// </remarks>
        /// <param name="a">Analyzer whose document is searched; detected figures are appended to its <c>Figures</c> list.</param>
        /// <param name="windowSize">Number of consecutive sentences per search window; 3 when <c>null</c>.</param>
        /// <param name="minLength">Minimum token count of the first clause of a pair, converted with <c>Convert.ToInt32</c>; 2 when <c>null</c>.</param>
        public static void FindSymploce(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 3; // Use default window size of 3.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                // Search window: one subsequence per clause, taken from this sentence and the following ws - 1.
                var window = new List<Subsequence>();
                for (int j = 0; j < ws && i + j < a.Document.Sentences.Count; ++j) {
                    var clauses = a.Document.Sentences[i + j].Clauses; // Or 'Phrases', but the clauses may be more apt.
                    for (int k = 0; k < clauses.Count; ++k) {
                        var subsequences = clauses[k].SubsequencesNoBoundaryConjunctions; // Added 29 Mar. 2015.
                        if (subsequences.Count > 0)
                            window.Add(subsequences[0]);
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    // The emptiness test comes first so First()/Last() are never called on an empty subsequence
                    // (possible when the caller passes a minimum length of zero or less).
                    if (window[j].Count == 0 || window[j].Count < ml || !window[j].First().IsStart || !window[j].Last().IsEnd)
                        continue;

                    var comparer = new Subsequence(window[j], i);
                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = new Subsequence(window[k], i);
                        var shorter = Math.Min(comparer.Count, current.Count);

                        // NOTE(review): l and m each run up to shorter - 1 independently, so the leading and
                        // trailing slices can overlap when l + m exceeds a clause's length -- confirm intended.
                        for (int l = 1; l < shorter; ++l) {
                            var comparerStart = new Subsequence(comparer.GetRange(0, l), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                            var currentStart = new Subsequence(current.GetRange(0, l), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);

                            // The start comparison does not depend on m, so it is decided once per l.
                            if (!comparerStart.Equivalent(currentStart))
                                continue;

                            for (int m = 1; m < shorter; ++m) {
                                var comparerEnd = new Subsequence(comparer.GetRange(comparer.Count - m, m), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                                var currentEnd = new Subsequence(current.GetRange(current.Count - m, m), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);
                                if (!comparerEnd.Equivalent(currentEnd))
                                    continue;

                                // Each figure entry is the matching start followed by the matching end.
                                comparerEnd.InsertRange(0, comparerStart);
                                currentEnd.InsertRange(0, currentStart);
                                allSubsequences.Add(new List<Subsequence> { comparerEnd, currentEnd });
                            }
                        }
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Symploce, multiWindow: true);

            a.Figures.AddRange(figures);
        }