Пример #1
0
        // Add WordNet search paths to this as the 'object' parameter?
        /// <summary>
        /// Oxymoron: A terse paradox; the yoking of two contradictory terms.
        /// </summary>
        /// <remarks>
        /// For each sentence that has at least one phrase, the collapsed typed dependencies of its parse tree
        /// are filtered to the relations amod, advmod, acomp, dobj, nsubj and prep. Each surviving dependency
        /// becomes a candidate group of tokens; the first and last token of the group are then run through the
        /// analyzer's WordNet search-path tree, first in that order and, if no overlap was found, reversed.
        /// A candidate is kept when a node at or below an antonym relation produces words that intersect
        /// the result of <c>OxymoronData.GetDerivedFormsW2()</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and the WordNet search path, and whose <c>Figures</c> list receives the results.</param>
        /// <param name="windowSize">Ignored; the search window is always a single sentence.</param>
        /// <param name="greedy">Boxed <c>bool</c>, or <c>null</c> for <c>false</c>. Forwarded as <c>useAllForms</c> to the derivational-form lookups and as <c>greedy</c> to <c>OxymoronData</c>.</param>
        public static void FindOxymoron(Analyzer a, int? windowSize, object greedy)
        {
            bool greedySearch = (bool?)greedy ?? false;

            // Reads the trailing "-<number>" of a dependency node's string form (apostrophes may follow it)
            // and returns that number minus one.
            GetDependencyIndexDelegate GetDependencyIndex = delegate(TreeGraphNode t)
            {
                return Convert.ToInt32(Regex.Match(t.toString(), "^.*?-(\\d+)\\'*$").Result("$1")) - 1;
            };

            // Computes the word list of one search-path node from its parent's words (or from W1 directly
            // under the root) and, once an antonym relation is involved, records the overlap with W2's derived forms.
            Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationVisitor =
                (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
                {
                    if (n.IsRoot()) {
                        return;
                    }

                    var oxymoronData = (OxymoronData)o;

                    // An overlap was already found for this pair; the remaining nodes are skipped.
                    if (oxymoronData.Overlap.Value != 0) {
                        return;
                    }

                    var w1 = oxymoronData.W1;
                    var derivedFormsW2 = oxymoronData.GetDerivedFormsW2();

                    // Was an antonym relation applied by any ancestor of this node?
                    bool checkedAntonyms = false;
                    var currentNode = n;
                    while (!currentNode.Parent.IsRoot()) {
                        currentNode = currentNode.Parent;
                        if (currentNode.Value.Relation == WordNetEngine.SynSetRelation.Antonym) {
                            checkedAntonyms = true;
                            break;
                        }
                    }

                    var p = n.Parent;

                    var candidates = new List<string> { w1 };
                    if (!p.IsRoot()) {
                        candidates = p.Value.Words;
                    }

                    var relation = n.Value.Relation;

                    switch (relation) {
                        case WordNetEngine.SynSetRelation.SimilarTo:
                            n.Value.Words = Token.FindSynonyms(candidates);
                            break;

                        case WordNetEngine.SynSetRelation.Antonym:
                            n.Value.Words = Token.FindAntonyms(candidates);
                            checkedAntonyms = true;
                            break;

                        case WordNetEngine.SynSetRelation.DerivationallyRelated:
                            n.Value.Words = Token.FindDerivationalForms(candidates, Analyzer.SimilarityPrefixes, Analyzer.MostCommonSimilaritySuffixes, useAllForms: greedySearch);
                            if (checkedAntonyms) {
                                var negations = new List<string>(Analyzer.NegationPrefixes.Select(x => (string)(x.Clone()) + w1));

                                n.Value.Words.AddRange(Token.FindDerivationalForms(negations, null, null, useAllForms: greedySearch));
                            }
                            break;

                        default:
                            // The word lists are nulled before every traversal, so a relation that is not
                            // expanded above must still get a list or the code below would dereference null.
                            n.Value.Words = new List<string>();
                            break;
                    }

                    // Until an antonym step has happened, the incoming words are carried along as well.
                    if (!checkedAntonyms) {
                        n.Value.Words.AddRange(candidates);
                    }

                    n.Value.Words = n.Value.Words.Distinct().ToList(); // Remove duplicates.

                    if (oxymoronData.Debug) {
                        Console.WriteLine("===================================================");
                        Console.WriteLine("Relation: " + relation.ToString());
                        //Console.WriteLine("Parent relation: " + p.Value.Relation.ToString());
                        Console.WriteLine("Child count: " + n.Children.Count());
                        Console.WriteLine("Node candidates:");
                        if (n.IsRoot() || n.Value.Words.Count == 0) {
                            Console.WriteLine("  None");
                        }
                        else {
                            foreach (var w in n.Value.Words) {
                                Console.WriteLine("  " + w.ToString());
                            }
                        }
                        if (n.IsLeaf()) {
                            Console.WriteLine("LEAF NODE");
                        }
                        Console.WriteLine("===================================================");
                    }

                    if (checkedAntonyms) {
                        oxymoronData.Overlap.Value = n.Value.Words.Intersect(derivedFormsW2).Count();
                    }
                };

            // Resets the word list of a search-path node so that a traversal starts from a clean tree.
            Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationNullVisitor =
                (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
                {
                    //Console.WriteLine(n.Value.Relation.ToString());
                    n.Value.Words = null;
                };

            string dependencySymbols = @"^(amod|advmod|acomp|dobj|nsubj|prep)$";

            var allSubsequences = new List<List<Subsequence>>();

            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var sentence = a.Document.Sentences[i];
                var subsequenceTokens = new List<SubsequenceToken>();
                foreach (var token in sentence.Tokens) {
                    subsequenceTokens.Add(new SubsequenceToken(token, sentence));
                }

                var phrases = sentence.Phrases;
                if (phrases.Count == 0) {
                    continue;
                }

                var subsequence = new Subsequence(subsequenceTokens, sentence, phrases[0].Subsequences[0].ContainingSubsequence, i);

                var tree = sentence.Tree;
                GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
                java.util.Collection tdc = gs.typedDependenciesCollapsed();

                var candidates = new List<Subsequence>();
                for (java.util.Iterator j = tdc.iterator(); j.hasNext(); ) {
                    var td = (TypedDependency)j.next();
                    var relation = td.reln().getShortName();
                    if (!Regex.IsMatch(relation, dependencySymbols)) {
                        continue;
                    }

                    var governorIndex = GetDependencyIndex(td.gov());
                    var dependentIndex = GetDependencyIndex(td.dep());

                    // "prep" takes the whole token span between governor and dependent;
                    // every other relation takes just the two tokens themselves.
                    var index = Math.Min(governorIndex, dependentIndex);
                    var count = Math.Abs(dependentIndex - governorIndex) + 1;
                    var ss = relation == "prep"
                        ? subsequence.GetRange(index, count)
                        : subsequence.Where((tok, pos) => pos == governorIndex || pos == dependentIndex).ToList();

                    // Remove any leftover punctuation from the candidate subsequences.
                    ss.RemoveAll(tok => Regex.IsMatch(tok.Tag, Analyzer.PunctuationPatterns));

                    // A group emptied by the punctuation filter has no first/last word to compare;
                    // keeping it would make the pair lookup below index into an empty list.
                    if (ss.Count == 0) {
                        continue;
                    }

                    candidates.Add(new Subsequence(ss, sentence, subsequence.ContainingSubsequence, i));
                }

                // Determine whether the candidate pairs are oxymorons.
                for (int k = 0; k < candidates.Count; ++k) {
                    Token[] pair = { candidates[k][0], candidates[k][candidates[k].Count - 1] };

                    // Clear (i.e. null) all the word lists in the WordNet search-path tree.
                    a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);

                    var overlap = new OxymoronData.IntClass(0);
                    a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair, overlap, greedy: greedySearch, debug: false));
                    if (overlap.Value == 0) {
                        // Nothing found with (first, last); try again with the roles of the two words swapped.
                        a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
                        a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair.Reverse().ToArray(), overlap, greedy: greedySearch, debug: false));
                    }

                    if (overlap.Value != 0) {
                        allSubsequences.Add(new List<Subsequence> { candidates[k] });
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Oxymoron, multiWindow: true);

            a.Figures.AddRange(figures);
        }
Пример #2
0
        /// <summary>
        /// Polyptoton: Repetition of a word in a different form; having cognate words in close proximity.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document. Within a window, single-token
        /// subsequences (taken from the stop-word-free subsequences of each sentence's first phrase) are
        /// chained together when they are not equivalent to the previous link but share at least one
        /// derivational form with it. Chains of two or more links are merged and added to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 3 when <c>null</c>.</param>
        public static void FindPolyptoton(Analyzer a, int? windowSize)
        {
            int span = windowSize ?? 3; // Use default window size of 3.
            var sentences = a.Document.Sentences;

            var chains = new List<List<Subsequence>>();

            for (int start = 0; start < sentences.Count; ++start) {
                // Build the search pool for the window that begins at sentence 'start'.
                var pool = new List<Subsequence>();
                for (int offset = 0; offset < span; ++offset) {
                    int sentenceIndex = start + offset;
                    if (sentenceIndex >= sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var phrases = sentences[sentenceIndex].Phrases;
                    if (phrases.Count > 0) {
                        pool.AddRange(phrases[0].SubsequencesNoStopWords);
                    }
                }

                // Try every single-token subsequence as the head of a chain.
                for (int head = 0; head < pool.Count; ++head) {
                    if (pool[head].Count != 1) {
                        continue;
                    }

                    var chain = new List<Subsequence> { new Subsequence(pool[head], start) };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var candidate = pool[next];
                        var previous = chain.Last();

                        // Skip multi-token subsequences and plain repetitions of the previous link.
                        if (candidate.Count != 1 || previous.Equivalent(candidate)) {
                            continue;
                        }

                        var previousForms = previous.Last().DerivationalForms;
                        var candidateForms = candidate.Last().DerivationalForms;
                        if (previousForms.Intersect(candidateForms).Any()) {
                            chain.Add(new Subsequence(candidate, start));
                        }
                    }

                    if (chain.Count > 1) {
                        chains.Add(chain);
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(chains, RhetoricalFigures.Polyptoton, multiWindow: true));
        }
Пример #3
0
        /// <summary>
        /// Epizeuxis: Repetition of a word or phrase with no others between.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document, pooling the subsequences of each
        /// sentence's first phrase. A chain grows whenever a later subsequence is equivalent to the chain's
        /// last link and right-contiguous with it. Chains of two or more links are merged and added to
        /// <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 2 when <c>null</c>.</param>
        public static void FindEpizeuxis(Analyzer a, int? windowSize)
        {
            int span = windowSize ?? 2; // Use default window size of 2.
            var sentences = a.Document.Sentences;

            var chains = new List<List<Subsequence>>();

            for (int start = 0; start < sentences.Count; ++start) {
                // Build the search pool for the window that begins at sentence 'start'.
                var pool = new List<Subsequence>();
                for (int offset = 0; offset < span; ++offset) {
                    int sentenceIndex = start + offset;
                    if (sentenceIndex >= sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var phrases = sentences[sentenceIndex].Phrases;
                    if (phrases.Count > 0) {
                        pool.AddRange(phrases[0].Subsequences);
                    }
                }

                // Every pooled subsequence is tried as the head of a chain.
                for (int head = 0; head < pool.Count; ++head) {
                    var chain = new List<Subsequence> { new Subsequence(pool[head], start) };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var previous = chain.Last();
                        var candidate = pool[next];
                        if (previous.Equivalent(candidate) && previous.IsRightContiguous(candidate)) {
                            chain.Add(new Subsequence(candidate, start));
                        }
                    }

                    if (chain.Count > 1) {
                        chains.Add(chain);
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(chains, RhetoricalFigures.Epizeuxis, multiWindow: true));
        }
Пример #4
0
        /// <summary>
        /// Isocolon: Repetition of grammatical structure in nearby phrases or clauses of approximately equal length.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document, pooling the first subsequence of every
        /// phrase. Starting from each pooled subsequence of at least two tokens, later subsequences are chained
        /// on when their distance to the chain's last link (the <c>-</c> operator of <c>Subsequence</c>) is
        /// within the threshold, the last link is not a superset of them, and their first and last tokens have
        /// the same <c>TagEquivalent</c> as the last link's. Subsequences contained in another member of the
        /// same chain are then pruned, and chains that still have two or more links are merged and added to
        /// <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 3 when <c>null</c>.</param>
        /// <param name="similarityThresholdObject">Maximum allowed distance between two chained subsequences, converted with <c>Convert.ToInt32</c>; 0 when <c>null</c>.</param>
        public static void FindIsocolon(Analyzer a, int? windowSize, object similarityThresholdObject)
        {
            int ws = windowSize ?? 3; // Use default window size of 3.
            int similarityThreshold = Convert.ToInt32(similarityThresholdObject ?? 0); // Was 1, but that's perhaps too greedy.

            int minPhraseLength = 2;

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var window = new List<Subsequence>(); // Search window
                for (int j = 0; j < ws; ++j) {
                    if (i + j >= a.Document.Sentences.Count) {
                        break;
                    }

                    var phrases = a.Document.Sentences[i + j].Phrases;
                    for (int k = 0; k < phrases.Count; ++k) {
                        // Only the first subsequence of each phrase takes part. A phrase can have none when the
                        // parser encounters certain non-standard punctuation -- make sure text is clean to avoid hitting it!
                        window.AddRange(phrases[k].Subsequences.Take(1));
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    if (window[j].Count < minPhraseLength) {
                        continue;
                    }

                    var list = new List<Subsequence> { new Subsequence(window[j], i) };

                    for (int k = j + 1; k < window.Count; ++k) {
                        var comparer = list.Last(); // Was 'list.First()'.
                        var current = window[k];
                        if (current.Count < minPhraseLength) {
                            continue;
                        }

                        if (comparer - current <= similarityThreshold &&
                            !comparer.IsSupersetOf(current) &&
                            comparer[0].TagEquivalent == current[0].TagEquivalent &&
                            comparer.Last().TagEquivalent == current.Last().TagEquivalent) { // Changed 28 Mar. 2015.
                            list.Add(new Subsequence(current, i));
                        }
                    }

                    // Remove any subsequences that are subsets of other subsequences.
                    int outer = 0;
                    while (outer < list.Count - 1) {
                        bool replaced = false;
                        int inner = outer + 1;
                        while (inner < list.Count) {
                            if (list[outer].IsSupersetOf(list[inner])) {
                                // Do not advance: the next element has shifted into 'inner'.
                                list.RemoveAt(inner);
                            }
                            else if (list[inner].IsSupersetOf(list[outer])) {
                                list[outer] = list[inner];
                                list.RemoveAt(inner);
                                replaced = true;
                                break;
                            }
                            else {
                                ++inner;
                            }
                        }

                        // After a replacement the scan restarts at index 0. The previous 'k = 0; break;' was
                        // followed by the loop's '++k' and so resumed at index 1, which left subsets of a
                        // superset newly placed at index 0 in the list.
                        outer = replaced ? 0 : outer + 1;
                    }

                    if (list.Count > 1) {
                        allSubsequences.Add(list);
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Isocolon, multiWindow: true);

            a.Figures.AddRange(figures);
        }
Пример #5
0
        /// <summary>
        /// Epanalepsis: Repetition at the end of a clause of the word or phrase that began it.
        /// </summary>
        /// <remarks>
        /// Examines every clause of every sentence on its own. Within a clause, a subsequence whose first
        /// token is flagged <c>IsStart</c> is paired with the first later equivalent subsequence whose last
        /// token is flagged <c>IsEnd</c>. The pairs are merged and added to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Read (default 3) but not used; each clause is searched individually.</param>
        /// <param name="minLength">Converted with <c>Convert.ToInt32</c> (default 2) but not applied by the search.</param>
        public static void FindEpanalepsis(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 3; // Use default window size of 3.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var pairs = new List<List<Subsequence>>();
            var sentences = a.Document.Sentences;

            for (int s = 0; s < sentences.Count; ++s) {
                // No need for the usual search window here; just check every clause.
                var clauses = sentences[s].Clauses;
                for (int c = 0; c < clauses.Count; ++c) {
                    var pool = clauses[c].SubsequencesNoStartDeterminersEtc;
                    //var pool = a.Document.Sentences[s].Phrases[c].Subsequences;
                    for (int head = 0; head < pool.Count; ++head) {
                        if (!pool[head][0].IsStart) {
                            continue;
                        }

                        // NOTE(review): the clause index, not the sentence index, is passed as the second
                        // constructor argument here -- confirm that this is the intended window id.
                        var opener = new Subsequence(pool[head], c);
                        var pair = new List<Subsequence> { opener };

                        // Stop at the first matching subsequence that closes the clause.
                        for (int tail = head + 1; tail < pool.Count; ++tail) {
                            var candidate = pool[tail];
                            if (opener.Equivalent(candidate) && candidate.Last().IsEnd) {
                                pair.Add(new Subsequence(candidate, c));
                                break;
                            }
                        }

                        if (pair.Count > 1) {
                            pairs.Add(pair);
                        }
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(pairs, RhetoricalFigures.Epanalepsis, multiWindow: true));
        }
Пример #6
0
        /// <summary>
        /// Epistrophe: Repetition of the same word or phrase at the end of successive clauses.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document, pooling the
        /// <c>SubsequencesNoBoundaryDeterminersEtc</c> of every clause. A chain starts at a subsequence whose
        /// last token is flagged <c>IsEnd</c> and grows with later equivalent subsequences that also end on an
        /// <c>IsEnd</c> token. Single-token heads whose <c>StopWordsStatus</c> has the <c>LastWord</c> flag are
        /// recorded as rejections instead, and every chain containing a member that compares equal (<c>==</c>)
        /// to a rejection is dropped before merging.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 3 when <c>null</c>.</param>
        /// <param name="minLength">Converted with <c>Convert.ToInt32</c> (default 2) but not applied by the search.</param>
        public static void FindEpistrophe(Analyzer a, int? windowSize, object minLength)
        {
            int span = windowSize ?? 3; // Use default window size of 3.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var chains = new List<List<Subsequence>>();
            var rejected = new List<Subsequence>();
            var sentences = a.Document.Sentences;

            for (int start = 0; start < sentences.Count; ++start) {
                // Build the search pool for the window that begins at sentence 'start'.
                var pool = new List<Subsequence>();
                for (int offset = 0; offset < span; ++offset) {
                    int sentenceIndex = start + offset;
                    if (sentenceIndex >= sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var clauses = sentences[sentenceIndex].Clauses; // Or 'Phrases', but the clauses may be more apt.
                    for (int c = 0; c < clauses.Count; ++c) {
                        pool.AddRange(clauses[c].SubsequencesNoBoundaryDeterminersEtc);
                    }
                }

                // Search.
                for (int head = 0; head < pool.Count; ++head) {
                    var first = pool[head];
                    if (!first.Last().IsEnd) {
                        continue;
                    }

                    // A lone trailing stop word is remembered as a rejection rather than started as a chain.
                    if (first.Count == 1 && first.StopWordsStatus.HasFlag(StopWordsOptions.LastWord)) {
                        rejected.Add(new Subsequence(first, start));
                        continue;
                    }

                    var chain = new List<Subsequence> { new Subsequence(first, start) };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var previous = chain.Last();
                        var candidate = pool[next];
                        if (previous.Equivalent(candidate) && candidate.Last().IsEnd) {
                            chain.Add(new Subsequence(candidate, start));
                        }
                    }

                    if (chain.Count > 1) {
                        chains.Add(chain);
                    }
                }
            }

            // Check for false epistrophes and remove them. V. Gawryjolek, p. 59, where a natural epistrophe is incorrectly identified as anaphora.
            rejected = rejected.Distinct().ToList();
            chains.RemoveAll(chain => chain.Any(member => rejected.Any(rejection => member == rejection)));

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(chains, RhetoricalFigures.Epistrophe, multiWindow: true));
        }
Пример #7
0
        /// <summary>
        /// Chiasmus: Repetition of grammatical structures in reverse order.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document. For each sentence, the first
        /// subsequence of every phrase returned by <c>GetPrePreTerminalPhrases()</c> is collected, and every
        /// contiguous run of two or more of them is stored twice: flattened in order and flattened with the
        /// run reversed. A forward entry of at least <c>minLength</c> tokens is paired with the first later
        /// reversed entry that is long enough, matches it via <c>EqualsInTagEquivalent</c>, and shares no
        /// token with it; entries that begin or end with a "CC" tag are never paired.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 3 when <c>null</c>.</param>
        /// <param name="minLength">Minimum token count of both halves, converted with <c>Convert.ToInt32</c>; 3 when <c>null</c>.</param>
        public static void FindChiasmus(Analyzer a, int? windowSize, object minLength)
        {
            int span = windowSize ?? 3; // Use default window size of 3.
            int minTokens = Convert.ToInt32(minLength ?? 3);

            var pairs = new List<List<Subsequence>>();

            for (int start = 0; start < a.Document.Sentences.Count; ++start) {
                var forward = new List<Subsequence>(); // Search window
                var mirrored = new List<Subsequence>(); // Same entries, with each run reversed

                for (int offset = 0; offset < span; ++offset) {
                    if (start + offset >= a.Document.Sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var pptp = a.Document.Sentences[start + offset].GetPrePreTerminalPhrases();
                    //pptp.AddRange(a.Document.Sentences[start + offset].Phrases); pptp = pptp.Distinct().ToList(); // Test code.
                    var leading = new List<Subsequence>();
                    for (int p = 0; p < pptp.Count; ++p) {
                        leading.Add(pptp[p].Subsequences[0]);
                    }

                    var runs = leading.ContiguousSubsequences().ToList();
                    foreach (var s in runs) {
                        // Because reversal on a single element returns the same element.
                        if (s.Count <= 1) {
                            continue;
                        }

                        forward.Add(new Subsequence(s.SelectMany(x => x), a.Document.Sentences[start + offset], s[0], s[0].WindowId));
                        mirrored.Add(new Subsequence(s.Reverse().SelectMany(x => x), a.Document.Sentences[start + offset], s[0], s[0].WindowId));
                    }
                }

                // Search.
                for (int head = 0; head < forward.Count; ++head) {
                    if (forward[head].Count < minTokens) {
                        continue;
                    }

                    var anchor = new Subsequence(forward[head], start);
                    var pair = new List<Subsequence> { anchor };

                    for (int next = head + 1; next < forward.Count; ++next) {
                        var candidate = mirrored[next];
                        if (candidate.Count < minTokens
                            || anchor.First().TagEquivalent == "CC" || anchor.Last().TagEquivalent == "CC") {
                            continue;
                        }

                        // Same tag structure, but built from different tokens: take the first such match.
                        if (anchor.EqualsInTagEquivalent(candidate) && !anchor.Intersect(candidate).Any()) {
                            pair.Add(new Subsequence(candidate, start));
                            break;
                        }
                    }

                    if (pair.Count > 1) {
                        pairs.Add(pair);
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(pairs, RhetoricalFigures.Chiasmus, multiWindow: true));
        }
Пример #8
0
        /// <summary>
        /// Conduplicatio: The repetition of a word or phrase.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document, pooling the subsequences of each
        /// sentence's first phrase. A chain starts at a subsequence that has at least <c>minLength</c> tokens
        /// and is not made up entirely of stop words; it grows with later subsequences that are equivalent to
        /// the chain's last link but not right-contiguous with it. Chains of two or more links are merged and
        /// added to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 2 when <c>null</c>.</param>
        /// <param name="minLength">Minimum token count of a chain head, converted with <c>Convert.ToInt32</c>; 2 when <c>null</c>.</param>
        public static void FindConduplicatio(Analyzer a, int? windowSize, object minLength)
        {
            int span = windowSize ?? 2; // Use default window size of 2.
            int minTokens = Convert.ToInt32(minLength ?? 2); // With 'minlength' ≥ 2, this figure might be closer to "epimone."
            var sentences = a.Document.Sentences;

            var chains = new List<List<Subsequence>>();

            for (int start = 0; start < sentences.Count; ++start) {
                // Build the search pool for the window that begins at sentence 'start'.
                var pool = new List<Subsequence>();
                for (int offset = 0; offset < span; ++offset) {
                    int sentenceIndex = start + offset;
                    if (sentenceIndex >= sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var phrases = sentences[sentenceIndex].Phrases;
                    if (phrases.Count > 0) {
                        pool.AddRange(phrases[0].Subsequences);
                    }
                }

                // Search.
                for (int head = 0; head < pool.Count; ++head) {
                    var first = pool[head];

                    // Reject if too short or if the subsequence contains all stop words.
                    if (first.Count < minTokens || first.All(t => t.IsStopWord())) {
                        continue;
                    }

                    var chain = new List<Subsequence> { new Subsequence(first, start) };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var previous = chain.Last();
                        var candidate = pool[next];
                        if (previous.Equivalent(candidate) && !previous.IsRightContiguous(candidate)) {
                            chain.Add(new Subsequence(candidate, start));
                        }
                    }

                    if (chain.Count > 1) {
                        chains.Add(chain);
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(chains, RhetoricalFigures.Conduplicatio, multiWindow: true));
        }
Пример #9
0
        /// <summary>
        /// Antimetabole: Repetition of words in reverse grammatical order.
        /// </summary>
        /// <remarks>
        /// Works in two stages. First, within each window of consecutive sentences, subsequences from
        /// <c>SubsequencesKeepNounsVerbsAdjectivesAdverbsTag</c> of each sentence's first phrase are chained
        /// with later equivalent ones; only chains of exactly two links are kept and merged into repetitions.
        /// Second, every two repetitions that share a window id are compared: when the first half of one ends
        /// at or before the first half of the other begins, and its second half begins at or after the other's
        /// second half ends (or the sentence ids order them that way), the four halves are combined into one
        /// figure in the order outer-first, inner-first, separator, inner-second, outer-second, separator.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 1 when <c>null</c>.</param>
        /// <param name="minLength">Converted with <c>Convert.ToInt32</c> (default 2) but not applied by the search.</param>
        public static void FindAntimetabole(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 1; // Use default window size of 1.
            int ml = Convert.ToInt32(minLength ?? 2);

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                var window = new List<Subsequence>(); // Search window
                for (int j = 0; j < ws; ++j) {
                    if (i + j >= a.Document.Sentences.Count) {
                        break;
                    }

                    var phrases = a.Document.Sentences[i + j].Phrases;
                    if (phrases.Count > 0) {
                        window.AddRange(phrases[0].SubsequencesKeepNounsVerbsAdjectivesAdverbsTag);
                        //window.AddRange(phrases[0].SubsequencesKeepNounsVerbsAdjectivesAdverbsPronounsTagEquivalent);
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    var list = new List<Subsequence> { new Subsequence(window[j], i) };

                    for (int k = j + 1; k < window.Count; ++k) {
                        var comparer = list.Last();
                        var current = window[k];
                        if (comparer.Equivalent(current)) {
                            list.Add(new Subsequence(current, i));
                        }
                    }

                    // Only exact pairs qualify; a word repeated three or more times is not used.
                    if (list.Count == 2) {
                        allSubsequences.Add(list);
                    }
                }
            }

            var repetitions = MergeFigures(allSubsequences, RhetoricalFigures.Antimetabole, multiWindow: true, demarcation: null);

            var figures = new List<RhetoricalFigure>();

            for (int i = 0; i < repetitions.Count - 1; ++i) {
                var al = repetitions[i].Tokens.Split();
                for (int j = i + 1; j < repetitions.Count; ++j) {
                    if (repetitions[i].WindowId != repetitions[j].WindowId) {
                        continue;
                    }

                    var bl = repetitions[j].Tokens.Split();

                    // The 'a' repetition must enclose the 'b' repetition: a1 ... b1 ... b2 ... a2.
                    if ((al[0].Last().Right <= bl[0].First().Left || al[0].Last().SentenceId < bl[0].First().SentenceId) &&
                        (al[1].First().Left >= bl[1].Last().Right || al[1].First().SentenceId > bl[1].Last().SentenceId)) {
                        // The separators are appended to the new subsequence rather than to the split halves.
                        // 'al' is reused for every 'j', so adding to 'al[1]' would pile up one extra separator
                        // per earlier match in the figures built afterwards.
                        var subsequence = new Subsequence();
                        subsequence.AddRange(al[0]);
                        subsequence.AddRange(bl[0]);
                        subsequence.Add(new SubsequenceToken(new Token(FigureComponentsSeparator, "", 0)));
                        subsequence.AddRange(bl[1]);
                        subsequence.AddRange(al[1]);
                        subsequence.Add(new SubsequenceToken(new Token(FigureComponentsSeparator, "", 0)));

                        figures.Add(new RhetoricalFigure(subsequence, RhetoricalFigures.Antimetabole, repetitions[i].WindowId));
                    }
                }
            }

            a.Figures.AddRange(figures);
        }
Пример #10
0
        /// <summary>
        /// Anadiplosis: Repetition of the ending word or phrase from the previous clause at the beginning of the next.
        /// </summary>
        /// <remarks>
        /// Slides a window of consecutive sentences over the document, pooling the
        /// <c>SubsequencesNoStartDeterminersEtc</c> of every clause after pointing each one's
        /// <c>ContainingSubsequence</c> at a subsequence taken from the sentence's first clause (used for the
        /// contiguity checks). Two passes are then made over the pool: one pairs a clause-ending subsequence
        /// with the first later equivalent, clause-starting, right-contiguous one; the other pairs a
        /// clause-starting subsequence with the first later equivalent, clause-ending, left-contiguous one and
        /// sorts that pair by sentence id and left position. All pairs are merged and added to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer that supplies the document and receives the detected figures.</param>
        /// <param name="windowSize">Number of consecutive sentences searched together; 2 when <c>null</c>.</param>
        /// <param name="minLength">Converted with <c>Convert.ToInt32</c> (default 2) but not applied by the search.</param>
        public static void FindAnadiplosis(Analyzer a, int? windowSize, object minLength)
        {
            int span = windowSize ?? 2; // Use default window size of 2.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var pairs = new List<List<Subsequence>>();

            for (int start = 0; start < a.Document.Sentences.Count; ++start) {
                var pool = new List<Subsequence>(); // Search window
                for (int offset = 0; offset < span; ++offset) {
                    if (start + offset >= a.Document.Sentences.Count) {
                        break; // The window runs past the end of the document.
                    }

                    var clauses = a.Document.Sentences[start + offset].Clauses;

                    // Pick the subsequence that stands for the whole sentence in the contiguity checks.
                    var container = new Subsequence();
                    if (clauses.Count > 0) {
                        var trimmed = clauses[0].SubsequencesNoDeterminersEtc;
                        if (trimmed.Count > 0) {
                            container = clauses[0].SubsequencesNoDeterminersEtc[0]; // No determiners etc. needed here.
                        }
                        else {
                            container = clauses[0].Subsequences[0];
                        }
                    }

                    for (int c = 0; c < clauses.Count; ++c) {
                        var clauseSubsequences = clauses[c].SubsequencesNoStartDeterminersEtc;
                        for (int m = 0; m < clauseSubsequences.Count; ++m) {
                            clauseSubsequences[m].ContainingSubsequence = container; // To check for contiguity.
                        }
                        pool.AddRange(clauseSubsequences);
                    }
                }

                // Pass 1: a clause ending followed by an equivalent, contiguous clause beginning.
                for (int head = 0; head < pool.Count; ++head) {
                    if (!pool[head].Last().IsEnd) {
                        continue;
                    }

                    var ending = new Subsequence(pool[head], start);
                    var pair = new List<Subsequence> { ending };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var candidate = pool[next];
                        if (ending.Equivalent(candidate) && candidate.First().IsStart && ending.IsRightContiguous(candidate)) {
                            pair.Add(new Subsequence(candidate, start));
                            break;
                        }
                    }

                    if (pair.Count > 1) {
                        pairs.Add(pair);
                    }
                }

                // Pass 2: a clause beginning matched with an equivalent, contiguous clause ending found later in the pool.
                for (int head = 0; head < pool.Count; ++head) {
                    if (!pool[head].First().IsStart) {
                        continue;
                    }

                    var beginning = new Subsequence(pool[head], start);
                    var pair = new List<Subsequence> { beginning };

                    for (int next = head + 1; next < pool.Count; ++next) {
                        var candidate = pool[next];
                        if (beginning.Equivalent(candidate) && candidate.Last().IsEnd && beginning.IsLeftContiguous(candidate)) {
                            pair.Add(new Subsequence(candidate, start));
                            break;
                        }
                    }

                    if (pair.Count > 1) {
                        // Put the two halves into text order before storing them.
                        pair = pair.OrderBy(s => s.SentenceId).ThenBy(s => s[0].Left).ToList();
                        pairs.Add(pair);
                    }
                }
            }

            // Some figures may be out of order WRT the start of the text; reorder them here.
            //pairs = pairs.OrderBy(s => s[0].SentenceId).ThenBy(s => s[0][0].Left).ToList();

            // Remove duplicate instances and merge those contained in others.
            a.Figures.AddRange(MergeFigures(pairs, RhetoricalFigures.Anadiplosis, multiWindow: true));
        }
Пример #11
0
        /// <summary>
        /// Symploce: Repetition of a word or phrase at the beginning, and of another at the end, of successive clauses; the combination of Anaphora and Epistrophe.
        /// </summary>
        /// <remarks>
        /// For every sentence index a window is built from the first entry of
        /// <c>SubsequencesNoBoundaryConjunctions</c> of each clause in the next <paramref name="windowSize"/>
        /// sentences. Each window entry that is long enough and flagged as both start and end is compared with
        /// every later entry: all combinations of a leading range and a trailing range are tested, and each
        /// combination whose leading AND trailing ranges are equivalent is recorded as a candidate figure.
        /// Candidates are de-duplicated by <c>MergeFigures</c> and appended to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer whose document is searched and whose <c>Figures</c> list receives the results.</param>
        /// <param name="windowSize">Number of consecutive sentences per search window; 3 when null.</param>
        /// <param name="minLength">Minimum token count of the anchoring subsequence; converted with <c>Convert.ToInt32</c>, 2 when null.</param>
        public static void FindSymploce(Analyzer a, int? windowSize, object minLength)
        {
            int ws = windowSize ?? 3; // Use default window size of 3.
            int ml = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

            var allSubsequences = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                // Search window: the first subsequence of every clause in the next 'ws' sentences.
                // ('Phrases' could be used instead of 'Clauses', but the clauses may be more apt.)
                var window = new List<Subsequence>();
                for (int j = 0; j < ws && i + j < a.Document.Sentences.Count; ++j) {
                    var sentence = a.Document.Sentences[i + j];
                    for (int k = 0; k < sentence.Clauses.Count; ++k) {
                        var subsequences = sentence.Clauses[k].SubsequencesNoBoundaryConjunctions; // Added 29 Mar. 2015.
                        if (subsequences.Count > 0) {
                            window.Add(subsequences[0]);
                        }
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    var candidate = window[j];

                    // Some (complete?) redundancy here with the 'IsStart' and 'IsEnd' tests.
                    if (!(candidate.Count >= ml && candidate.First().IsStart && candidate.Last().IsEnd)) {
                        continue;
                    }

                    var comparer = new Subsequence(candidate, i);
                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = new Subsequence(window[k], i);
                        var shorter = Math.Min(comparer.Count, current.Count);

                        // 'l' is the length of the leading range, 'm' the length of the trailing range.
                        // NOTE(review): l + m may exceed 'shorter', so the two ranges can overlap - confirm this is intended.
                        for (int l = 1; l < shorter; ++l) {
                            var comparerStart = new Subsequence(comparer.GetRange(0, l), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                            var currentStart = new Subsequence(current.GetRange(0, l), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);

                            for (int m = 1; m < shorter; ++m) {
                                var comparerEnd = new Subsequence(comparer.GetRange(comparer.Count - m, m), comparer.ContainingSentence, comparer.ContainingSubsequence, comparer.WindowId);
                                var currentEnd = new Subsequence(current.GetRange(current.Count - m, m), current.ContainingSentence, current.ContainingSubsequence, current.WindowId);

                                if (!(comparerStart.Equivalent(currentStart) && comparerEnd.Equivalent(currentEnd))) {
                                    continue;
                                }

                                // Prepend the matching starts so each entry holds start tokens followed by end tokens.
                                comparerEnd.InsertRange(0, comparerStart);
                                currentEnd.InsertRange(0, currentStart);
                                allSubsequences.Add(new List<Subsequence> { comparerEnd, currentEnd });
                            }
                        }
                    }
                }
            }

            // Remove duplicate instances and merge those contained in others.
            var figures = MergeFigures(allSubsequences, RhetoricalFigures.Symploce, multiWindow: true);

            a.Figures.AddRange(figures);
        }
Пример #12
0
        /// <summary>
        /// Polysyndeton: "Excessive" repetition of conjunctions between clauses.
        /// </summary>
        /// <remarks>
        /// Two passes are made over the document. The first collects runs of more than two equivalent
        /// single-token "CC" subsequences inside a window of <paramref name="windowSize"/> sentences. The second
        /// collects runs of at least two equivalent "CC" subsequences whose token is flagged <c>IsStart</c>
        /// inside a window of <paramref name="consecutiveStarts"/> sentences. The merged results of both passes
        /// are ordered by the sentence id and left edge of their first token and appended to <c>a.Figures</c>.
        /// </remarks>
        /// <param name="a">Analyzer whose document is searched and whose <c>Figures</c> list receives the results.</param>
        /// <param name="windowSize">Sentences per window for the first pass; 1 when null.</param>
        /// <param name="consecutiveStarts">Sentences per window for the second (leading conjunction) pass; converted with <c>Convert.ToInt32</c>, 2 when null.</param>
        public static void FindPolysyndeton(Analyzer a, int? windowSize, object consecutiveStarts)
        {
            int ws = windowSize ?? 1; // Use default window size of 1.
            int cs = Convert.ToInt32(consecutiveStarts ?? 2); // Use default of 2 consecutive sentences for leading polysyndeton.

            // Find conjunctions within the window; "excessive" should mean more than 2.
            var repeated = CollectConjunctionRuns(a, ws, startsOnly: false, minRunLength: 3);
            var figures = MergeFigures(repeated, RhetoricalFigures.Polysyndeton);

            // Now find conjunctions starting consecutive clauses.
            var leading = CollectConjunctionRuns(a, cs, startsOnly: true, minRunLength: 2);

            // Remove duplicate instances and merge those contained in others.
            figures.AddRange(MergeFigures(leading, RhetoricalFigures.Polysyndeton, multiWindow: true));

            figures = figures.OrderBy(x => x.Tokens.First().SentenceId).ThenBy(x => x.Tokens.First().Left).ToList();

            a.Figures.AddRange(figures);
        }

        /// <summary>
        /// Collects, for every sentence index, runs of equivalent single-token "CC" subsequences found in the
        /// subsequences of the first phrase of each of the next <paramref name="sentenceSpan"/> sentences.
        /// </summary>
        /// <param name="a">Analyzer providing the document.</param>
        /// <param name="sentenceSpan">Number of consecutive sentences contributing to one window.</param>
        /// <param name="startsOnly">When true, only tokens flagged <c>IsStart</c> may seed or extend a run.</param>
        /// <param name="minRunLength">Minimum number of entries a run needs in order to be returned.</param>
        /// <returns>One list per qualifying run; each entry is a copy tagged with the window's sentence index.</returns>
        private static List<List<Subsequence>> CollectConjunctionRuns(Analyzer a, int sentenceSpan, bool startsOnly, int minRunLength)
        {
            var runs = new List<List<Subsequence>>();

            for (int i = 0; i < a.Document.Sentences.Count; ++i) {
                // Search window.
                var window = new List<Subsequence>();
                for (int j = 0; j < sentenceSpan && i + j < a.Document.Sentences.Count; ++j) {
                    var phrases = a.Document.Sentences[i + j].Phrases;
                    if (phrases.Count > 0) {
                        window.AddRange(phrases[0].Subsequences);
                    }
                }

                // Search.
                for (int j = 0; j < window.Count; ++j) {
                    var seed = window[j];
                    if (seed.Count != 1 || !(seed[0].TagEquivalent == "CC")) {
                        continue;
                    }
                    if (startsOnly && !seed[0].IsStart) {
                        continue;
                    }

                    var run = new List<Subsequence> { new Subsequence(seed, i) };
                    for (int k = j + 1; k < window.Count; ++k) {
                        var current = window[k];

                        // Always compare against the most recently accepted entry of the run.
                        if (run.Last().Equivalent(current) && (!startsOnly || current[0].IsStart)) {
                            run.Add(new Subsequence(current, i));
                        }
                    }

                    if (run.Count >= minRunLength) {
                        runs.Add(run);
                    }
                }
            }

            return runs;
        }
Пример #13
0
        /// <summary>
        /// Test driver: analyzes a text for rhetorical figures, prints the document, the figures (as CSV rows
        /// and as plain text) and the measured processor time to the console, and optionally writes CSV files.
        /// </summary>
        /// <param name="args">
        /// Optional arguments. [0]: file to analyze, tried as given and then prefixed with
        /// <c>Repository.NlpTextsPath</c>; if neither exists the built-in default text is used.
        /// [1]: JSON object mapping figure names to parameters (an empty string means "All").
        /// [2]: base path for CSV output; ".csv" (figures) and ".doc.csv" (sentences) are appended.
        /// A null array is treated as empty.
        /// </param>
        /// <returns>Always the empty string.</returns>
        protected string DoStuff(string[] args)
        {
            string rv = string.Empty;

            // Treat a missing argument array like an empty one instead of failing on the first length check.
            args = args ?? new string[0];

            // Put test methods here:
            string[] pathParts = {
                //Repository.LocalTextPath,
                Repository.NlpTextsPath,
                //"Washington - Inaugural Address (1789).txt"
                "Obama - Inaugural Address (2009).txt"
                //"Obama - Inaugural Address (excerpt, 2009).txt"
                //"Churchill - We Shall Fight on the Beaches (1940).txt"
                //"Churchill - We Shall Fight on the Beaches (excerpt, 1940).txt"
                //"Test Sentences.txt"
                //"test.txt"
                //"epizeuxis_test.txt" // and ploce
                //"polysyndeton_test.txt"
                //"anaphora_test.txt" // and epistrophe
                //"epistrophe_test.txt"
                //"symploce_test.txt"
                //"epanalepsis_test.txt"
                //"anadiplosis_test.txt"
                //"antimetabole_test.txt"
                //"polyptoton_test.txt"
                //"isocolon_test.txt"
                //"chiasmus_test.txt"
                //"oxymoron_test.txt"
                //"Stevens - Farewell to Florida.txt"
            };

            var path = Path.Combine(pathParts);

            if (args.Length > 0) {
                var args0 = args[0].Trim();
                if (args0 != string.Empty) {
                    if (File.Exists(args0)) {
                        path = args0;
                    }
                    else if (File.Exists(Repository.NlpTextsPath + args0)) {
                        // NOTE(review): plain concatenation assumes NlpTextsPath ends with a directory separator - confirm.
                        path = Repository.NlpTextsPath + args0;
                    }
                }
            }

            //var result = Miscellaneous.GetPermutationTree<string>("root", new List<string>() { "antonym", "synonym", "derived" }, 3);

            AnalyzerOptions options = AnalyzerOptions.OmitPunctuationTokens | AnalyzerOptions.OmitFalseDuplicatePhrases | AnalyzerOptions.UsePunctuationDelimitedPhrases;
            string ignore = "";
            Analyzer a = new Analyzer(path, ignore: ignore, options: options);

            // Only the figure search is timed; constructing the analyzer above is excluded.
            TimeSpan begin = Process.GetCurrentProcess().TotalProcessorTime;

            if (args.Length > 1) { // Deserialize JSON
                var args1 = args[1].Trim();

                var all = false;
                if (args1 == string.Empty) {
                    args1 = "{ All: {} }";
                }

                var rhetoricalFigureParameters = JsonConvert.DeserializeObject<Dictionary<string, RhetoricalFigureParameters>>(args1);

                // Figures searched individually are excluded from a later "All" run.
                RhetoricalFigures exclusions = RhetoricalFigures.None;

                foreach (var rfp in rhetoricalFigureParameters) {
                    RhetoricalFigures rhetoricalFigure;
                    if (!Enum.TryParse(rfp.Key, out rhetoricalFigure)) {
                        continue; // Keys that do not name a figure are skipped.
                    }

                    if (rhetoricalFigure == RhetoricalFigures.All) {
                        all = true; // Deferred until every explicitly configured figure has run.
                        continue;
                    }

                    var windowSize = rfp.Value.windowSize;
                    var extra = rfp.Value.extra;

                    exclusions |= rhetoricalFigure;

                    a.FindRhetoricalFigures(rhetoricalFigure, windowSize, extra, Callback);
                }

                if (all) {
                    a.FindRhetoricalFigures(RhetoricalFigures.All, callback: Callback, exclusions: exclusions);
                }
            }
            else {
                //a.FindRhetoricalFigures(RhetoricalFigures.Epizeuxis, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Ploce, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Conduplicatio, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Polysyndeton, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Anaphora, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Epistrophe, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Symploce, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Epanalepsis, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Anadiplosis, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Antimetabole, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Polyptoton, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Isocolon, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Chiasmus, callback: Callback);
                //a.FindRhetoricalFigures(RhetoricalFigures.Oxymoron, callback: Callback);
                a.FindRhetoricalFigures(RhetoricalFigures.All, callback: Callback);
            }

            TimeSpan end = Process.GetCurrentProcess().TotalProcessorTime;

            Console.WriteLine();
            a.Document.WriteLine();
            Console.WriteLine();

            // Wraps a value in double quotes for CSV output, doubling embedded quotes so the field stays well-formed.
            Func<object, string> quote = value =>
                "\"" + (Convert.ToString(value) ?? string.Empty).Replace("\"", "\"\"") + "\"";

            string sep = ",";

            // One CSV row per token of every figure.
            var figureRows = new List<string>();
            var figureColumns = new string[] {
                "figure_id",
                "token_id",
                "type",
                "word",
                "sentence_id",
                "left_edge",
                "right_edge",
                "tag",
                "tag_equiv",
                "depth",
                "stem"
            };
            figureRows.Add(String.Join(sep, figureColumns));

            int i = 0;
            foreach (var figure in a.Figures) {
                int j = 0;
                foreach (var token in figure.Tokens) {
                    if (token.ContainingSentence == null) {
                        continue; // Skipped tokens do not consume a token id.
                    }

                    var rowArray = new object[] {
                        i, j,
                        figure.Type,
                        quote(token.Word),
                        token.SentenceId, token.Left, token.Right,
                        quote(token.Tag),
                        quote(token.TagEquivalent),
                        token.Depth,
                        quote(token.Stem)
                    };
                    figureRows.Add(String.Join(sep, rowArray));
                    j++;
                }
                i++;
            }

            // One CSV row per token of every sentence in the document.
            var sentenceRows = new List<string>();
            var sentenceColumns = new string[] {
                "sentence_id",
                "token_id",
                "word",
                "left_edge",
                "right_edge",
                "tag",
                "tag_equiv",
                "depth",
                "stem"
            };
            sentenceRows.Add(String.Join(sep, sentenceColumns));

            i = 0;
            foreach (var sentence in a.Document.Sentences) {
                int j = 0;
                foreach (var token in sentence.Tokens) {
                    var rowArray = new object[] {
                        i, j,
                        quote(token.Word),
                        token.Left, token.Right,
                        quote(token.Tag),
                        quote(token.TagEquivalent),
                        token.Depth,
                        quote(token.Stem)
                    };
                    sentenceRows.Add(String.Join(sep, rowArray));
                    j++;
                }
                i++;
            }

            figureRows.ForEach(x => Console.WriteLine("{0}", x));
            Console.WriteLine();

            if (args.Length > 2) { // Write CSV representations of figures to file.
                var args2 = args[2].Trim();
                var args2Csv = args2 + ".csv";
                var args2Doc = args2 + ".doc.csv";

                Console.WriteLine("Writing document: " + args2Csv + Environment.NewLine);
                File.WriteAllLines(args2Csv, figureRows);

                Console.WriteLine("Writing document: " + args2Doc + Environment.NewLine);
                File.WriteAllLines(args2Doc, sentenceRows);
            }

            foreach (var figure in a.Figures) {
                Console.WriteLine(figure.Text);
            }
            Console.WriteLine();

            TimeSpan elapsed = end - begin;
            Console.WriteLine("Measured time: " + elapsed.TotalMilliseconds + " ms; " + elapsed.TotalSeconds + " s; " + elapsed.TotalMinutes + "m.");

            // N.B. This returns to method "Main()", in which a console pause may be commented out; uncomment it for testing.
            return rv;
        }