// TODO: Add WordNet search paths to this as the 'object' parameter?
/// <summary>
/// Oxymoron: A terse paradox; the yoking of two contradictory terms.
/// </summary>
/// <remarks>
/// For every sentence that has at least one phrase, candidate token groups are taken from the collapsed typed
/// dependencies whose short relation name is amod, advmod, acomp, dobj, nsubj or prep. The first and last token
/// of each candidate form a pair that is tested by traversing <c>a.WordNetSearchPath</c>, first in the given
/// order and, if no overlap is found, in reverse order. Candidates with a non-zero overlap are passed through
/// <c>MergeFigures</c> and appended to <c>a.Figures</c>.
/// </remarks>
/// <param name="a">Analyzer supplying the document, the WordNet search-path tree and the figure list that receives the results.</param>
/// <param name="windowSize">Ignored; the search window is always one sentence.</param>
/// <param name="greedy">Boxed <c>bool</c> (null means false); forwarded as <c>useAllForms</c> to the derivational-form lookups.</param>
public static void FindOxymoron(Analyzer a, int? windowSize, object greedy)
{
    bool greedySearch = (bool?)greedy ?? false;

    // The node's string form is expected to end in "-<number>" (optionally followed by apostrophes).
    // One is subtracted to obtain a zero-based position — presumably the parser numbers tokens from 1.
    GetDependencyIndexDelegate GetDependencyIndex = delegate(TreeGraphNode t)
    {
        return Convert.ToInt32(Regex.Match(t.toString(), "^.*?-(\\d+)\\'*$").Result("$1")) - 1;
    };

    // Visitor that fills each non-root node of the search-path tree with the words reachable through the
    // node's relation, starting from W1 (children of the root) or from the parent's word list.
    // NOTE(review): relies on Traverse visiting a parent before its children — confirm.
    Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationVisitor =
        (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
    {
        if (n.IsRoot())
            return;

        var oxymoronData = (OxymoronData)o;
        if (oxymoronData.Overlap.Value != 0)
            return; // An overlap was already found earlier in this traversal.

        var w1 = oxymoronData.W1;
        var derivedFormsW2 = oxymoronData.GetDerivedFormsW2();

        // Has any proper ancestor (below the root) already followed an antonym relation?
        bool checkedAntonyms = false;
        var currentNode = n;
        while (!currentNode.Parent.IsRoot())
        {
            currentNode = currentNode.Parent;
            if (currentNode.Value.Relation == WordNetEngine.SynSetRelation.Antonym)
            {
                checkedAntonyms = true;
                break;
            }
        }

        var p = n.Parent;
        var candidates = p.IsRoot() ? new List<string> { w1 } : p.Value.Words;

        var relation = n.Value.Relation;
        switch (relation)
        {
            case WordNetEngine.SynSetRelation.SimilarTo:
                n.Value.Words = Token.FindSynonyms(candidates);
                break;

            case WordNetEngine.SynSetRelation.Antonym:
                n.Value.Words = Token.FindAntonyms(candidates);
                checkedAntonyms = true;
                break;

            case WordNetEngine.SynSetRelation.DerivationallyRelated:
                n.Value.Words = Token.FindDerivationalForms(candidates, Analyzer.SimilarityPrefixes, Analyzer.MostCommonSimilaritySuffixes, useAllForms: greedySearch);
                if (checkedAntonyms)
                {
                    // Once on the antonym side, also try W1 with each negation prefix prepended.
                    var negations = new List<string>(Analyzer.NegationPrefixes.Select(x => (string)(x.Clone()) + w1));
                    n.Value.Words.AddRange(Token.FindDerivationalForms(negations, null, null, useAllForms: greedySearch));
                }
                break;
        }

        // The word lists are nulled before every traversal, so a relation not handled above would leave
        // Words null and the statements below would throw. Start from an empty list in that case.
        if (n.Value.Words == null)
            n.Value.Words = new List<string>();

        // Before any antonym step the incoming candidates are carried forward as well.
        if (!checkedAntonyms)
            n.Value.Words.AddRange(candidates);

        n.Value.Words = n.Value.Words.Distinct().ToList(); // Remove duplicates.

        if (oxymoronData.Debug)
        {
            Console.WriteLine("===================================================");
            Console.WriteLine("Relation: " + relation.ToString());
            Console.WriteLine("Child count: " + n.Children.Count());
            Console.WriteLine("Node candidates:");
            if (n.IsRoot() || n.Value.Words.Count == 0)
                Console.WriteLine(" None");
            else
            {
                foreach (var w in n.Value.Words)
                    Console.WriteLine(" " + w.ToString());
            }
            if (n.IsLeaf())
                Console.WriteLine("LEAF NODE");
            Console.WriteLine("===================================================");
        }

        // Only words reached via an antonym step are compared against the derived forms of W2.
        if (checkedAntonyms)
            oxymoronData.Overlap.Value = n.Value.Words.Intersect(derivedFormsW2).Count();
    };

    // Visitor that clears the word list of every node so the tree can be reused for the next pair.
    Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationNullVisitor =
        (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
    {
        n.Value.Words = null;
    };

    string dependencySymbols = @"^(amod|advmod|acomp|dobj|nsubj|prep)$";

    var allSubsequences = new List<List<Subsequence>>();

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

    for (int i = 0; i < a.Document.Sentences.Count; ++i)
    {
        var sentence = a.Document.Sentences[i];
        var phrases = sentence.Phrases;
        if (phrases.Count == 0)
            continue; // Nothing to analyse; skip before building the token list.

        var subsequenceTokens = new List<SubsequenceToken>();
        foreach (var token in sentence.Tokens)
            subsequenceTokens.Add(new SubsequenceToken(token, sentence));

        var subsequence = new Subsequence(subsequenceTokens, sentence, phrases[0].Subsequences[0].ContainingSubsequence, i);

        var tree = sentence.Tree;
        GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
        java.util.Collection tdc = gs.typedDependenciesCollapsed();

        // Collect the candidate token groups from the selected dependency relations.
        var candidates = new List<Subsequence>();
        for (java.util.Iterator j = tdc.iterator(); j.hasNext(); )
        {
            var td = (TypedDependency)j.next();
            var relation = td.reln().getShortName();
            if (!Regex.IsMatch(relation, dependencySymbols))
                continue;

            var governorIndex = GetDependencyIndex(td.gov());
            var dependentIndex = GetDependencyIndex(td.dep());
            var index = Math.Min(governorIndex, dependentIndex);
            var count = Math.Abs(dependentIndex - governorIndex) + 1;

            // "prep" takes the whole span between governor and dependent; the others take just the two tokens.
            var ss = relation == "prep"
                ? subsequence.GetRange(index, count)
                : subsequence.Where((tok, pos) => pos == governorIndex || pos == dependentIndex).ToList();

            // Remove any leftover punctuation from the candidate subsequences.
            ss.RemoveAll(tok => Regex.IsMatch(tok.Tag, Analyzer.PunctuationPatterns));

            candidates.Add(new Subsequence(ss, sentence, subsequence.ContainingSubsequence, i));
        }

        // Determine whether the candidate pairs are oxymorons.
        foreach (var candidate in candidates)
        {
            // Punctuation removal can leave a candidate empty; indexing it below would throw.
            if (candidate.Count == 0)
                continue;

            Token[] pair = { candidate[0], candidate[candidate.Count - 1] };

            // Clear (i.e. null) all the word lists in the WordNet search-path tree.
            a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);

            var overlap = new OxymoronData.IntClass(0);
            a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair, overlap, greedy: greedySearch, debug: false));

            if (overlap.Value == 0)
            {
                // No overlap in the given order: retry with the pair reversed.
                a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
                a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair.Reverse().ToArray(), overlap, greedy: greedySearch, debug: false));
            }

            if (overlap.Value != 0)
                allSubsequences.Add(new List<Subsequence> { candidate });
        }
    }

    // Remove duplicate instances and merge those contained in others.
    var figures = MergeFigures(allSubsequences, RhetoricalFigures.Oxymoron, multiWindow: true);

    a.Figures.AddRange(figures);
}
/// <summary>
/// Symploce: Repetition of a word or phrase at the beginning, and of another at the end, of successive clauses; the combination of Anaphora and Epistrophe.
/// </summary>
/// <remarks>
/// For each sentence index a window of up to <paramref name="windowSize"/> consecutive sentences is formed from
/// the first boundary-conjunction-free subsequence of every clause. Each window entry that is long enough and
/// runs from a start token to an end token is compared with every later entry: all combinations of a leading
/// slice and a trailing slice (each shorter than the shorter of the two entries) are tried, and when both slices
/// are equivalent the joined start+end slices of the two entries are recorded as one figure.
/// Results are passed through <c>MergeFigures</c> and appended to <c>a.Figures</c>.
/// </remarks>
/// <param name="a">Analyzer supplying the document and the figure list that receives the results.</param>
/// <param name="windowSize">Number of consecutive sentences per search window; null selects 3.</param>
/// <param name="minLength">Minimum token count of the first entry of a comparison, converted with <c>Convert.ToInt32</c>; null selects 2.</param>
public static void FindSymploce(Analyzer a, int? windowSize, object minLength)
{
    int sentencesPerWindow = windowSize ?? 3; // Default window size is 3.
    int minimumTokens = Convert.ToInt32(minLength ?? 2); // V. Gawryjolek, p. 23

    var matches = new List<List<Subsequence>>();

    // Copies 'length' tokens of 'source' starting at 'start' into a new Subsequence that keeps the
    // source's sentence, containing subsequence and window id.
    Func<Subsequence, int, int, Subsequence> slice = (source, start, length) =>
        new Subsequence(source.GetRange(start, length), source.ContainingSentence, source.ContainingSubsequence, source.WindowId);

    for (int i = 0; i < a.Document.Sentences.Count; ++i)
    {
        // Build the search window: one subsequence per clause of each sentence in range.
        // Clauses are used rather than phrases; they may be more apt.
        var window = new List<Subsequence>();
        for (int offset = 0; offset < sentencesPerWindow; ++offset)
        {
            int sentenceIndex = i + offset;
            if (sentenceIndex >= a.Document.Sentences.Count)
                continue;

            var clauses = a.Document.Sentences[sentenceIndex].Clauses;
            for (int c = 0; c < clauses.Count; ++c)
            {
                var clauseSubsequences = clauses[c].SubsequencesNoBoundaryConjunctions; // Added 29 Mar. 2015.
                if (clauseSubsequences.Count > 0)
                    window.Add(clauseSubsequences[0]);
            }
        }

        // Search.
        for (int j = 0; j < window.Count; ++j)
        {
            var lead = window[j];

            // Only the first entry of a comparison is filtered by length and start/end flags.
            // Some (complete?) redundancy here with the 'IsStart' and 'IsEnd' tests.
            if (lead.Count < minimumTokens || !lead.First().IsStart || !lead.Last().IsEnd)
                continue;

            var comparer = new Subsequence(lead, i);

            for (int k = j + 1; k < window.Count; ++k)
            {
                var current = new Subsequence(window[k], i);
                int shorter = Math.Min(comparer.Count, current.Count);

                // NOTE(review): head and tail lengths vary independently, so for the shorter entry the two
                // slices can share tokens when headLength + tailLength exceeds its length — confirm this is intended.
                for (int headLength = 1; headLength < shorter; ++headLength)
                {
                    var comparerStart = slice(comparer, 0, headLength);
                    var currentStart = slice(current, 0, headLength);

                    for (int tailLength = 1; tailLength < shorter; ++tailLength)
                    {
                        var comparerEnd = slice(comparer, comparer.Count - tailLength, tailLength);
                        var currentEnd = slice(current, current.Count - tailLength, tailLength);

                        if (!comparerStart.Equivalent(currentStart) || !comparerEnd.Equivalent(currentEnd))
                            continue;

                        // Join start and end of each entry into a single subsequence per entry.
                        comparerEnd.InsertRange(0, comparerStart);
                        currentEnd.InsertRange(0, currentStart);

                        matches.Add(new List<Subsequence> { comparerEnd, currentEnd });
                    }
                }
            }
        }
    }

    // Remove duplicate instances and merge those contained in others.
    var figures = MergeFigures(matches, RhetoricalFigures.Symploce, multiWindow: true);

    a.Figures.AddRange(figures);
}