/// <summary>
/// Progress callback: prints a "Finding ..." line naming the figure type
/// that is about to be searched for.
/// </summary>
/// <param name="type">Figure type to name in the console message.</param>
static void Callback(RhetoricalFigures type)
{
    Console.WriteLine("Finding " + type + "....");
}
/// <summary>
/// Creates a figure from a token subsequence, a figure type and a window id.
/// </summary>
/// <param name="subsequence">Stored as this figure's <c>Tokens</c>.</param>
/// <param name="type">Stored as this figure's <c>Type</c>.</param>
/// <param name="windowId">Stored as this figure's <c>WindowId</c>.</param>
public RhetoricalFigure(Subsequence subsequence, RhetoricalFigures type, int windowId)
{
    this.Tokens = subsequence;
    this.Type = type;
    this.WindowId = windowId;
}
// Methods:

/// <summary>
/// Merges raw figure candidates into a list of <see cref="RhetoricalFigure"/> objects.
/// Each candidate is a list of constituent subsequences. The method sorts the
/// candidates, optionally merges overlapping multi-window candidates, drops
/// candidates whose tokens are contained in another candidate, orders the
/// constituents, and flattens each surviving candidate into one figure.
/// </summary>
/// <param name="subsequences">
/// Candidate figures. Must not be null; every inner list must contain at least one
/// non-empty subsequence, because the first element of each is read when sorting.
/// The caller's outer list is not modified (a sorted copy is used).
/// </param>
/// <param name="type">Figure type assigned to every returned figure.</param>
/// <param name="multiWindow">
/// When true, two candidates are merged if the last constituent of the earlier one and
/// the first constituent of the later one match the last/first shared constituent.
/// </param>
/// <param name="demarcation">
/// When non-null, a token built from this string is appended to every constituent
/// subsequence as a separator.
/// </param>
/// <returns>One figure per surviving candidate, in candidate order.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="subsequences"/> is null.</exception>
public static List<RhetoricalFigure> MergeFigures(List<List<Subsequence>> subsequences, RhetoricalFigures type, bool multiWindow = true, string demarcation = FigureComponentsSeparator)
{
    if (subsequences == null)
    {
        throw new System.ArgumentNullException("subsequences");
    }

    // Some figures may be out of order WRT the start of the text; reorder them here.
    // ToList() yields a fresh outer list, so the removals below never touch the caller's list.
    var deepSubsequences = subsequences
        .OrderBy(s => s[0].SentenceId)
        .ThenBy(s => s[0][0].Left)
        .ToList();

    // Merge multi-window figures.
    if (multiWindow)
    {
        for (int i = 0; i < deepSubsequences.Count - 1; ++i)
        {
            for (int j = i + 1; j < deepSubsequences.Count; ++j)
            {
                // Materialise the shared constituents once instead of enumerating the query twice.
                var intersectionList = deepSubsequences[i].Intersect(deepSubsequences[j]).ToList();
                if (intersectionList.Count == 0)
                {
                    continue;
                }

                if (deepSubsequences[i].Last() == intersectionList.Last() && deepSubsequences[j].First() == intersectionList.First())
                {
                    deepSubsequences[i] = deepSubsequences[i].Union(deepSubsequences[j]).ToList();
                    deepSubsequences.RemoveAt(j);

                    // Re-examine the merged candidate at index i against everything after it.
                    i -= 1;
                    break;
                }
            }
        }
    }

    // At this point, no subsequence component of any figure should contain part of any other subsequence component.
    // N.B. V. the following for a discussion of how 'Distinct()' results are ordered:
    // http://stackoverflow.com/questions/4734852/does-c-sharp-distinct-method-keep-original-ordering-of-sequence-intact
    for (int i = 0; i < deepSubsequences.Count; ++i)
    {
        deepSubsequences[i] = deepSubsequences[i].Distinct().ToList();
    }

    // Flatten out subsequence lists; flatSubsequences[k] always corresponds to deepSubsequences[k].
    var flatSubsequences = new List<Subsequence>();
    foreach (var s in deepSubsequences)
    {
        flatSubsequences.Add(new Subsequence(s.SelectMany(x => x), s[0].WindowId));
    }

    // Remove duplicate list instances and merge those contained in others.
    for (int i = 0; i < flatSubsequences.Count - 1; ++i)
    {
        for (int j = i + 1; j < flatSubsequences.Count; ++j)
        {
            if (flatSubsequences[i].IsSupersetOf(flatSubsequences[j]))
            {
                // The later candidate adds nothing: drop it from both parallel lists.
                flatSubsequences.RemoveAt(j);
                deepSubsequences.RemoveAt(j);
                i -= 1;
                break;
            }
            else if (flatSubsequences[j].IsSupersetOf(flatSubsequences[i]))
            {
                // The later candidate covers the earlier one: copy it into slot i.
                // Slot j is left in place here and is removed by the branch above
                // when index i is re-examined.
                flatSubsequences[i] = flatSubsequences[j];
                deepSubsequences[i] = new List<Subsequence>(deepSubsequences[j]);
                i -= 1;
                break;
            }
        }
    }

    // Remove any duplicate subsequences within each figure.
    for (int i = 0; i < deepSubsequences.Count; ++i)
    {
        deepSubsequences[i] = deepSubsequences[i].Distinct().ToList();
    }

    // Make sure figure constituents are properly ordered: constituents by sentence,
    // tokens inside each constituent by left edge. (A former extra loop here called
    // OrderBy(...).ToList() and discarded the result, so it sorted nothing; removed.)
    for (int i = 0; i < deepSubsequences.Count; ++i)
    {
        deepSubsequences[i] = deepSubsequences[i].OrderBy(s => s[0].SentenceId).ToList();

        for (int j = 0; j < deepSubsequences[i].Count; ++j)
        {
            var dsij = deepSubsequences[i][j];
            deepSubsequences[i][j] = new Subsequence(dsij.OrderBy(s => s.Left), dsij.ContainingSentence, dsij.ContainingSubsequence, dsij.WindowId);

            if (demarcation != null)
            {
                deepSubsequences[i][j].Add(new SubsequenceToken(new Token(demarcation, "", 0)));
            }
        }
    }

    var figures = new List<RhetoricalFigure>();
    foreach (var deepSubsequence in deepSubsequences)
    {
        // Sort figure constituents so leftmost in text appears first, etc.
        var d = deepSubsequence.OrderBy(x => x.SentenceId).ThenBy(x => x[0].Left).ToList();

        // Per the original author's note: do not re-sort the flattened tokens afterwards;
        // that sort could cause problems with the collapsed figure. Stick to the sort above.
        var figure = new RhetoricalFigure(new Subsequence(d.SelectMany(x => x)), type, d[0].WindowId);
        figures.Add(figure);
    }

    return figures;
}
/// <summary>
/// Console driver: analyzes a text file for rhetorical figures, prints the results,
/// and optionally writes them as CSV.
/// </summary>
/// <param name="args">
/// Optional arguments:
/// [0] path of the text to analyze, used if the file exists as given or under
///     <c>Repository.NlpTextsPath</c>; otherwise the built-in default file is used;
/// [1] JSON object mapping figure names to parameters (an empty string means "All");
/// [2] base file name for CSV output: figures go to "&lt;base&gt;.csv" and document
///     tokens to "&lt;base&gt;.doc.csv".
/// </param>
/// <returns>Always an empty string.</returns>
protected string DoStuff(string[] args)
{
    string rv = string.Empty;

    // Put test methods here:
    string[] pathParts = {
        //Repository.LocalTextPath,
        Repository.NlpTextsPath,
        "sonnets.txt"
        //"Washington - Inaugural Address (1789).txt"
        //"Obama - Inaugural Address (2009).txt"
        //"Obama - Inaugural Address (excerpt, 2009).txt"
        //"Churchill - We Shall Fight on the Beaches (1940).txt"
        //"Churchill - We Shall Fight on the Beaches (excerpt, 1940).txt"
        //"Test Sentences.txt"
        //"test.txt"
        //"epizeuxis_test.txt" // and ploce
        //"polysyndeton_test.txt"
        //"anaphora_test.txt" // and epistrophe
        //"epistrophe_test.txt"
        //"symploce_test.txt"
        //"epanalepsis_test.txt"
        //"anadiplosis_test.txt"
        //"antimetabole_test.txt"
        //"polyptoton_test.txt"
        //"isocolon_test.txt"
        //"chiasmus_test.txt"
        //"oxymoron_test.txt"
        //"Stevens - Farewell to Florida.txt"
    };

    var path = Path.Combine(pathParts);

    // A usable first argument overrides the default text.
    if (args.Length > 0)
    {
        var args0 = args[0].Trim();
        if (args0 != string.Empty)
        {
            if (File.Exists(args0))
            {
                path = args0;
            }
            else if (File.Exists(Repository.NlpTextsPath + args0))
            {
                path = Repository.NlpTextsPath + args0;
            }
        }
    }

    //var result = Miscellaneous.GetPermutationTree<string>("root", new List<string>() { "antonym", "synonym", "derived" }, 3);

    AnalyzerOptions options = AnalyzerOptions.OmitPunctuationTokens | AnalyzerOptions.OmitFalseDuplicatePhrases | AnalyzerOptions.UsePunctuationDelimitedPhrases;
    string ignore = "";
    Analyzer a = new Analyzer(path, ignore: ignore, options: options);

    // TotalProcessorTime is CPU time consumed by this process, not wall-clock time.
    TimeSpan begin = Process.GetCurrentProcess().TotalProcessorTime;

    if (args.Length > 1)
    {
        // Deserialize JSON
        var args1 = args[1].Trim();
        var all = false;

        if (args1 == string.Empty)
        {
            args1 = "{ All: {} }";
        }

        var rhetoricalFigureParameters = JsonConvert.DeserializeObject<Dictionary<string, RhetoricalFigureParameters>>(args1);
        RhetoricalFigures exclusions = RhetoricalFigures.None;

        foreach (var rfp in rhetoricalFigureParameters)
        {
            var key = rfp.Key;
            RhetoricalFigures rhetoricalFigure;

            // Keys that are not figure names are ignored.
            if (!Enum.TryParse(key, out rhetoricalFigure))
            {
                continue;
            }

            // "All" is deferred until every explicitly configured figure has run.
            if (rhetoricalFigure == RhetoricalFigures.All)
            {
                all = true;
                continue;
            }

            var windowSize = rfp.Value.windowSize;
            var extra = rfp.Value.extra;

            // Remember this figure so the final "All" pass does not run it again.
            exclusions |= rhetoricalFigure;
            a.FindRhetoricalFigures(rhetoricalFigure, windowSize, extra, Callback);
        }

        if (all)
        {
            a.FindRhetoricalFigures(RhetoricalFigures.All, callback: Callback, exclusions: exclusions);
        }
    }
    else
    {
        //a.FindRhetoricalFigures(RhetoricalFigures.Epizeuxis, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Ploce, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Conduplicatio, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Polysyndeton, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Anaphora, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Epistrophe, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Symploce, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Epanalepsis, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Anadiplosis, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Antimetabole, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Polyptoton, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Isocolon, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Chiasmus, callback: Callback);
        //a.FindRhetoricalFigures(RhetoricalFigures.Oxymoron, callback: Callback);
        a.FindRhetoricalFigures(RhetoricalFigures.All, callback: Callback);
    }

    TimeSpan end = Process.GetCurrentProcess().TotalProcessorTime;

    Console.WriteLine();
    a.Document.WriteLine();
    Console.WriteLine();

    string sep = ",";

    // One CSV row per figure token.
    var figureRows = new List<string>();
    var figureColumns = new string[] { "figure_id", "token_id", "type", "word", "sentence_id", "left_edge", "right_edge", "tag", "tag_equiv", "depth", "stem" };
    var header = String.Join(sep, figureColumns);
    figureRows.Add(header);

    int i = 0;
    foreach (var figure in a.Figures)
    {
        int j = 0;
        foreach (var token in figure.Tokens)
        {
            // Tokens without a containing sentence are skipped and do not consume a token_id.
            if (token.ContainingSentence == null)
            {
                continue;
            }

            var rowArray = new object[] {
                i,
                j,
                figure.Type,
                QuoteCsvField(token.Word),
                token.SentenceId,
                token.Left,
                token.Right,
                QuoteCsvField(token.Tag),
                QuoteCsvField(token.TagEquivalent),
                token.Depth,
                QuoteCsvField(token.Stem)
            };
            var row = String.Join(sep, rowArray);
            figureRows.Add(row);
            j++;
        }
        i++;
    }

    // One CSV row per document token.
    var sentenceRows = new List<string>();
    var sentenceColumns = new string[] { "sentence_id", "token_id", "word", "left_edge", "right_edge", "tag", "tag_equiv", "depth", "stem" };
    header = String.Join(sep, sentenceColumns);
    sentenceRows.Add(header);

    i = 0;
    foreach (var sentence in a.Document.Sentences)
    {
        int j = 0;
        foreach (var token in sentence.Tokens)
        {
            var rowArray = new object[] {
                i,
                j,
                QuoteCsvField(token.Word),
                token.Left,
                token.Right,
                QuoteCsvField(token.Tag),
                QuoteCsvField(token.TagEquivalent),
                token.Depth,
                QuoteCsvField(token.Stem)
            };
            var row = String.Join(sep, rowArray);
            sentenceRows.Add(row);
            j++;
        }
        i++;
    }

    figureRows.ForEach(x => Console.WriteLine("{0}", x));
    Console.WriteLine();

    if (args.Length > 2)
    {
        // Write CSV representations of figures to file.
        var args2 = args[2].Trim();
        var args2Csv = args2 + ".csv";
        var args2Doc = args2 + ".doc.csv";

        Console.WriteLine("Writing document: " + args2Csv + Environment.NewLine);
        File.WriteAllLines(args2Csv, figureRows);

        Console.WriteLine("Writing document: " + args2Doc + Environment.NewLine);
        File.WriteAllLines(args2Doc, sentenceRows);
    }

    foreach (var figure in a.Figures)
    {
        Console.WriteLine(figure.Text);
    }

    Console.WriteLine();

    var elapsed = end - begin;
    Console.WriteLine("Measured time: " + elapsed.TotalMilliseconds + " ms; " + elapsed.TotalSeconds + " s; " + elapsed.TotalMinutes + "m.");

    return(rv); // N.B. This returns to method "Main()", in which a console pause may be commented out; uncomment it for testing.
}

/// <summary>
/// Wraps a value in double quotes for CSV output, doubling any embedded double quote
/// so that text containing '"' cannot break the row. Null becomes an empty quoted field.
/// </summary>
private static string QuoteCsvField(object value)
{
    // string.Concat(object) gives the same text that "+" concatenation would (null -> "").
    string text = string.Concat(value);
    return "\"" + text.Replace("\"", "\"\"") + "\"";
}
/// <summary>
/// Runs the finder for every requested figure type, in a fixed order, invoking
/// <paramref name="callback"/> (when supplied) just before each finder starts.
/// </summary>
/// <param name="type">Figure(s) to look for; <c>All</c> selects every figure.</param>
/// <param name="windowSize">Passed through unchanged to each finder.</param>
/// <param name="extra">Passed through unchanged to the finders that accept it.</param>
/// <param name="callback">Optional notification invoked with the figure about to be searched.</param>
/// <param name="exclusions">Figures to skip even when selected by <paramref name="type"/>.</param>
public void FindRhetoricalFigures(RhetoricalFigures type = RhetoricalFigures.All, int? windowSize = null, object extra = null, FindFiguresCallback callback = null, RhetoricalFigures exclusions = RhetoricalFigures.None)
{
    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Epizeuxis))
    {
        AnnounceFigure(callback, RhetoricalFigures.Epizeuxis);
        RhetoricalFigure.FindEpizeuxis(this, windowSize);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Ploce))
    {
        AnnounceFigure(callback, RhetoricalFigures.Ploce);
        RhetoricalFigure.FindPloce(this, windowSize);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Conduplicatio))
    {
        AnnounceFigure(callback, RhetoricalFigures.Conduplicatio);
        RhetoricalFigure.FindConduplicatio(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Polysyndeton))
    {
        AnnounceFigure(callback, RhetoricalFigures.Polysyndeton);
        RhetoricalFigure.FindPolysyndeton(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Anaphora))
    {
        AnnounceFigure(callback, RhetoricalFigures.Anaphora);
        RhetoricalFigure.FindAnaphora(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Epistrophe))
    {
        AnnounceFigure(callback, RhetoricalFigures.Epistrophe);
        RhetoricalFigure.FindEpistrophe(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Symploce))
    {
        AnnounceFigure(callback, RhetoricalFigures.Symploce);
        RhetoricalFigure.FindSymploce(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Epanalepsis))
    {
        AnnounceFigure(callback, RhetoricalFigures.Epanalepsis);
        RhetoricalFigure.FindEpanalepsis(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Anadiplosis))
    {
        AnnounceFigure(callback, RhetoricalFigures.Anadiplosis);
        RhetoricalFigure.FindAnadiplosis(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Antimetabole))
    {
        AnnounceFigure(callback, RhetoricalFigures.Antimetabole);
        RhetoricalFigure.FindAntimetabole(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Polyptoton))
    {
        AnnounceFigure(callback, RhetoricalFigures.Polyptoton);
        RhetoricalFigure.FindPolyptoton(this, windowSize);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Isocolon))
    {
        AnnounceFigure(callback, RhetoricalFigures.Isocolon);
        RhetoricalFigure.FindIsocolon(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Chiasmus))
    {
        AnnounceFigure(callback, RhetoricalFigures.Chiasmus);
        RhetoricalFigure.FindChiasmus(this, windowSize, extra);
    }

    if (IsFigureRequested(type, exclusions, RhetoricalFigures.Oxymoron))
    {
        AnnounceFigure(callback, RhetoricalFigures.Oxymoron);
        RhetoricalFigure.FindOxymoron(this, windowSize, extra);
    }
}

// True when 'figure' is selected by 'type' (directly or through All) and not excluded.
private static bool IsFigureRequested(RhetoricalFigures type, RhetoricalFigures exclusions, RhetoricalFigures figure)
{
    if (exclusions.HasFlag(figure))
    {
        return false;
    }

    return type.HasFlag(figure) || type.HasFlag(RhetoricalFigures.All);
}

// Invokes the optional progress callback for 'figure'; does nothing when no callback was given.
private static void AnnounceFigure(FindFiguresCallback callback, RhetoricalFigures figure)
{
    if (callback != null)
    {
        callback(figure);
    }
}
/// <summary>
/// Searches for the selected rhetorical figures. A figure is searched when it is not
/// listed in <paramref name="exclusions"/> and <paramref name="type"/> contains either
/// that figure or <c>All</c>. The optional <paramref name="callback"/> is invoked with
/// the figure type immediately before its finder runs.
/// </summary>
/// <param name="type">Figure(s) to search for.</param>
/// <param name="windowSize">Forwarded to every finder.</param>
/// <param name="extra">Forwarded to the finders that take an extra argument.</param>
/// <param name="callback">Optional per-figure progress notification.</param>
/// <param name="exclusions">Figure(s) that must not be searched.</param>
public void FindRhetoricalFigures(RhetoricalFigures type = RhetoricalFigures.All, int? windowSize = null, object extra = null, FindFiguresCallback callback = null, RhetoricalFigures exclusions = RhetoricalFigures.None)
{
    // Evaluated once; every per-figure test below reuses it.
    bool everything = type.HasFlag(RhetoricalFigures.All);

    if (!exclusions.HasFlag(RhetoricalFigures.Epizeuxis) && (everything || type.HasFlag(RhetoricalFigures.Epizeuxis)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Epizeuxis);
        }
        RhetoricalFigure.FindEpizeuxis(this, windowSize);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Ploce) && (everything || type.HasFlag(RhetoricalFigures.Ploce)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Ploce);
        }
        RhetoricalFigure.FindPloce(this, windowSize);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Conduplicatio) && (everything || type.HasFlag(RhetoricalFigures.Conduplicatio)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Conduplicatio);
        }
        RhetoricalFigure.FindConduplicatio(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Polysyndeton) && (everything || type.HasFlag(RhetoricalFigures.Polysyndeton)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Polysyndeton);
        }
        RhetoricalFigure.FindPolysyndeton(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Anaphora) && (everything || type.HasFlag(RhetoricalFigures.Anaphora)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Anaphora);
        }
        RhetoricalFigure.FindAnaphora(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Epistrophe) && (everything || type.HasFlag(RhetoricalFigures.Epistrophe)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Epistrophe);
        }
        RhetoricalFigure.FindEpistrophe(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Symploce) && (everything || type.HasFlag(RhetoricalFigures.Symploce)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Symploce);
        }
        RhetoricalFigure.FindSymploce(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Epanalepsis) && (everything || type.HasFlag(RhetoricalFigures.Epanalepsis)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Epanalepsis);
        }
        RhetoricalFigure.FindEpanalepsis(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Anadiplosis) && (everything || type.HasFlag(RhetoricalFigures.Anadiplosis)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Anadiplosis);
        }
        RhetoricalFigure.FindAnadiplosis(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Antimetabole) && (everything || type.HasFlag(RhetoricalFigures.Antimetabole)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Antimetabole);
        }
        RhetoricalFigure.FindAntimetabole(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Polyptoton) && (everything || type.HasFlag(RhetoricalFigures.Polyptoton)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Polyptoton);
        }
        RhetoricalFigure.FindPolyptoton(this, windowSize);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Isocolon) && (everything || type.HasFlag(RhetoricalFigures.Isocolon)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Isocolon);
        }
        RhetoricalFigure.FindIsocolon(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Chiasmus) && (everything || type.HasFlag(RhetoricalFigures.Chiasmus)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Chiasmus);
        }
        RhetoricalFigure.FindChiasmus(this, windowSize, extra);
    }

    if (!exclusions.HasFlag(RhetoricalFigures.Oxymoron) && (everything || type.HasFlag(RhetoricalFigures.Oxymoron)))
    {
        if (callback != null)
        {
            callback(RhetoricalFigures.Oxymoron);
        }
        RhetoricalFigure.FindOxymoron(this, windowSize, extra);
    }
}