/// <summary>
/// Console entry point: lets the user pick a corpus file, asks
/// <c>OriginalTweetFilter.GetOriginalTweets</c> for the matching document ids,
/// builds a selection from them and hands it to <c>ExporterCsv</c>. The export
/// path is the corpus path with its extension changed to <c>.csv</c>.
/// </summary>
/// <param name="args">Command line arguments; not read by this method.</param>
private static void Main(string[] args)
{
    var ofd = new OpenFileDialog
    {
        Filter = "CEC6-Korpus|*.cec6",
        Multiselect = false,
        CheckFileExists = true
    };

    Console.Write("Select corpus file: ");
    ofd.ShowDialog();

    // A dismissed dialog leaves FileName empty; stop here instead of passing an
    // empty path to the corpus loader.
    if (string.IsNullOrEmpty(ofd.FileName))
    {
        Console.WriteLine("No corpus file selected.");
        return;
    }

    var corpus = CorpusAdapterWriteDirect.Create(ofd.FileName);
    Console.WriteLine(ofd.FileName);
    Console.WriteLine($"{corpus.CountDocuments} tweets");

    var guids = OriginalTweetFilter.GetOriginalTweets(corpus);
    Console.WriteLine($"{guids.Count} tweets matches all criteria");

    var all = corpus.ToSelection();
    var selection = all.Create(guids, "");

    // ChangeExtension only touches the real extension. The former
    // Replace(".cec6", ".csv") was case-sensitive (a file named "X.CEC6" would
    // have produced the corpus path itself as export target) and also rewrote
    // ".cec6" occurring inside directory names.
    var csvPath = System.IO.Path.ChangeExtension(ofd.FileName, ".csv");

    var exporter = new ExporterCsv();
    exporter.Export(selection, csvPath);

    Console.WriteLine("!END!");
    Console.ReadLine();
}
/// <summary>
/// Handles an execute request: reads an <c>ExecuteRequest</c> from the POST data,
/// checks that the requested action is configured and passes
/// <c>ExecuteActionFilter</c>, loads the corpus <c>corpora/{CorpusId}.cec6</c>,
/// runs a <c>ClusterAction</c> on the full selection and sends the captured
/// output as the response.
/// </summary>
/// <param name="req">The HTTP context supplying the POST data and receiving the response.</param>
/// <remarks>
/// Every failure, including any exception, is reported through <c>WriteError</c>;
/// nothing is rethrown to the caller.
/// </remarks>
protected override void GetExecuteRoute(HttpContext req)
{
    try
    {
        var er = req.PostData<ExecuteRequest>();
        if (er == null)
        {
            WriteError(req, Resources.WebErrorInvalidPostData);
            return;
        }

        var aCheck = Configuration.GetConsoleAction(er.Action);
        if (aCheck == null || !ExecuteActionFilter.Check(er.Action))
        {
            WriteError(req, Resources.WebErrorActionUnavailable);
            return;
        }

        // The corpus id comes from the request body. Reject anything that carries
        // directory information so a crafted id (e.g. "../../x") cannot address
        // files outside the "corpora" folder.
        var corpusFile = $"{er.CorpusId}.cec6";
        if (corpusFile != Path.GetFileName(corpusFile))
        {
            WriteError(req, Resources.WebErrorCorpusUnavailable);
            return;
        }

        var corpusPath = $"corpora/{corpusFile}";
        if (!File.Exists(corpusPath))
        {
            WriteError(req, Resources.WebErrorCorpusUnavailable);
            return;
        }

        var corpus = CorpusAdapterWriteDirect.Create(corpusPath);
        if (corpus == null)
        {
            WriteError(req, Resources.WebErrorCorpusUnavailable);
            return;
        }

        var selection = corpus.ToSelection();
        var a = new ClusterAction();

        // Fixed leading arguments first, then whatever the request supplied.
        var args = new List<string> { "XSGUID::TEXT", er.Action };
        if (er.Arguments != null && er.Arguments.Length > 0)
        {
            args.AddRange(er.Arguments);
        }

        string response;
        using (var ms = new MemoryStream())
        {
            // The action writes into a clone of the route's writer that targets
            // the in-memory stream, so its output can be returned as text.
            var writer = Writer.Clone(ms);
            a.Execute(selection, args.ToArray(), writer);
            // NOTE(review): Destroy(false) presumably finishes the writer without
            // disposing the underlying stream - confirm against the writer type.
            writer.Destroy(false);

            ms.Seek(0, SeekOrigin.Begin);
            response = Encoding.UTF8.GetString(ms.ToArray());
        }

        req.Response.Send(response, Mime);
    }
    catch (Exception ex)
    {
        WriteError(req, ex.Message);
    }
}
/// <summary>
/// Writes a small statistics report for <paramref name="corpus"/> to
/// <c>{CorpusDisplayname}_basicStat.csv</c>: document and token counts, the number
/// of values in the "Wort" layer, distinct authors and newspapers, and the
/// smallest/largest "Datum" value found in the document metadata.
/// </summary>
/// <param name="corpus">The corpus whose counts and document metadata are read.</param>
/// <remarks>
/// Values of both "Autor" and "Absender (Id)" are collected into the same author
/// set. The static <c>_zeitungenMax</c> is raised when this corpus has more
/// distinct newspapers than seen so far.
/// </remarks>
private static void CalculateBasicStatistics(CorpusAdapterWriteDirect corpus)
{
    var report = new StringBuilder();
    report.AppendLine($"Dokumente: {corpus.CountDocuments}");
    report.AppendLine($"Token: {corpus.CountToken}");
    report.AppendLine($"Types: {corpus.GetLayerValues("Wort").Count()}");

    var authors = new HashSet<string>();
    var newspapers = new HashSet<string>();
    var earliest = DateTime.MaxValue;
    var latest = DateTime.MinValue;

    foreach (var entry in corpus.DocumentMetadata)
    {
        var fields = entry.Value;

        if (fields.ContainsKey("Autor"))
        {
            authors.Add(fields["Autor"]?.ToString());
        }

        if (fields.ContainsKey("Zeitung"))
        {
            newspapers.Add(fields["Zeitung"]?.ToString());
        }

        if (fields.ContainsKey("Absender (Id)"))
        {
            authors.Add(fields["Absender (Id)"]?.ToString());
        }

        // Only genuine DateTime values take part in the date range.
        if (!fields.ContainsKey("Datum") || !(fields["Datum"] is DateTime))
        {
            continue;
        }

        var date = (DateTime)fields["Datum"];

        // The two extreme values are skipped so they cannot distort the range.
        if (date == DateTime.MinValue || date == DateTime.MaxValue)
        {
            continue;
        }

        if (date < earliest)
        {
            earliest = date;
        }

        if (date > latest)
        {
            latest = date;
        }
    }

    report.AppendLine($"Autoren: {authors.Count}");
    report.AppendLine($"Zeitungen: {newspapers.Count}");

    if (newspapers.Count > _zeitungenMax)
    {
        _zeitungenMax = newspapers.Count;
    }

    report.AppendLine($"DT-MIN: {earliest:yyyy-MM-dd}");
    report.AppendLine($"DT-MAX: {latest:yyyy-MM-dd}");

    File.WriteAllText(corpus.CorpusDisplayname + "_basicStat.csv", report.ToString());
}
/// <summary>
/// Clusters the whole corpus on the "Datum" metadata key using
/// <c>SelectionClusterGeneratorDateTimeYearMonthDayOnlyValue</c> and returns the
/// resulting selections ordered by their display name.
/// </summary>
/// <param name="corpus">The corpus to cluster.</param>
/// <returns>The cluster selections, sorted ascending by <c>Displayname</c>.</returns>
private static Selection[] GetDateClusters(CorpusAdapterWriteDirect corpus)
{
    var clusterBlock = corpus.ToSelection().CreateBlock<SelectionClusterBlock>();
    clusterBlock.ClusterGenerator = new SelectionClusterGeneratorDateTimeYearMonthDayOnlyValue();
    clusterBlock.MetadataKey = "Datum";
    clusterBlock.Calculate();

    var ordered =
        from cluster in clusterBlock.GetSelectionClusters()
        orderby cluster.Displayname
        select cluster;

    return ordered.ToArray();
}
/// <summary>
/// Click handler of the "open" button: shows a file dialog restricted by
/// <c>_filter</c>, loads the chosen file into <c>_corpus</c> and refreshes the
/// grid through <c>CorpusToGrid</c>. Does nothing when the dialog is not
/// confirmed with OK.
/// </summary>
/// <param name="sender">Event source; not read by this handler.</param>
/// <param name="e">Event data; not read by this handler.</param>
private void btn_open_Click(object sender, EventArgs e)
{
    // The dialog is a disposable component; release it as soon as the file name
    // has been consumed instead of leaving it to the finalizer.
    using (var ofd = new OpenFileDialog { Filter = _filter, CheckFileExists = true })
    {
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        _corpus = CorpusAdapterWriteDirect.Create(ofd.FileName);
    }

    CorpusToGrid();
}
/// <summary>
/// Console entry point: asks how many corpora to process and, for each one,
/// asks for its type, lets the user select it, writes the basic statistics,
/// builds the date clusters and runs the influence and frequency calculations.
/// </summary>
/// <param name="args">Command line arguments; not read by this method.</param>
/// <remarks>
/// The influence calculation uses the "Zeitung" metadata key for type <c>n</c>
/// and "Absender (Id)" for type <c>t</c>; any other type skips it. After each
/// corpus the method waits for a key press (Enter) before continuing.
/// </remarks>
private static void Main(string[] args)
{
    Console.Write("How many corpora?: ");

    // TryParse instead of Parse: empty input, a typo or end-of-input must not
    // terminate the program with an unhandled exception.
    int max;
    if (!int.TryParse(Console.ReadLine(), out max))
    {
        Console.WriteLine("Invalid number.");
        return;
    }

    for (int i = 0; i < max; i++)
    {
        Console.Write("Corpus type (t=tweet / n=news): ");
        // Tolerate surrounding whitespace, upper case and end-of-input (null).
        var type = (Console.ReadLine() ?? string.Empty).Trim();

        Console.Write("Select corpus: ");
        var corpus = SelectCorpus();
        Console.Write(corpus);

        var cec = CorpusAdapterWriteDirect.Create(corpus);
        Console.WriteLine("...ok!");

        CalculateBasicStatistics(cec);
        Console.WriteLine("BASIC DONE");

        var cluster = GetDateClusters(cec);

        if (string.Equals(type, "n", StringComparison.OrdinalIgnoreCase))
        {
            CalculateInfluence(cluster, cec.CorpusDisplayname, "Zeitung");
        }
        else if (string.Equals(type, "t", StringComparison.OrdinalIgnoreCase))
        {
            CalculateInfluence(cluster, cec.CorpusDisplayname, "Absender (Id)");
        }

        CalculateFrequency(cluster, cec.CorpusDisplayname);

        Console.WriteLine("DONE");
        Console.ReadLine();
    }
}
/// <summary>
/// Loads the corpus stored under <paramref name="fileName"/> inside the index
/// path of <c>_filterBubble</c>.
/// </summary>
/// <param name="fileName">File name that is combined with <c>_filterBubble.IndexPath</c>.</param>
/// <returns>The adapter produced by <c>CorpusAdapterWriteDirect.Create</c> for the combined path.</returns>
public AbstractCorpusAdapter Load(string fileName)
{
    var corpusPath = Path.Combine(_filterBubble.IndexPath, fileName);
    return CorpusAdapterWriteDirect.Create(corpusPath);
}