Exemple #1
0
        /// <summary>
        /// Console entry point: asks for a CEC6 corpus file, selects the documents returned by
        /// <c>OriginalTweetFilter.GetOriginalTweets</c> and exports that selection as CSV
        /// next to the corpus file.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        private static void Main(string[] args)
        {
            var ofd = new OpenFileDialog {
                Filter = "CEC6-Korpus|*.cec6", Multiselect = false, CheckFileExists = true
            };

            Console.Write("Select corpus file: ");
            ofd.ShowDialog();

            // FileName stays empty when the dialog is closed without choosing a file;
            // stop here instead of handing an empty path to the corpus loader.
            if (string.IsNullOrEmpty(ofd.FileName))
            {
                Console.WriteLine("No corpus file selected.");
                return;
            }

            var corpus = CorpusAdapterWriteDirect.Create(ofd.FileName);

            Console.WriteLine(ofd.FileName);
            Console.WriteLine($"{corpus.CountDocuments} tweets");

            var guids = OriginalTweetFilter.GetOriginalTweets(corpus);

            Console.WriteLine($"{guids.Count} tweets matches all criteria");

            var all       = corpus.ToSelection();
            var selection = all.Create(guids, "");
            var exporter  = new ExporterCsv();

            // Swap only the extension. A plain string Replace is case-sensitive and matches anywhere
            // in the path, so "X.CEC6" (or a file with another extension) would yield the input path
            // itself and the export would overwrite the corpus.
            var csvPath = System.IO.Path.ChangeExtension(ofd.FileName, ".csv");

            exporter.Export(selection, csvPath);
            Console.WriteLine("!END!");
            Console.ReadLine();
        }
        /// <summary>
        /// Handles an execute request: reads an <c>ExecuteRequest</c> from the POST data, validates the
        /// requested action and corpus, runs the action through a <c>ClusterAction</c> on the whole corpus
        /// and sends the captured output back as the response.
        /// </summary>
        /// <param name="req">The HTTP context of the current request.</param>
        /// <remarks>
        /// Every failure (invalid post data, unavailable action, unavailable corpus, or any exception)
        /// is reported to the client through <c>WriteError</c>; nothing is thrown to the caller.
        /// </remarks>
        protected override void GetExecuteRoute(HttpContext req)
        {
            try
            {
                var er = req.PostData <ExecuteRequest>();
                if (er == null)
                {
                    WriteError(req, Resources.WebErrorInvalidPostData);
                    return;
                }

                var aCheck = Configuration.GetConsoleAction(er.Action);
                if (aCheck == null || !ExecuteActionFilter.Check(er.Action))
                {
                    WriteError(req, Resources.WebErrorActionUnavailable);
                    return;
                }

                // The corpus id is client-supplied. Reject anything that is not a plain file name
                // (directory separators and other invalid file-name characters) so the request
                // cannot reach files outside the "corpora" directory.
                var corpusId = $"{er.CorpusId}";
                if (corpusId.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
                {
                    WriteError(req, Resources.WebErrorCorpusUnavailable);
                    return;
                }

                var corpusPath = $"corpora/{corpusId}.cec6";
                if (!File.Exists(corpusPath))
                {
                    WriteError(req, Resources.WebErrorCorpusUnavailable);
                    return;
                }

                var corpus = CorpusAdapterWriteDirect.Create(corpusPath);
                if (corpus == null)
                {
                    WriteError(req, Resources.WebErrorCorpusUnavailable);
                    return;
                }

                var selection = corpus.ToSelection();
                var a         = new ClusterAction();

                // Fixed leading arguments, then the requested action, then any client-supplied arguments.
                var args      = new List <string>
                {
                    "XSGUID::TEXT",
                    er.Action
                };
                if (er.Arguments != null && er.Arguments.Length > 0)
                {
                    args.AddRange(er.Arguments);
                }

                string response;
                using (var ms = new MemoryStream())
                {
                    // NOTE(review): Writer.Clone(ms) presumably yields a writer that targets the
                    // memory stream, and Destroy(false) presumably flushes it without closing the
                    // stream - confirm against the writer implementation.
                    var writer = Writer.Clone(ms);
                    a.Execute(selection, args.ToArray(), writer);
                    writer.Destroy(false);

                    ms.Seek(0, SeekOrigin.Begin);
                    response = Encoding.UTF8.GetString(ms.ToArray());
                }
                req.Response.Send(response, Mime);
            }
            catch (Exception ex)
            {
                WriteError(req, ex.Message);
            }
        }
        /// <summary>
        /// Collects basic figures for a corpus (document, token and type counts, distinct authors,
        /// distinct newspapers, earliest and latest document date) and writes them to
        /// "&lt;CorpusDisplayname&gt;_basicStat.csv". Also raises <c>_zeitungenMax</c> when this corpus
        /// has more distinct newspapers than seen so far.
        /// </summary>
        /// <param name="corpus">The corpus to summarize.</param>
        private static void CalculateBasicStatistics(CorpusAdapterWriteDirect corpus)
        {
            var report = new StringBuilder();

            report.AppendLine($"Dokumente: {corpus.CountDocuments}")
                  .AppendLine($"Token: {corpus.CountToken}")
                  .AppendLine($"Types: {corpus.GetLayerValues("Wort").Count()}");

            var authors    = new HashSet <string>();
            var newspapers = new HashSet <string>();
            var earliest   = DateTime.MaxValue;
            var latest     = DateTime.MinValue;

            foreach (var entry in corpus.DocumentMetadata)
            {
                var fields = entry.Value;

                // "Autor" and "Absender (Id)" both feed the same author set.
                if (fields.ContainsKey("Autor"))
                {
                    authors.Add(fields["Autor"]?.ToString());
                }
                if (fields.ContainsKey("Zeitung"))
                {
                    newspapers.Add(fields["Zeitung"]?.ToString());
                }
                if (fields.ContainsKey("Absender (Id)"))
                {
                    authors.Add(fields["Absender (Id)"]?.ToString());
                }

                // Only real DateTime values take part in the date range.
                if (!fields.ContainsKey("Datum") || !(fields["Datum"] is DateTime))
                {
                    continue;
                }

                var stamp = (DateTime)fields["Datum"];

                // MinValue / MaxValue are skipped so they cannot distort the range.
                if (stamp != DateTime.MinValue && stamp != DateTime.MaxValue)
                {
                    if (stamp < earliest)
                    {
                        earliest = stamp;
                    }
                    if (stamp > latest)
                    {
                        latest = stamp;
                    }
                }
            }

            report.AppendLine($"Autoren: {authors.Count}");
            report.AppendLine($"Zeitungen: {newspapers.Count}");

            // Track the largest newspaper count seen so far.
            if (newspapers.Count > _zeitungenMax)
            {
                _zeitungenMax = newspapers.Count;
            }

            report.AppendLine($"DT-MIN: {earliest:yyyy-MM-dd}");
            report.AppendLine($"DT-MAX: {latest:yyyy-MM-dd}");

            var targetFile = corpus.CorpusDisplayname + "_basicStat.csv";
            File.WriteAllText(targetFile, report.ToString());
        }
        /// <summary>
        /// Clusters the whole corpus on the "Datum" metadata key using a
        /// <c>SelectionClusterGeneratorDateTimeYearMonthDayOnlyValue</c> generator.
        /// </summary>
        /// <param name="corpus">The corpus to cluster.</param>
        /// <returns>The resulting cluster selections, ordered by their display name.</returns>
        private static Selection[] GetDateClusters(CorpusAdapterWriteDirect corpus)
        {
            var clusterBlock = corpus.ToSelection().CreateBlock <SelectionClusterBlock>();

            clusterBlock.ClusterGenerator = new SelectionClusterGeneratorDateTimeYearMonthDayOnlyValue();
            clusterBlock.MetadataKey      = "Datum";
            clusterBlock.Calculate();

            var clusters = clusterBlock.GetSelectionClusters();
            return clusters.OrderBy(cluster => cluster.Displayname).ToArray();
        }
        /// <summary>
        /// Click handler of the "open" button: lets the user pick a corpus file, loads it into
        /// <c>_corpus</c> and refreshes the grid. Does nothing when the dialog is not confirmed.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (not used).</param>
        private void btn_open_Click(object sender, EventArgs e)
        {
            string fileName;

            // The dialog is disposable; release it as soon as the chosen path has been read.
            using (var ofd = new OpenFileDialog {
                Filter = _filter, CheckFileExists = true
            })
            {
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                fileName = ofd.FileName;
            }

            _corpus = CorpusAdapterWriteDirect.Create(fileName);
            CorpusToGrid();
        }
        /// <summary>
        /// Console entry point: asks how many corpora to process and, for each one, asks for its type
        /// and file, then writes the basic statistics, the influence figures (for type "n" or "t")
        /// and the frequency figures based on the per-date clusters.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.Write("How many corpora?: ");

            // TryParse instead of Parse: a typo or end of input should end with a message, not a crash.
            int max;
            if (!int.TryParse(Console.ReadLine(), out max))
            {
                Console.WriteLine("Invalid number.");
                return;
            }

            for (int i = 0; i < max; i++)
            {
                Console.Write("Corpus type (t=tweet / n=news): ");
                var type = Console.ReadLine();

                Console.Write("Select corpus: ");
                var corpus = SelectCorpus();
                Console.Write(corpus);

                var cec = CorpusAdapterWriteDirect.Create(corpus);
                Console.WriteLine("...ok!");

                CalculateBasicStatistics(cec);
                Console.WriteLine("BASIC DONE");

                var cluster = GetDateClusters(cec);

                // The metadata key used for the influence calculation depends on the corpus type;
                // any other type skips the influence step.
                if (type == "n")
                {
                    CalculateInfluence(cluster, cec.CorpusDisplayname, "Zeitung");
                }
                else if (type == "t")
                {
                    CalculateInfluence(cluster, cec.CorpusDisplayname, "Absender (Id)");
                }

                CalculateFrequency(cluster, cec.CorpusDisplayname);

                Console.WriteLine("DONE");
                Console.ReadLine();
            }
        }
 /// <summary>
 /// Loads the corpus stored under <paramref name="fileName"/>, combined with the index path
 /// of <c>_filterBubble</c>.
 /// </summary>
 /// <param name="fileName">File name of the corpus, combined with <c>_filterBubble.IndexPath</c>.</param>
 /// <returns>The adapter produced by <c>CorpusAdapterWriteDirect.Create</c>.</returns>
 public AbstractCorpusAdapter Load(string fileName)
 {
     var corpusPath = Path.Combine(_filterBubble.IndexPath, fileName);

     return CorpusAdapterWriteDirect.Create(corpusPath);
 }