Ejemplo n.º 1
0
        /// <summary>
        /// Rebuilds the parallel tokens of one parallelized document from the posted description
        /// and writes the document back to its JSON file.
        /// </summary>
        /// <param name="sequenceOfParallelTokens">
        /// Parallels separated by <c>[[Delete]]</c>. Each parallel is a run of entries terminated by
        /// <c>};</c>; the part of an entry before the first <c> - </c> (with surrounding braces stripped)
        /// is an id of the form <c>documentID|clauseID|textID|realizationID</c>.
        /// </param>
        /// <returns>A redirect to the current page, whether or not anything was saved.</returns>
        /// <remarks>
        /// The document is located by the document id of the first usable entry; its file is the first
        /// one in <c>database/parallelizedDocuments</c> whose name starts with that id followed by
        /// <c>_</c>. Every parallel is resolved against that single document, and the document's
        /// existing <c>parallelTokens</c> list is replaced, not merged.
        /// Entries with fewer than four id parts, non-numeric text/clause ids, or an unknown
        /// realization id are skipped. If the directory, the file, or the deserialized document is
        /// missing, nothing is written.
        /// </remarks>
        public IActionResult OnPostTag(string sequenceOfParallelTokens)
        {
            if (string.IsNullOrWhiteSpace(sequenceOfParallelTokens))
            {
                return(RedirectToPage());
            }

            var parallels = sequenceOfParallelTokens.Trim().Split("[[Delete]]").Where(parallel => parallel != "");
            ParallelDocument parallelSubcorpus         = null;
            string           parallelSubcorpusFilePath = null;

            foreach (var parallel in parallels)
            {
                // Each entry is reduced to its id parts; blank or truncated ids are dropped here so the
                // indexing below cannot run out of range.
                var tokenIDs = parallel.Split("};")
                                       .Where(entry => entry != "")
                                       .Select(entry => entry.Trim().Trim('{').Split(" - ")[0])
                                       .Select(id => id.Split('|').Where(part => part != "").ToList())
                                       .Where(parts => parts.Count >= 4)
                                       .ToList();
                if (tokenIDs.Count == 0)
                {
                    continue;
                }

                if (parallelSubcorpus is null)
                {
                    // The document is loaded once, lazily, from the first usable entry.
                    string wantedDocumentID  = tokenIDs[0][0];
                    var    dirParallelCorpus = Path.Combine(_environment.ContentRootPath, "database", "parallelizedDocuments");
                    if (!Directory.Exists(dirParallelCorpus))
                    {
                        return(RedirectToPage());
                    }
                    var documentFile = new DirectoryInfo(dirParallelCorpus)
                                       .GetFiles()
                                       .FirstOrDefault(file => file.Name.Split('_')[0] == wantedDocumentID);
                    if (documentFile is null)
                    {
                        return(RedirectToPage());
                    }
                    parallelSubcorpusFilePath = documentFile.FullName;
                    using (StreamReader reader = new StreamReader(new FileStream(parallelSubcorpusFilePath, FileMode.Open, FileAccess.Read)))
                    {
                        parallelSubcorpus = JsonConvert.DeserializeObject <ParallelDocument>(reader.ReadToEnd());
                    }
                    if (parallelSubcorpus is null)
                    {
                        return(RedirectToPage());
                    }
                    parallelSubcorpus.parallelTokens = new List <ParallelToken>();
                }

                var token2Add = new ParallelToken();
                foreach (var idParts in tokenIDs)
                {
                    string docID         = idParts[0];
                    string clauseID      = idParts[1];
                    string textID        = idParts[2];
                    string realizationID = idParts[3];

                    if (!int.TryParse(textID, out int textIndex) || !int.TryParse(clauseID, out int clauseIndex))
                    {
                        continue;
                    }

                    // parallelClauses is indexed [text, clause].
                    var singleToken = parallelSubcorpus.parallelClauses[textIndex, clauseIndex]
                                      .clause.realizations
                                      .FirstOrDefault(candidate => candidate.realizationID == realizationID);
                    if (singleToken is null)
                    {
                        continue;
                    }

                    // One group per (document, text, clause) within a parallel token.
                    RealizationGroup currentGroup = token2Add.FirstOrDefault(g => g.documentID == docID && g.textID == textID && g.clauseID == clauseID);
                    if (currentGroup is null)
                    {
                        currentGroup            = new RealizationGroup();
                        currentGroup.documentID = docID;
                        currentGroup.textID     = textID;
                        currentGroup.clauseID   = clauseID;
                        token2Add.Add(currentGroup);
                    }

                    singleToken.documentID = docID;
                    singleToken.textID     = textID;
                    singleToken.clauseID   = clauseID;
                    if (!currentGroup.Contains(singleToken))
                    {
                        currentGroup.Add(singleToken);
                    }
                }

                // A parallel whose entries were all skipped contributes nothing.
                if (token2Add.Any() && !parallelSubcorpus.parallelTokens.Contains(token2Add))
                {
                    parallelSubcorpus.parallelTokens.Add(token2Add);
                }
            }

            if (!(parallelSubcorpus is null) && !(parallelSubcorpusFilePath is null))
            {
                var parallelSubcorpusInJSON = parallelSubcorpus.Jsonize();
                using (StreamWriter writer = new StreamWriter(new FileStream(parallelSubcorpusFilePath, FileMode.Create)))
                {
                    writer.Write(parallelSubcorpusInJSON);
                }
            }
            return(RedirectToPage());
        }
        /// <summary>
        /// Builds a lemma dictionary for the texts listed in <c>chosenTexts</c> and appends one HTML
        /// fragment per dictionary entry to <c>convertedTexts</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Every JSON file in <c>database/parallelizedDocuments</c> is loaded. Each entry of
        /// <c>chosenTexts</c> is read as <c>documentName: ... (id)</c>; the lemmata are collected from
        /// the realizations of the named document whose <c>clauseID</c> equals that id. For each lemma
        /// the dictionary unit holds every parallel token, from all loaded documents, that contains a
        /// matching realization. Units are ordered by lemma.
        /// </para>
        /// <para>
        /// When <c>parallelDictsToFiles</c> is true the units are also written to
        /// <c>database/parallelDictionary/&lt;chosen texts&gt;_parallelDict.json</c>; an existing file
        /// of the same name is overwritten.
        /// </para>
        /// <para>
        /// Does nothing when there are no document files or <c>chosenTexts</c> is null. Malformed
        /// <c>chosenTexts</c> entries and documents without parallel tokens are ignored.
        /// </para>
        /// </remarks>
        public void OnPostParallel()
        {
            var files = new DirectoryInfo(Path.Combine(_environment.ContentRootPath, "database", "parallelizedDocuments")).GetFiles();
            if (files.Length == 0 || chosenTexts is null)
            {
                return;
            }

            var documents = new List <ParallelDocument>();
            foreach (var documentFile in files)
            {
                using (StreamReader reader = new StreamReader(new FileStream(documentFile.FullName, FileMode.Open, FileAccess.Read)))
                {
                    documents.Add(JsonConvert.DeserializeObject <ParallelDocument>(reader.ReadToEnd()));
                }
            }

            var lemmata = new List <string>();
            foreach (var chosenText in chosenTexts)
            {
                if (chosenText is null)
                {
                    continue;
                }
                var nameAndRest = chosenText.Split(": ");
                if (nameAndRest.Length < 2)
                {
                    continue;
                }
                var aroundParenthesis = nameAndRest[1].Split(" (");
                if (aroundParenthesis.Length < 2)
                {
                    continue;
                }
                var docName  = nameAndRest[0];
                var chosenID = aroundParenthesis[1].Split(')')[0];

                // NOTE(review): this id was originally held in a local called textID but is compared
                // against clauseID - confirm which field the selection is meant to filter on.
                lemmata.AddRange(documents
                                 .Where(d => d.name == docName && d.parallelTokens != null)
                                 .SelectMany(d => d.parallelTokens)
                                 .SelectMany(t => t.ToList())
                                 .SelectMany(rg => rg.ToList())
                                 .Where(r => r.clauseID == chosenID)
                                 .Select(r => LemmaOf(r)));
            }
            lemmata = lemmata.Distinct().ToList();

            // Tokens of all loaded documents, gathered once instead of once per lemma.
            var allTokens = documents
                            .Where(d => d.parallelTokens != null)
                            .SelectMany(d => d.parallelTokens)
                            .ToList();
            var finalDictionary = lemmata
                                  .Select(lemma => new ParallelDictionaryUnit(lemma, allTokens
                                                                              .Where(t => t.Any(rg => rg.Any(r => RealizationMatchesLemma(r, lemma))))
                                                                              .ToList()))
                                  .OrderBy(unit => unit.lemma)
                                  .ToList();

            if (parallelDictsToFiles == true)
            {
                // CreateDirectory creates missing parents and is a no-op for existing directories.
                string path = Path.Combine(_environment.ContentRootPath, "database", "parallelDictionary");
                Directory.CreateDirectory(path);
                string fileName = Regex.Replace(string.Join("_", chosenTexts), @"[*""><:/\\|?\.,]", "") + "_parallelDict.json";

                // FileMode.Create rather than CreateNew: exporting the same selection twice must not throw.
                using (StreamWriter writer = new StreamWriter(new FileStream(Path.Combine(path, fileName), FileMode.Create, FileAccess.Write)))
                {
                    // NOTE(review): units are written back to back with no separator, so the file as a
                    // whole is not a single JSON document. Format kept as-is; confirm with its readers.
                    foreach (var unit in finalDictionary)
                    {
                        writer.Write(JsonConvert.SerializeObject(unit, Formatting.Indented));
                    }
                }
            }

            foreach (var unit in finalDictionary)
            {
                convertedTexts.Add(RenderDictionaryUnit(unit, documents));
            }
        }

        /// <summary>
        /// Returns the dictionary lemma of a realization: the first value name found under a
        /// "Lemma" key of its fields, or <c>lexemeTwo</c> when it has no fields or no "Lemma" key.
        /// </summary>
        private static string LemmaOf(Realization realization)
        {
            if (realization.realizationFields is null || !realization.realizationFields.Any(f => f.ContainsKey("Lemma")))
            {
                return realization.lexemeTwo;
            }
            return realization.realizationFields
                   .Where(f => f.ContainsKey("Lemma"))
                   .SelectMany(f => f["Lemma"])
                   .Select(v => v.name)
                   .FirstOrDefault();
        }

        /// <summary>
        /// True when <paramref name="lemma"/> equals the realization's <c>lexemeTwo</c> or the name of
        /// any value stored under a "Lemma" key of its fields.
        /// </summary>
        private static bool RealizationMatchesLemma(Realization realization, string lemma)
        {
            if (realization.lexemeTwo == lemma)
            {
                return true;
            }
            return realization.realizationFields != null
                   && realization.realizationFields
                      .Where(f => f.ContainsKey("Lemma"))
                      .SelectMany(f => f["Lemma"])
                      .Any(v => v.name == lemma);
        }

        /// <summary>
        /// Renders one dictionary unit as an HTML fragment: the lemma with its token count, then for
        /// each token the matching realization, its field values, its per-letter grapheme features
        /// and the parallel groups returned by <c>GetParallels</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): document text is inserted into markup and attribute values without HTML
        /// encoding; confirm the stored texts are trusted or encode them where they are displayed.
        /// </remarks>
        private static string RenderDictionaryUnit(ParallelDictionaryUnit unit, List <ParallelDocument> documents)
        {
            // Fragments are collected and joined once instead of growing one string with +=.
            var html = new List <string>();
            html.Add("<b>" + unit.lemma + "</b>");
            html.Add(" (" + unit.realizations.Count.ToString() + ")<br />");
            html.Add("<ul>");

            foreach (var token in unit.realizations)
            {
                // The first matching realization of each group is taken; when several groups match,
                // the last one wins (same outcome as the nested-loop search this replaces).
                RealizationGroup coreGroup       = null;
                Realization      coreRealization = null;
                foreach (var rg in token)
                {
                    var match = rg.FirstOrDefault(r => RealizationMatchesLemma(r, unit.lemma));
                    if (match != null)
                    {
                        coreGroup       = rg;
                        coreRealization = match;
                    }
                }
                if (coreRealization is null)
                {
                    // Units are built from tokens that contain a match, so this is only a safety net.
                    continue;
                }

                html.Add("<li>");
                var currentDocument = documents.Single(d => d.id == coreRealization.documentID);
                var coreClause      = currentDocument.parallelClauses[Convert.ToInt32(coreRealization.textID), Convert.ToInt32(coreRealization.clauseID)];
                html.Add(coreRealization.lexemeTwo
                         + " (" + String.Join(' ', coreGroup.Select(r => r.lexemeTwo))
                         + ", " + currentDocument.name
                         + " - " + coreClause.textName
                         + "); <span class=\"clause\" id=\"clauseExtractionButton\" clause=\"clause: " + coreClause.clause.clauseText
                         + "\">see text segment</span><br />");

                // Field values other than the lemma itself, de-duplicated.
                if (coreRealization.realizationFields != null && coreRealization.realizationFields.Count > 0)
                {
                    html.Add(String.Join(" - ", coreRealization.realizationFields
                                         .SelectMany(t => t)
                                         .Where(a => a.Key != "Lemma")
                                         .Select(a => String.Join("", a.Value.SelectMany(v => v.name).ToList()))
                                         .Distinct()
                                         .ToList())
                             + "<br />");
                }

                // Only letters that carry grapheme fields get a list item; the list is omitted when none do.
                var annotatedLetters = coreRealization.letters
                                       .Where(l => l.graphemeFields != null && l.graphemeFields.Count > 0)
                                       .ToList();
                if (annotatedLetters.Count > 0)
                {
                    html.Add("<ul class=\"graphemeFeatures\">"
                             + String.Join("", annotatedLetters.Select(l => "<li>" + l.grapheme + ": "
                                                                       + String.Join(" - ", l.graphemeFields
                                                                                     .SelectMany(t => t)
                                                                                     .Select(a => a.Key + "(" + String.Join("", a.Value.SelectMany(v => v.name).ToList()) + ")")
                                                                                     .Distinct()
                                                                                     .ToList())
                                                                       + "<br />"))
                             + "</ul>");
                }

                html.Add("<ul class=\"parallels\"><li>"
                         + String.Join(";<br/><li>", token.GetParallels(coreGroup).Select(parallelGroup =>
                                                                                         String.Join(' ', parallelGroup.Select(r => r.lexemeTwo).ToList())
                                                                                         + "(" + currentDocument.name + " - " + currentDocument.parallelClauses[Convert.ToInt32(parallelGroup[0].textID), Convert.ToInt32(parallelGroup[0].clauseID)].textName + ")"
                                                                                         + "; <span class=\"clause\" id=\"clauseExtractionButton\" clause=\"text segment:\r"
                                                                                         + String.Join('\r', parallelGroup.Select(r => r.clauseID).Distinct().ToList().OrderBy(id => Convert.ToInt32(id)).Select(id => currentDocument.parallelClauses[Convert.ToInt32(parallelGroup.textID), Convert.ToInt32(id)].clause.clauseText))
                                                                                         + "\">see text segment</span>"))
                         + "</ul></li>");
            }

            html.Add("</ul><br /><br />");
            return String.Join("", html);
        }