/// <summary>
/// Loads the lexic resource into this index. Does nothing when <c>isLoaded</c> is already set.
/// </summary>
/// <remarks>
/// Lines are first read from <c>localCache</c> (when it is not empty). If fewer than 100 lines
/// were obtained, the lines are read from <paramref name="resourceFilePath"/> instead.
/// Every line is split by <c>SelectFromLine</c> into inflected form, lemma and grammar tag;
/// lines without an inflected form are ignored.
/// </remarks>
/// <param name="output">The log builder that receives the phase markers and the progress messages.</param>
/// <param name="resourceFilePath">The resource file path, used when the local cache gives fewer than 100 lines.</param>
public void LoadLexicResource(ILogBuilder output, String resourceFilePath)
{
    if (isLoaded)
    {
        return;
    }

    // Pick the source: local cache first, the resource file as the fallback
    List<String> entries = new List<String>();
    String sourcePath = "";

    if (!localCache.isNullOrEmpty())
    {
        sourcePath = localCache;
        entries.AddRange(File.ReadLines(localCache));
    }

    if (entries.Count < 100)
    {
        sourcePath = resourceFilePath;
        entries = new List<String>(File.ReadAllLines(resourceFilePath));
    }

    Int32 total = entries.Count;
    Int32 reportInterval = total / 20;   // a progress message roughly every 5% of the input
    Int32 processed = 0;                 // entries processed so far
    Int32 sinceLastReport = 0;           // entries processed since the last progress message

    output.logStartPhase("Loading", "Loading the lexic resource - with mode: " + mode.ToString());
    output.log("Start of loading lexic resource [" + sourcePath + "]");

    ParallelOptions options = new ParallelOptions { MaxDegreeOfParallelism = 1 };

    Parallel.ForEach(entries, options, (line) =>
    {
        string inflectForm;
        string lemma;
        string gramTag;
        SelectFromLine(line, out inflectForm, out lemma, out gramTag);

        if (inflectForm.isNullOrEmpty())
        {
            return;
        }

        // <----------------- inflection entry: reuse the registered one or create it
        lexicInflection inflect;
        if (ContainsKey(inflectForm))
        {
            inflect = base[inflectForm];
        }
        else
        {
            inflect = new lexicInflection(line)
            {
                lemmaForm = lemma,
                name = inflectForm,
                inflectedForm = inflectForm,
                lexicalDefinitionLine = line
            };

            if (spellAlternator.IsInitiated)
            {
                String alternativeForm = spellAlternator.ConvertFromAtoB(inflectForm);
                spellAlternatives.GetOrAdd(alternativeForm, inflectForm);
            }

            Add(inflectForm, inflect);
        }

        // <----------------- grammar case: resolved now, or stored as raw tag
        if (mode == textResourceIndexResolveMode.resolveOnLoad)
        {
            var resolvedTags = grammTagConverter.ConvertFromString(gramTag);
            lexicGrammarCase resolvedCase = inflect.AddGrammarCase(resolvedTags);
            resolvedCase.lexicalDefinitionLine = gramTag;
        }
        else
        {
            lexicGrammarCase rawCase = new lexicGrammarCase
            {
                lexicalDefinitionLine = gramTag,
                name = "gc" + processed.ToString()
            };
            inflect.Add(rawCase);
        }

        // <----------------- construction of Lemma centered dictionary
        if (!registratedLemmaIndex.ContainsKey(lemma))
        {
            lock (LemmaIndexLock)
            {
                // checked again inside the lock, before the new set is registered
                if (!registratedLemmaIndex.ContainsKey(lemma))
                {
                    lexicGraphSetWithLemma created = new lexicGraphSetWithLemma { lemmaForm = lemma };
                    registratedLemmaIndex.TryAdd(lemma, created);
                }
            }
        }

        lexicGraphSetWithLemma lemmaSet = registratedLemmaIndex[lemma];

        if (!lemmaSet.ContainsKey(inflectForm))
        {
            lock (SetLock)
            {
                if (!lemmaSet.ContainsKey(inflectForm))
                {
                    lemmaSet.TryAdd(inflect.name, inflect);
                }
            }
        }

        // <----------------- progress reporting
        Interlocked.Increment(ref sinceLastReport);
        Interlocked.Increment(ref processed);

        if (sinceLastReport > reportInterval)
        {
            lock (loadStatusLock)
            {
                if (sinceLastReport > reportInterval)
                {
                    sinceLastReport = 0;
                    Double ratio = processed.GetRatio(total);
                    output.AppendLine("Done: _" + ratio.ToString("P2") + "_");
                }
            }
        }
    });

    output.logEndPhase();
    output.log("End of loading process");

    isLoaded = true;
}
/// <summary>
/// Renders the token into string form, according to the specified render mode.
/// </summary>
/// <param name="token">The token. When <c>null</c>, only <c>textMapBase.SEPARATOR</c> is rendered.</param>
/// <param name="mode">The mode. Values without a dedicated case are rendered as <c>currentForm</c>.</param>
/// <returns>
/// The rendered text followed by a single space; for a <c>null</c> token, the separator alone (no trailing space).
/// </returns>
private static String renderString(pipelineTaskSubjectContentToken token, contentTokenSubjectRenderMode mode)
{
    StringBuilder sb = new StringBuilder();

    if (token == null)
    {
        sb.Append(textMapBase.SEPARATOR);
        return sb.ToString();
    }

    switch (mode)
    {
        default:
        case contentTokenSubjectRenderMode.currentForm:
            sb.Append(token.currentForm);
            break;

        case contentTokenSubjectRenderMode.lemmaForm:
            // without a graph there is no lemma: fall back to the current form
            if (token.graph != null)
            {
                sb.Append(token.graph.lemmaForm);
            }
            else
            {
                sb.Append(token.currentForm);
            }
            break;

        case contentTokenSubjectRenderMode.descriptive:
            sb.Append(token.currentForm);
            if (token.graph != null)
            {
                sb.Append(textMapBase.MAINLEVEL_COMMA);
                sb.Append(token.graph.lemmaForm);
            }
            sb.Append(textMapBase.MAINLEVEL_COMMA);
            renderGramCase(sb, token.flagBag, false);
            break;

        case contentTokenSubjectRenderMode.flagsForm:
            renderGramCase(sb, token.flagBag, false);
            break;

        case contentTokenSubjectRenderMode.flagsFullForm:
            renderGramCase(sb, token.flagBag, true);
            break;

        case contentTokenSubjectRenderMode.initialForm:
            sb.Append(token.initialForm);
            break;

        case contentTokenSubjectRenderMode.none:
            break;

        case contentTokenSubjectRenderMode.posTypeAndGramTagForm:
        {
            // POS type part first (it brings its own trailing space), then the tags of each grammar case
            sb.Append(renderString(token, contentTokenSubjectRenderMode.posTypeTagForm));

            if (token.graph != null)
            {
                int caseCount = token.graph.Count();
                for (int i = 0; i < caseCount; i++)
                {
                    lexicGrammarCase gramCase = token.graph[i] as lexicGrammarCase;

                    // a child that is not a grammar case renders nothing, instead of failing on a null reference
                    if (gramCase != null)
                    {
                        renderGramCase(sb, gramCase.tags.GetTags(), false);
                    }

                    if (i < caseCount - 1)
                    {
                        sb.Append(textMapBase.MAINLEVEL_COMMA);
                    }
                }
            }
            break;
        }

        case contentTokenSubjectRenderMode.posTypeTagForm:
        {
            List<pos_type> posTypeTags = new List<pos_type>();

            // preferred source: POS tags found in the grammar tags of the graph
            if (token.graph != null)
            {
                foreach (var graphTag in token.graph.GetTagFromGramTags<pos_type>())
                {
                    posTypeTags.AddUnique(graphTag);
                }
            }

            // fallback source: POS flags from the flag bag of the token
            if (!posTypeTags.Any())
            {
                foreach (var flagTag in token.flagBag.getAllOfType<pos_type>(false))
                {
                    posTypeTags.AddUnique(flagTag);
                }
            }

            // pos_type.none is never rendered. The separator is written before each rendered tag
            // except the first one, so skipped entries can never leave a dangling separator.
            Boolean separatorNeeded = false;
            foreach (pos_type tag in posTypeTags)
            {
                if (tag == pos_type.none)
                {
                    continue;
                }

                if (separatorNeeded)
                {
                    sb.Append(textMapBase.SUBLEVEL_COMMA);
                }

                sb.Append(tag.ToString());
                separatorNeeded = true;
            }
            break;
        }
    }

    sb.Append(" ");
    return sb.ToString();
}