Example #1
0
        /// <summary>
        /// Loads the lexic resource into this index and into the lemma-centered index.
        /// </summary>
        /// <remarks>
        /// Returns immediately when the resource was already loaded. Lines are taken from the local cache
        /// when a cache path is set, the file exists and it yields at least 100 lines; otherwise they are
        /// read from <paramref name="resourceFilePath"/>.
        /// </remarks>
        /// <param name="output">Log builder that receives the phase, progress and completion messages.</param>
        /// <param name="resourceFilePath">Path of the resource file, used when the local cache is not usable.</param>
        public void LoadLexicResource(ILogBuilder output, String resourceFilePath)
        {
            if (isLoaded)
            {
                return;
            }

            List<String> lines = new List<String>();
            String pt = "";

            // The cache is optional: a configured but missing cache file falls through to the resource file
            // instead of throwing from File.ReadLines.
            if (!localCache.isNullOrEmpty() && File.Exists(localCache))
            {
                pt = localCache;
                lines.AddRange(File.ReadLines(localCache));
            }

            // A cache with fewer than 100 lines is not used; the resource file is loaded instead.
            if (lines.Count < 100)
            {
                pt    = resourceFilePath;
                lines = new List<String>(File.ReadAllLines(resourceFilePath));
            }

            Int32 l      = lines.Count;
            Int32 iCycle = l / 20; // progress is reported once more than 1/20 of the lines were processed
            Int32 i      = 0;      // entries processed so far
            Int32 c      = 0;      // entries processed since the last progress message

            output.logStartPhase("Loading", "Loading the lexic resource - with mode: " + mode.ToString());
            output.log("Start of loading lexic resource [" + pt + "]");

            // NOTE(review): the degree of parallelism is pinned to 1. ContainsKey/Add on this collection and
            // the read of `i` for the grammar case name are not synchronized below, so raising it needs review.
            Parallel.ForEach(lines, new ParallelOptions {
                MaxDegreeOfParallelism = 1
            }, (line) =>
            {
                string inflectForm = "";
                string lemma       = "";
                string gramTag     = "";

                SelectFromLine(line, out inflectForm, out lemma, out gramTag);

                // Lines without an inflected form are skipped and not counted in the progress.
                if (inflectForm.isNullOrEmpty())
                {
                    return;
                }

                lexicInflection inflect = null;

                if (!ContainsKey(inflectForm))
                {
                    inflect                       = new lexicInflection(line);
                    inflect.lemmaForm             = lemma;
                    inflect.name                  = inflectForm;
                    inflect.inflectedForm         = inflectForm;
                    inflect.lexicalDefinitionLine = line;

                    if (spellAlternator.IsInitiated)
                    {
                        String altInflectedForm = spellAlternator.ConvertFromAtoB(inflectForm);
                        spellAlternatives.GetOrAdd(altInflectedForm, inflectForm);
                    }

                    Add(inflectForm, inflect);
                }
                else
                {
                    inflect = base[inflectForm];
                }

                lexicGrammarCase gramCase = null;

                if (mode == textResourceIndexResolveMode.resolveOnLoad)
                {
                    // Grammar tags are converted now and attached to the inflection.
                    var gramTagColl = grammTagConverter.ConvertFromString(gramTag);

                    gramCase = inflect.AddGrammarCase(gramTagColl);
                    gramCase.lexicalDefinitionLine = gramTag;
                }
                else
                {
                    // Only the raw tag text is stored; the case gets a counter-based name.
                    gramCase = new lexicGrammarCase();
                    gramCase.lexicalDefinitionLine = gramTag;
                    gramCase.name = "gc" + i.ToString();
                    inflect.Add(gramCase);
                }

                // <----------------- construction of Lemma centered dictionary

                if (!registratedLemmaIndex.ContainsKey(lemma))
                {
                    lock (LemmaIndexLock)
                    {
                        // Re-checked under the lock before creating the set for this lemma.
                        if (!registratedLemmaIndex.ContainsKey(lemma))
                        {
                            lexicGraphSetWithLemma created = new lexicGraphSetWithLemma();
                            created.lemmaForm = lemma;
                            registratedLemmaIndex.TryAdd(lemma, created);
                        }
                    }
                }

                lexicGraphSetWithLemma lxSet = registratedLemmaIndex[lemma];

                if (!lxSet.ContainsKey(inflectForm))
                {
                    lock (SetLock)
                    {
                        if (!lxSet.ContainsKey(inflectForm))
                        {
                            lxSet.TryAdd(inflect.name, inflect);
                        }
                    }
                }

                Interlocked.Increment(ref c);
                Interlocked.Increment(ref i);
                if (c > iCycle)
                {
                    lock (loadStatusLock)
                    {
                        if (c > iCycle)
                        {
                            c = 0;
                            Double p = i.GetRatio(l);
                            output.AppendLine("Done: _" + p.ToString("P2") + "_");
                        }
                    }
                }
            });

            output.logEndPhase();
            output.log("End of loading process");
            isLoaded = true;
        }
Example #2
0
        /// <summary>
        /// Renders the token into string form
        /// </summary>
        /// <remarks>
        /// A <c>null</c> token renders as <c>textMapBase.SEPARATOR</c> only. Every other result ends with a
        /// single space appended after the mode-specific content.
        /// </remarks>
        /// <param name="token">The token to render; may be <c>null</c>.</param>
        /// <param name="mode">Selects what is rendered; values without their own case are handled like <c>currentForm</c>.</param>
        /// <returns>The rendered text.</returns>
        private static String renderString(pipelineTaskSubjectContentToken token, contentTokenSubjectRenderMode mode)
        {
            StringBuilder sb = new StringBuilder();

            if (token == null)
            {
                sb.Append(textMapBase.SEPARATOR);
                return(sb.ToString());
            }

            switch (mode)
            {
            default:
            case contentTokenSubjectRenderMode.currentForm:
                sb.Append(token.currentForm);
                break;

            case contentTokenSubjectRenderMode.lemmaForm:
                // Falls back to the current form when the token has no graph.
                if (token.graph != null)
                {
                    sb.Append(token.graph.lemmaForm);
                }
                else
                {
                    sb.Append(token.currentForm);
                }
                break;

            case contentTokenSubjectRenderMode.descriptive:
                // current form, then the lemma (when a graph is set), then the flags
                sb.Append(token.currentForm);
                if (token.graph != null)
                {
                    sb.Append(textMapBase.MAINLEVEL_COMMA);
                    sb.Append(token.graph.lemmaForm);
                }
                sb.Append(textMapBase.MAINLEVEL_COMMA);
                renderGramCase(sb, token.flagBag, false);
                break;

            case contentTokenSubjectRenderMode.flagsForm:
                renderGramCase(sb, token.flagBag, false);
                break;

            case contentTokenSubjectRenderMode.flagsFullForm:
                renderGramCase(sb, token.flagBag, true);
                break;

            case contentTokenSubjectRenderMode.initialForm:
                sb.Append(token.initialForm);
                break;

            case contentTokenSubjectRenderMode.none:
                break;

            case contentTokenSubjectRenderMode.posTypeAndGramTagForm:
                // The nested call returns the POS tags followed by its own trailing space.
                sb.Append(renderString(token, contentTokenSubjectRenderMode.posTypeTagForm));

                if (token.graph != null)
                {
                    int caseCount = token.graph.Count();
                    for (int i = 0; i < caseCount; i++)
                    {
                        // NOTE(review): an entry that is not a lexicGrammarCase yields null here and fails
                        // on the next line - confirm the graph only holds lexicGrammarCase items.
                        lexicGrammarCase gramCase = token.graph[i] as lexicGrammarCase;

                        renderGramCase(sb, gramCase.tags.GetTags(), false);

                        if (i < caseCount - 1)
                        {
                            sb.Append(textMapBase.MAINLEVEL_COMMA);
                        }
                    }
                }
                break;

            case contentTokenSubjectRenderMode.posTypeTagForm:

                List<pos_type> posTypeTags = new List<pos_type>();

                if (token.graph != null)
                {
                    foreach (var ps in token.graph.GetTagFromGramTags<pos_type>())
                    {
                        posTypeTags.AddUnique(ps);
                    }
                }

                // The token flags are used only when the graph supplied no POS tags.
                if (posTypeTags.Count == 0)
                {
                    foreach (var ps in token.flagBag.getAllOfType<pos_type>(false))
                    {
                        posTypeTags.AddUnique(ps);
                    }
                }

                // pos_type.none is never rendered. The separator is written before every rendered tag
                // except the first, so a skipped trailing `none` cannot leave a dangling separator.
                Boolean needsSeparator = false;
                foreach (pos_type tag in posTypeTags)
                {
                    if (tag == pos_type.none)
                    {
                        continue;
                    }

                    if (needsSeparator)
                    {
                        sb.Append(textMapBase.SUBLEVEL_COMMA);
                    }

                    sb.Append(tag.ToString());
                    needsSeparator = true;
                }

                break;
            }
            sb.Append(" ");
            return(sb.ToString());
        }