/// <summary>
        /// Describes the weighting model via the specified logger.
        /// </summary>
        /// <param name="logger">The logger.</param>
        public void Describe(ILogBuilder logger)
        {
            logger.AppendLine("Feature Weighting model");

            if (DoUseLocalFunction)
            {
                logger.AppendLine("Local weight model");
                logger.nextTabLevel();
                LocalFunction.Describe(logger);
                logger.prevTabLevel();
            }


            if (GlobalFactors.Any()) // assumed fix: the original repeated the DoUseLocalFunction check here, which looks like a copy-paste slip
            {
                logger.AppendLine("Global weight model(s)");
            }

            logger.nextTabLevel();
            foreach (var lf in GlobalFactors)
            {
                lf.Describe(logger);
            }
            logger.prevTabLevel();
        }
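All of the snippets in this listing drive the same small logging surface. For orientation, the sketch below collects the ILogBuilder members these examples actually call; the member names and argument positions mirror the calls seen in the snippets, but the parameter names, defaults and exact overloads are assumptions for illustration, not the real imbSCI declaration.

// Hedged sketch of the ILogBuilder members exercised by the examples in this listing.
// Names and argument positions mirror the calls below; parameter names, default values
// and exact overloads are assumptions, not the actual imbSCI interface definition.
public interface ILogBuilderSketch
{
    int tabLevel { get; }                                     // current indentation depth (read in describe())

    void Append(string text);                                 // append text without a line break
    void AppendLine(string line);                             // append a full line
    void AppendPair(string key, object value,
                    bool newLine = true,
                    string separator = "\t\t\t");             // "key : value" style line
    void AppendHeading(string heading, int level);            // section heading
    void AppendHorizontalLine();                              // visual separator

    void nextTabLevel();                                      // increase indentation
    void prevTabLevel();                                      // decrease indentation
    void rootTabLevel();                                      // reset indentation

    void log(string message);                                 // plain log entry
    void open(string id, string title, string description);   // open a named block
    void close();                                              // close the last opened block

    void logStartPhase(string title, string description);     // phase bracketing (see LoadLexicResource below)
    void logEndPhase();

    void consoleAltColorToggle();                              // toggle the alternate console color
}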
Example No. 2
        protected override void stageExecute(ILogBuilder response)
        {
            foreach (string word in state.entryList)
            {
                // <------------------------------------------------------------------------------------------------ Exploration


                // pass the response log only when verbose output is requested
                termExploreModel output = termExploreProcedures.exploreWithUnitex(word, state.verbose ? response : null);

                if (output.wasExploreFailed)
                {
                    if (state.debug)
                    {
                        response.consoleAltColorToggle();
                        response.AppendLine("--- running debug search for [" + word + "]");
                        var exp = languageManagerUnitex.manager.operatorDelaf.Search(word, false, 25);
                        exp.ToString(response, true);

                        string debugLines = exp.ToString();
                        string debugPath  = semanticLexiconManager.manager.constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(word + "_failDebug.txt");
                        debugLines.saveStringToFile(debugPath, getWritableFileMode.overwrite);

                        response.consoleAltColorToggle();
                    }
                    state.shadowBuffer.Add(word);
                    state.failedBuffer.Add(word);
                }
                else
                {
                    if (state.saveModel)
                    {
                        semanticLexiconManager.manager.constructor.saveTermModel(output);
                    }
                    state.shadowBuffer.Add(word);
                    state.shadowBuffer.AddRangeUnique(output.GetShadow());
                    try
                    {
                        semanticLexiconManager.manager.constructor.addTermModelToLexicon(output);
                        response.AppendLine("Lexicon update: Lemma [" + output.lemma.inputForm + "][" + output.instances.Count() + "]");
                        state.processedBuffer.Add(output.lemma.inputForm);
                    }
                    catch (Exception ex)
                    {
                        state.failedBuffer.Add(word);
                        response.AppendLine("Lexicon term update failed for [" + word + "][" + output.lemmaForm + "]");
                        output.ToString(response, true);
                    }
                }
            }
        }
Example No. 3
        public static termExploreModelSet exploreWithApertiumAndWordnet(string word, ILogBuilder response)
        {
            //List<TermLemma> lemmas = semanticLexiconManager.manager.resolve(word);

            termExploreModelSet outset = semanticLexiconManager.manager.constructor.loadTermModels(word);

            response.AppendLine("term[" + word + "]->models[" + Enumerable.Count(outset) + "]");

            if (Enumerable.Count(outset) == 0)
            {
                outset.missingLemmas.Add(word);
                response.AppendLine("term[" + word + "]->missingLemma[]");
                return(outset);
            }

            int c = 0;

            foreach (termExploreModel model in outset)
            {
                var result = languageManagerApertium.manager.queryForSynonyms(model.lemma.inputForm, apertiumDictQueryScope.exact);
                var srb    = result.GetNativeWords();
                var eng    = result.GetTranslatedWords();
                model.translations.AddRange(eng);
                model.synonyms.AddRange(srb);
                response.AppendLine("term[" + word + "]->model[" + c.ToString() + "]->lemma[" + model.lemma.inputForm + "] --> Apertium.dic ==> srb[" + srb.Count() + "] eng[" + eng.Count() + "]");
                // <-- wordnet

                wordnetSymsetResults wordnet_res = languageManagerWordnet.manager.query_eng(model.translations, response);

                model.wordnetSecondarySymsets.AddRange(wordnet_res.GetKeys());

                wordnetSymsetResults wordnet_2nd = languageManagerWordnet.manager.query_eng_symset(model.wordnetSecondarySymsets, response);

                model.wordnetSynonyms.AddRange(wordnet_2nd.GetEnglish());



                var synTrans = languageManagerApertium.manager.query(model.wordnetSynonyms, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);

                model.wordnetSynonymSerbian.AddRange(synTrans.GetNativeWords());

                response.AppendLine("WordNet(" + eng.Count() + ") ==> synsets[" + Enumerable.Count(model.wordnetSecondarySymsets) + "]  synEng[" + Enumerable.Count(model.wordnetSynonyms) + "] ==> synSrb[" + Enumerable.Count(model.wordnetSynonymSerbian) + "]");

                semanticLexiconManager.manager.constructor.saveTermModel(model);

                c++;
            }


            //termExploreModel output = semanticLexiconManager.manager.constructor.mode
            return(outset);
        }
 public override void Describe(ILogBuilder logger)
 {
     if (IsEnabled)
     {
         logger.AppendLine("Document filter enabled");
         logger.AppendPair("Ranking function", function.GetType().Name, true, "\t\t\t");
         logger.AppendPair("Select top", limit, true, "\t\t\t");
     }
     else
     {
         logger.AppendLine("Document filter disabled");
     }
 }
        /// <summary>
        /// Reduces the document set.
        /// </summary>
        /// <param name="docSet">The document set - web site.</param>
        /// <param name="settings">The settings.</param>
        /// <param name="logger">The logger.</param>
        /// <returns>Ratio of output size to input size, i.e. the fraction of HTML retained after reduction</returns>
        public Double ReduceDocumentSet(WebSiteDocuments docSet, HtmlDocumentReductionSettings settings, ILogBuilder logger)
        {
            Int32 input  = 0;
            Int32 output = 0;

            foreach (WebSiteDocument document in docSet.documents)
            {
                input += document.HTMLSource.Length;

                String newHtml = ReduceDocument(document.HTMLSource, settings, logger);

                output += newHtml.Length;


                document.HTMLSource = newHtml;
            }

            Double reduction = output.GetRatio(input);

            if (settings.logSiteLevel)
            {
                logger.AppendLine("[" + docSet.domain + "] reduced to: " + reduction.ToString("P2"));
            }

            return(reduction);
        }
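Several of these snippets rely on an integer GetRatio extension (output.GetRatio(input) here, i.GetRatio(l) in the resource loader further down). Below is a minimal sketch of what that helper appears to compute, assuming a plain division with a zero-denominator guard; the actual imbSCI extension may differ.

using System;

// Minimal sketch of the GetRatio extension as it is used in these examples:
// receiver divided by argument as a Double, guarded against a zero denominator.
// This is an assumption for illustration, not the actual imbSCI implementation.
public static class RatioSketch
{
    public static Double GetRatio(this Int32 numerator, Int32 denominator)
    {
        if (denominator == 0)
        {
            return 0; // avoid DivideByZeroException for empty inputs
        }
        return numerator / (double)denominator; // e.g. 2500.GetRatio(10000) == 0.25, rendered as "25.00 %" by ToString("P2")
    }
}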
        public void Describe(ILogBuilder logger)
        {
            if (logger != null)
            {
                logger.AppendPair("Stemmer", stemmer.GetType().Name, true, "\t\t\t");

                logger.AppendPair("Tokenizer", tokenizer.GetType().Name, true, "\t\t\t");

                if (transliteration != null)
                {
                    logger.AppendPair("Transliteration", transliteration.lang_A_id + "-" + transliteration.lang_B_id, true, "\t\t\t");
                }
                else
                {
                    logger.AppendPair("Transliteration", "Disabled", true, "\t\t\t");
                }

                filter.Describe(logger);

                weightModel.Describe(logger);

                if (constructorSettings != null)
                {
                    constructorSettings.Describe(logger);
                }
                else
                {
                    logger.AppendLine("Vector model constructor - not set");
                }
            }
        }
Example No. 7
 public void Describe(ILogBuilder logger)
 {
     logger.AppendLine("Factor [" + GlobalFunction.shortName + "] W[" + weight.ToString("F2") + "]");
     logger.nextTabLevel();
     GlobalFunction.Describe(logger);
     logger.prevTabLevel();
 }
Example No. 8
        public virtual void report(ILogBuilder output)
        {
            output.AppendLine("Node - click depth[" + clickDepth + "] - links[" + links.Count + "]");

            reportLink(output);

            links.reportCollection(output);
        }
 /// <summary>
 /// Describes the rendering instructions via the specified logger.
 /// </summary>
 /// <param name="logger">The logger.</param>
 public override void Describe(ILogBuilder logger)
 {
     if (instructions.Any())
     {
         logger.AppendLine("Rendering instructions:");
         Int32 c = 1;
         foreach (var inst in instructions)
         {
             logger.AppendLine("[" + c.ToString("D2") + "] " + inst.name + " Code=[" + inst.code + "] Flags=[" + inst.instructionFlags.ToString() + "]");
             c++;
         }
     }
     else
     {
         logger.AppendLine("No rendering instructions");
     }
 }
Example No. 10
        /// <summary>
        /// Logs a multiline description of the gramCaseSet
        /// </summary>
        /// <param name="log">The log.</param>
        public void ToString(ILogBuilder log, bool expanded = false)
        {
            //StringBuilder sb = new StringBuilder();
            log.AppendLine("Grammatical sets");
            int c = 0;

            foreach (KeyValuePair <string, gramFlags> gf in items)
            {
                log.AppendLine("[" + c + "] " + gf.Key);
                if (expanded)
                {
                    log.AppendLine(gf.Value.ToString(!expanded));
                    log.AppendLine("");
                }
                c++;
            }
        }
        /// <summary>
        /// Describes the cross validation settings
        /// </summary>
        /// <param name="logger">The logger.</param>
        public void Describe(ILogBuilder logger)
        {
            logger.AppendLine("--- k-fold crossvalidation settings---");

            logger.AppendPair("Number of folds [k]", K, true, "\t\t\t");
            logger.AppendPair("Training folds [t]", TrainingFolds, true, "\t\t\t");
            logger.AppendPair("Fold randomization", randomFolds.ToString(), true, "\t\t\t");
        }
Example No. 12
 /// <summary>
 /// Logs a multiline description of this item: its input form and grammatical set
 /// </summary>
 /// <param name="log">The log.</param>
 public void ToString(ILogBuilder log, string itemName = "Instance", bool expanded = false)
 {
     //StringBuilder sb = new StringBuilder();
     log.AppendLine(itemName + ": " + inputForm);
     log.consoleAltColorToggle();
     gramSet.ToString(log, expanded);
     log.consoleAltColorToggle();
 }
Example No. 13
        /// <summary>
        /// Creates message collections
        /// </summary>
        /// <param name="loger">The loger.</param>
        /// <param name="workspace">The workspace.</param>
        /// <param name="showMessage">if set to <c>true</c> [show message].</param>
        /// <returns>List of the created e-mail message collections</returns>
        public List <emailMessageCollection> createCollections(ILogBuilder loger, folderNode workspace, Boolean showMessage = true)
        {
            List <emailMessageCollection> output = new List <emailMessageCollection>();

            stringTemplate subject = new stringTemplate(subjectTemplate);

            stringTemplate content = new stringTemplate(contentTemplate);

            stringTemplate address = new stringTemplate(addressTemplate);

            emailMessageCollection messageCollection = new emailMessageCollection();

            messageCollection.name = "block01";
            output.Add(messageCollection);

            foreach (DataRow row in dataSource.Rows)
            {
                emailMessage message = new emailMessage();
                message.from    = fromAddress;
                message.content = content.applyToContent(row);
                message.subject = subject.applyToContent(row);
                message.address = address.applyToContent(row);

                if (showMessage)
                {
                    loger.AppendLine("Subject: " + message.subject);
                    loger.AppendLine("Content: " + message.content);
                }

                messageCollection.Add(message);
                if (messageCollection.Count >= collectionSize)
                {
                    messageCollection.Save(workspace.pathFor(messageCollection.name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Block of messages", true));
                    messageCollection      = new emailMessageCollection();
                    messageCollection.name = "block" + output.Count.ToString("D2");
                    output.Add(messageCollection);
                    loger.log("Message block created: " + messageCollection.name);
                }
            }

            messageCollection.Save(workspace.pathFor(messageCollection.name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Block of messages", true));

            return(output);
        }
Example No. 14
        public void Describe(ILogBuilder logger)
        {
            if (IsEnabled)
            {
                logger.AppendLine("Feature Selection");
                logger.AppendPair("Limit", limit, true, "\t\t\t");

                logger.AppendLine("Ranking method for n-dimensional Feature Weights: " + nVectorValueSelectionOperation.ToString());


                logger.AppendPair("Function", WeightModel.GetSignature(), true, "\t\t\t");

                WeightModel.Describe(logger);
            }
            else
            {
                logger.AppendLine("Feature Selection method is disabled");
            }
        }
Example No. 15
        /// <summary>
        /// Logs a multiline description of this term model
        /// </summary>
        /// <param name="loger">The logger.</param>
        /// <param name="expanded">if set to <c>true</c>, grammar sets are listed in expanded form.</param>
        /// <param name="showInstances">if set to <c>true</c>, the instances are listed as well.</param>
        public void ToString(ILogBuilder loger, bool expanded = false, bool showInstances = true)
        {
            if (loger == null)
            {
                return;
            }

            loger.AppendLine("Input term: " + inputForm);
            if (!rootWord.isNullOrEmpty())
            {
                loger.log("Root: " + rootWord);
            }

            if (lemma != null)
            {
                lemma.ToString(loger, "Lemma");
            }
            if (showInstances)
            {
                foreach (termExploreItem sug in instances)
                {
                    sug.ToString(loger, "Instance", expanded);
                }
            }

            if (synonyms.Any())
            {
                loger.AppendLine("Related terms: " + synonyms.Join(','));
            }
            if (wordnetSynonyms.Any())
            {
                loger.AppendLine("Not accepted: " + wordnetSynonyms.Join(','));
            }


            if (translations.Any())
            {
                loger.AppendLine("Translations: " + translations.Join(','));
            }
            if (translationRelated.Any())
            {
                loger.AppendLine("Not accepted: " + translationRelated.Join(','));
            }


            // if (wordnetSynonymSerbian.Any()) loger.AppendLine("WordNet translated: " + wordnetSynonymSerbian.Join(','));
            if (wordnetPrimarySymsets.Any())
            {
                loger.AppendLine("Symsets: " + wordnetPrimarySymsets.Join(','));
            }
            if (wordnetSecondarySymsets.Any())
            {
                loger.AppendLine("Not accepted: " + wordnetSecondarySymsets.Join(','));
            }
        }
        public override void Describe(ILogBuilder logger)
        {
            constructor.Describe(logger);

            if (!ExportClassifierMemory.isNullOrEmpty())
            {
                logger.AppendLine("Export memory to: " + ExportClassifierMemory);
            }

            if (classifierSettings != null)
            {
                // left empty in the original: classifier settings are not described here yet
            }
        }
        protected void DescribeDimensions(ILogBuilder logger, List <dimensionSpecification> dimensions, string heading)
        {
            if (dimensions.Any())
            {
                logger.AppendLine(heading);
                logger.nextTabLevel();

                for (int i = 0; i < dimensions.Count; i++)
                {
                    // assumed fix: the original iterated the labelDimensions field here, ignoring the dimensions argument
                    dimensionSpecification ds = dimensions[i];
                    logger.AppendPair("[" + i.ToString("D2") + "]", ds.functionName, true, "\t\t\t");
                }

                logger.prevTabLevel();
            }
        }
Example No. 18
        /// <summary>
        /// Describes the unit via the specified logger
        /// </summary>
        /// <param name="loger">The logger.</param>
        public void describe(ILogBuilder loger)
        {
            //            loger.log("deliveryUnit describe() call started");

            loger.AppendHeading("Delivery unit (" + GetType().Name + ")", 2);

            loger.AppendLine("Logical name: " + name);

            loger.open("items", "Delivery items", "List of all deliveryUnit items contained here");
            foreach (IDeliveryUnitItem item in items)
            {
                //loger.AppendHeading(this.name + " (" + this.GetType().Name + ")", 3);

                loger.AppendLine(" > " + item.name + ":" + item.itemType.ToString());
                loger.AppendLine(" > > Location: " + item.location.ToString());
                loger.AppendLine(" > > Description: " + item.description);
            }
            loger.close();

            loger.open("items", "Items by level", "Showing items triggered by scope level");
            reportElementLevel lev = reportElementLevel.none;

            foreach (KeyValuePair <reportElementLevel, List <deliveryUnitItem> > pair in itemByLevel)
            {
                lev = pair.Key;
                foreach (deliveryUnitItem it in pair.Value)
                {
                    loger.AppendLine(lev.ToString() + " --> " + it.name + " (" + it.GetType().Name + ")");
                }
            }
            loger.close();

            loger.open("items", "Output by level", "Showing items designated as output items and triggered by scope level");
            foreach (KeyValuePair <reportElementLevel, List <deliveryUnitItem> > pair in outputByLevel)
            {
                lev = pair.Key;
                foreach (deliveryUnitItem it in pair.Value)
                {
                    loger.AppendLine(lev.ToString() + " --> " + it.name + " (" + it.GetType().Name + ")");
                }
            }
            loger.close();

            //  loger.log("deliveryUnit describe() call finished");
        }
Example No. 19
        /// <summary>
        /// Generates a compiled version of TF-IDF table. <see cref="weightTableCompiled"/>
        /// </summary>
        /// <param name="loger">The loger - for diagnostics</param>
        /// <returns></returns>
        public weightTableCompiled GetCompiledTable(ILogBuilder loger = null)
        {
            weightTableCompiled output = new weightTableCompiled(name);

            int    ti       = 0;
            int    ts       = 10;
            int    c        = 0;
            int    tc       = Count();
            int    input_c  = 0;
            int    output_c = 0;
            double io_r     = 0;

            updateMaxValues();

            foreach (IWeightTableTerm t in terms.Values)
            {
                double tp = ti.GetRatio(tc);

                weightTableTermCompiled cterm = GetCompiledTerm(t); //output.Add(t, GetAFreq(t.nominalForm)) as weightTableTermCompiled;

                output.AddOrUpdate(cterm);

                // advance the progress counters -- in the original these were never incremented,
                // so the periodic progress line below could never be reached
                ti++;
                c++;

                if (c > ts)
                {
                    c    = 0;
                    io_r = input_c.GetRatio(output_c);
                    if (loger != null)
                    {
                        loger.AppendLine("TF-IDF [" + name + "] table compiled [" + tp.ToString("P2") + "]");
                    }
                }
            }

            output.updateMaxValues();

            return(output);
        }
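A hedged usage sketch for GetCompiledTable(); tfidfTable stands for whatever weightTable instance hosts the method above and log for any ILogBuilder implementation, both names being illustrative assumptions.

// Hedged usage sketch; "tfidfTable" and "log" are assumed names, not part of the original source.
weightTableCompiled compiled = tfidfTable.GetCompiledTable(log); // progress lines are emitted periodically while terms are compiled
log.AppendLine("Compiled TF-IDF table ready");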
Example No. 20
        /// <summary>
        /// Method: word --> translation --> synset --> other synsets --> collecting all words --> translation back --> word
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="response">The response.</param>
        /// <param name="disableCodePrefixFilter">if set to <c>true</c> [disable code prefix filter].</param>
        /// <param name="disableCodeBranchFilter">if set to <c>true</c> [disable code branch filter].</param>
        /// <returns></returns>
        public static termExploreModel getSynonymsWithWordnetViaApertium(termExploreModel model, ILogBuilder response, bool disableCodePrefixFilter = false, bool disableCodeBranchFilter = false)
        {
            tokenGraph result = languageManagerApertium.manager.queryForGraph(model.lemma.inputForm, apertiumDictQueryScope.exact);

            model.translations.AddRange(result.getAllLeafs().getNames());


            if (response != null)
            {
                response.consoleAltColorToggle();
                string rst = result.ToStringTreeview();

                response.Append(rst);
                response.consoleAltColorToggle();
            }

            languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getSymsetCodesByWord);


            if (response != null)
            {
                response.consoleAltColorToggle();
                string st = result.ToStringTreeview();

                response.Append(st);
                response.consoleAltColorToggle();
            }

            model.wordnetSecondarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

            if (!disableCodePrefixFilter)
            {
                string codeStart    = model.lemma.gramSet.getPosType().GetWordNetCodeStart().ToString();
                Regex  codeCriteria = new Regex("^" + codeStart);

                var badCodes = result.getAllLeafs(codeCriteria, true);

                if (response != null)
                {
                    response.AppendHorizontalLine();
                    response.AppendLine("Reducing to proper codes [" + codeStart + "]->filtered-out[" + badCodes.Count() + "]");
                }

                badCodes.removeFromParent();

                if (response != null)
                {
                    response.consoleAltColorToggle();
                    string rst = result.ToStringTreeview();

                    response.Append(rst);
                    response.consoleAltColorToggle();
                }
            }


            model.wordnetPrimarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

            languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getWordsBySymsetCode);

            model.translationRelated.AddRange(result.getAllLeafs().getDeepest().getNames());


            if (response != null)
            {
                response.AppendHorizontalLine();
                response.AppendLine("Getting English words by symsetcodes via WordNet");

                response.consoleAltColorToggle();
                string rst = result.ToStringTreeview();

                response.Append(rst);
                response.consoleAltColorToggle();
            }

            languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);
            model.wordnetSynonyms.AddRange(result.getAllLeafs().getDeepest().getNames());

            if (response != null)
            {
                response.AppendHorizontalLine();
                response.AppendLine("Translating back to Serbian via Apertium");

                response.consoleAltColorToggle();
                string rst = result.ToStringTreeview();

                response.Append(rst);
                response.consoleAltColorToggle();
            }

            if (!disableCodeBranchFilter) // <------ removes the symset nodes that contain none of first-level translation words
            {
                var codeLevel = result.getAllChildren().getOnLevel(3);
                List <IObjectWithPathAndChildren> toTakeOut = new List <IObjectWithPathAndChildren>();

                foreach (var clb in codeLevel)
                {
                    foreach (var clb_c in clb)
                    {
                        bool takeOut = true;
                        foreach (var clb_cc in clb_c)
                        {
                            if (clb_cc.name == model.lemma.inputForm)
                            {
                                takeOut = false;
                                break;
                            }
                        }
                        if (takeOut)
                        {
                            if (response != null)
                            {
                                response.AppendLine("-- take out: " + clb.path);
                            }
                            toTakeOut.Add(clb);
                            break;
                        }
                    }
                }

                toTakeOut.removeFromParent();

                int wps = Enumerable.Count(model.wordnetSecondarySymsets);
                int tr  = Enumerable.Count(model.translationRelated);
                int ws  = Enumerable.Count(model.wordnetSynonyms);

                if (response != null)
                {
                    response.AppendLine("----- Branch-node filter ----");

                    response.AppendLine("Symsets: " + wps);
                    response.AppendLine("Translations: " + tr);
                    response.AppendLine("Terms: " + ws);



                    response.consoleAltColorToggle();
                    string rst = result.ToStringTreeview();

                    response.Append(rst);
                    response.consoleAltColorToggle();
                }



                model.wordnetPrimarySymsets = result.getAllChildren().getOnLevel(3).getNames(true);
                model.translations          = result.getAllChildren().getOnLevel(4).getNames(true);
                model.synonyms = result.getAllChildren().getOnLevel(5).getNames(true);

                wps = wps - Enumerable.Count(model.wordnetPrimarySymsets);
                tr  = tr - Enumerable.Count(model.translations);
                ws  = ws - Enumerable.Count(model.synonyms);

                if (response != null)
                {
                    //response.AppendLine("----- Branch-node filter ----");

                    response.AppendLine("Reduction of Symsets: " + wps);
                    response.AppendLine("Reduction of Translations: " + tr);
                    response.AppendLine("Reduction of Terms: " + ws);



                    response.consoleAltColorToggle();
                    string rst = result.ToStringTreeview();

                    response.Append(rst);
                    response.consoleAltColorToggle();
                }
            }

            /*
             * String rgex_pat = "^([\\w]*\\\\[\\w]*\\\\[\\w]*\\\\[\\w]*\\\\{0}$)";
             *
             * Regex rgex = new Regex(String.Format(rgex_pat, model.lemma.inputForm));
             *
             * var onlyWithLemma = result.getAllLeafs().getFilterOut(rgex);
             */
            //languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.english);


            model.graph = result;
            return(model);
        }
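A hedged sketch of calling the pipeline above; it assumes model is a termExploreModel whose lemma has already been resolved (the method dereferences model.lemma.inputForm) and that log is any ILogBuilder -- both are illustrative assumptions.

// Hedged usage sketch; "model" and "log" are assumed to exist as described in the lead-in.
termExploreModel enriched = getSynonymsWithWordnetViaApertium(model, log);
log.AppendLine("Primary symsets: " + enriched.wordnetPrimarySymsets.Count());
log.AppendLine("Back-translated synonyms: " + enriched.synonyms.Count());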
Example No. 21
        /// <summary>
        /// Loads the lexic resource.
        /// </summary>
        /// <param name="output">The output.</param>
        /// <param name="resourceFilePath">The resource file path.</param>
        public void LoadLexicResource(ILogBuilder output, String resourceFilePath)
        {
            List <String> lines = new List <String>();

            // <---------------------------------------------- [
            if (isLoaded)
            {
                return;
            }
            String pt = "";

            if (!localCache.isNullOrEmpty())
            {
                pt = localCache;
                lines.AddRange(File.ReadLines(localCache));
            }

            if (lines.Count < 100)
            {
                pt    = resourceFilePath;
                lines = new List <string>();
                lines.AddRange(File.ReadAllLines(resourceFilePath));
            }

            Int32  i      = 0;
            Int32  iCycle = lines.Count() / 20;
            Int32  l      = lines.Count();
            Int32  c      = 0;
            Double p      = 0;

            output.logStartPhase("Loading", "Loading the lexic resource - with mode: " + mode.ToString());
            output.log("Start of loading lexic resource [" + pt + "]");
            //   Parallel.ForEach(lines, new ParallelOptions { MaxDegreeOfParallelism=1 }, (line) =>

            Parallel.ForEach(lines, new ParallelOptions {
                MaxDegreeOfParallelism = 1
            }, (line) =>
                             //  Parallel.ForEach(lines, (line) =>
            {
                string inflectForm = "";
                string lemma       = "";
                string gramTag     = "";

                SelectFromLine(line, out inflectForm, out lemma, out gramTag);

                lexicInflection inflect = null;

                if (!inflectForm.isNullOrEmpty())
                {
                    if (!ContainsKey(inflectForm))
                    {
                        inflect                       = new lexicInflection(line);
                        inflect.lemmaForm             = lemma;
                        inflect.name                  = inflectForm;
                        inflect.inflectedForm         = inflectForm;
                        inflect.lexicalDefinitionLine = line;

                        if (spellAlternator.IsInitiated)
                        {
                            String altInflectedForm = spellAlternator.ConvertFromAtoB(inflectForm);
                            spellAlternatives.GetOrAdd(altInflectedForm, inflectForm);
                        }

                        Add(inflectForm, inflect);
                    }
                    else
                    {
                        inflect = base[inflectForm];
                    }

                    lexicGrammarCase gramCase = null;

                    if (mode == textResourceIndexResolveMode.resolveOnLoad)
                    {
                        var gramTagColl = grammTagConverter.ConvertFromString(gramTag);

                        gramCase = inflect.AddGrammarCase(gramTagColl);
                        gramCase.lexicalDefinitionLine = gramTag;
                    }
                    else
                    {
                        gramCase = new lexicGrammarCase();
                        gramCase.lexicalDefinitionLine = gramTag;
                        gramCase.name = "gc" + i.ToString();
                        inflect.Add(gramCase);
                    }

                    // <----------------- construction of Lemma centered dictionary

                    lexicGraphSetWithLemma lxSet = null;

                    if (!registratedLemmaIndex.ContainsKey(lemma))
                    {
                        lock (LemmaIndexLock)
                        {
                            if (!registratedLemmaIndex.ContainsKey(lemma))
                            {
                                lxSet           = new lexicGraphSetWithLemma();
                                lxSet.lemmaForm = lemma;
                                registratedLemmaIndex.TryAdd(lemma, lxSet);
                            }
                        }
                    }

                    lxSet = registratedLemmaIndex[lemma];

                    if (!lxSet.ContainsKey(inflectForm))
                    {
                        lock (SetLock)
                        {
                            if (!lxSet.ContainsKey(inflectForm))
                            {
                                lxSet.TryAdd(inflect.name, inflect);
                            }
                        }
                    }

                    Interlocked.Increment(ref c);
                    Interlocked.Increment(ref i);
                    if (c > iCycle)
                    {
                        lock (loadStatusLock)
                        {
                            if (c > iCycle)
                            {
                                c = 0;
                                p = i.GetRatio(l);
                                output.AppendLine("Done: _" + p.ToString("P2") + "_");
                            }
                        }
                    }
                }
            });

            output.logEndPhase();
            output.log("End of loading process");
            isLoaded = true;
        }
Example No. 22
        /// <summary>
        /// Connects to the hooks
        /// </summary>
        /// <param name="response">The response.</param>
        /// <param name="simulation">if set to <c>true</c> [simulation].</param>
        public void connectToHooks(ILogBuilder response, Boolean simulation = true)
        {
            response.AppendLine("Connecting concepts by the hooks");

            if (simulation)
            {
                response.AppendLine("Running in the simulation mode -- no changes will be saved. ");
            }

            foreach (domainConceptEntry parent in toConnect)
            {
                List <Concept>   concepts = new List <Concept>();
                List <TermLemma> lemmas   = new List <TermLemma>();
                foreach (String needle in parent.needles)
                {
                    var               reLem       = semanticLexiconManager.manager.resolve(needle);
                    List <IConcept>   re_concepts = new List <IConcept>();
                    List <ITermLemma> re_synonyms = new List <ITermLemma>();
                    Boolean           found       = false;
                    foreach (var re in reLem)
                    {
                        found = true;
                        //re_concepts.AddRange(re.concepts);
                        foreach (IConcept rec in re.concepts)
                        {
                            if (!re_concepts.Any(x => x.name == rec.name))
                            {
                                re_concepts.Add(rec);
                            }
                        }
                        foreach (TermLemma rel in re.relatedTo)
                        {
                            if (!re_synonyms.Any(x => x.name == rel.name))
                            {
                                re_synonyms.Add(rel);
                            }
                        }
                        foreach (TermLemma rel in re.relatedFrom)
                        {
                            if (!re_synonyms.Any(x => x.name == rel.name))
                            {
                                re_synonyms.Add(rel);
                            }
                        }
                        re_synonyms.Add(re);
                    }

                    if (!simulation)
                    {
                        if (re_concepts.Contains(parent.concept))
                        {
                            response.log("[" + parent.concept.name + "] [" + needle + "] hook is already deployed ");
                            continue;
                        }
                    }

                    if (found)
                    {
                        String pname = parent.name;
                        if (!simulation)
                        {
                            pname = parent.concept.name;
                        }

                        response.log("[" + pname + "] [" + needle + "] lemmas[" + re_synonyms.Count + "] concepts[" + re_concepts.Count + "] ");

                        if (re_concepts.Any())
                        {
                            response.log("Connecting [" + pname + "] -->  concepts[" + re_concepts.Count + "] ");

                            foreach (Concept c in re_concepts)
                            {
                                if (c != parent.concept)
                                {
                                    response.log("--- [" + pname + "] -->  concept[" + c.name + "] ");

                                    if (!simulation)
                                    {
                                        parent.concept.hypoConcepts.Add(c);
                                    }
                                }
                            }
                        }
                        else
                        {
                            response.log("Connecting [" + pname + "] -->  lemmas[" + re_synonyms.Count() + "] ");

                            foreach (var c in re_synonyms)
                            {
                                response.log("--- [" + pname + "] -->  lemma[" + c.name + "] ");

                                if (!simulation)
                                {
                                    parent.concept.lemmas.Add(c);
                                }
                            }
                        }
                    }
                    else
                    {
                        response.log("Hook [" + needle + "] failed as no lemma found");
                    }
                }
            }

            if (!simulation)
            {
                Boolean doSave = aceTerminalInput.askYesNo("Do you want to save changes to the triplestore?");
                if (doSave)
                {
                    semanticLexiconManager.manager.lexiconContext.SaveChanges();
                }
            }
        }
Example No. 23
        /// <summary>
        /// Builds the conceptual mesh.
        /// </summary>
        /// <param name="response">The response.</param>
        public void buildConceptualMesh(ILogBuilder response, Boolean simulation = true)
        {
            List <graphWrapNode <domainConceptEntry> > output = new List <graphWrapNode <domainConceptEntry> >();
            List <graphWrapNode <domainConceptEntry> > stack  = new List <graphWrapNode <domainConceptEntry> >();

            response.AppendLine("Building the conceptual mesh");

            if (simulation)
            {
                response.AppendLine("Running in the simulation mode -- no changes will be saved. ");
            }

            stack.AddRange(this);

            while (stack.Any())
            {
                var n_stack = new List <graphWrapNode <domainConceptEntry> >();

                foreach (graphWrapNode <domainConceptEntry> parent in stack)
                {
                    var pConcept = semanticLexiconManager.manager.getConcept(parent.path, !simulation, parent.item.description, false);
                    parent.item.concept = pConcept;
                    //

                    foreach (graphWrapNode <domainConceptEntry> child in parent)
                    {
                        if (!child.isNullNode)
                        {
                            var cConcept = semanticLexiconManager.manager.getConcept(child.path, !simulation, child.item.description, false);
                            child.item.concept = cConcept;
                            if (!simulation)
                            {
                                pConcept.hypoConcepts.Add(cConcept);
                            }

                            response.AppendLine("[" + parent.name + "]->[" + child.name + "]");
                        }
                        else
                        {
                            domainConceptEntry newit = new domainConceptEntry(child.name, "", "");
                            child.SetItem(newit);
                            var cConcept = semanticLexiconManager.manager.getConcept(child.path, !simulation, child.item.description, false);
                            child.item.concept = cConcept;
                            if (!simulation)
                            {
                                pConcept.hypoConcepts.Add(cConcept);
                            }

                            response.AppendLine("[" + parent.name + "]->[" + child.name + "] is a null node");
                        }
                    }

                    if (!parent.isNullNode)
                    {
                        if (parent.item.needles.Any())
                        {
                            toConnect.Add(parent);
                        }
                    }
                    n_stack.AddRange(parent);
                }
                stack = n_stack;
            }

            response.AppendLine("Concepts with hooks to connect: " + toConnect.Count());

            if (!simulation)
            {
                semanticLexiconManager.manager.lexiconContext.SaveChanges();
            }
        }
 protected void AppendStatus(ILogBuilder loger)
 {
     loger.AppendLine("Semantic Lexicon => Lemma[" + lemmaCount + "] Instance[" + instanceCount + "] Concept[" + conceptCount + "]");
     loger.AppendLine("Semantic Lexicon Cache => FQueries[" + failedCount + "]:[" + failed.Count + "] encoding twins[" + twins.LoadCount + "]");
 }
Example No. 25
        public bool discoverGram(termExploreItem item, ILogBuilder loger, bool debug = true)
        {
            //List<termExploreItem> inst = new List<termExploreItem>();
            //exploreModel.instances.ForEach(x => inst.Add(x));

            //inst.Add(exploreModel);

            // instanceCountCollection<pos_type> pct = new instanceCountCollection<pos_type>();
            bool failed = false;

            //// <--------------- Trying to resolve alone
            //foreach (termExploreItem item in inst)
            //{


            if (loger != null)
            {
                loger.AppendLine("Item:" + item.inputForm);
            }

            instanceCountCollection <object> res = termDiscoveryResolver.resolveQuery(item.inputForm);

            res.reCalculate();

            if (res.Count > 0)
            {
                List <object> sorted = res.getSorted();

                if (item.gramSet.getPosType() != pos_type.none)
                {
                    sorted.RemoveAll(x => x is pos_type);
                }

                gramFlags gf = new gramFlags();

                if (sorted.Any(x => x is pos_type))
                {
                    gf.Set((pos_type)sorted.First(x => x is pos_type));
                }
                //pct.AddInstance(gf.type, 1);

                var tl = posConverter.posTypeVsPattern[gf.type];
                sorted.RemoveAll(x => !tl.Contains(x.GetType()));

                if (loger != null)
                {
                    loger.AppendLine("Votes:");
                    for (int i = 0; i < Math.Min(sorted.Count(), 20); i++) // Math.Min: the original Math.Max would run past the end of the list
                    {
                        loger.Append(sorted[i].ToString() + "; ");
                    }
                }

                if (sorted.Any(x => x is pos_gender))
                {
                    gf.Set((pos_gender)sorted.First(x => x is pos_gender));
                }
                if (sorted.Any(x => x is pos_gramaticalCase))
                {
                    gf.Set((pos_gramaticalCase)sorted.First(x => x is pos_gramaticalCase));
                }
                if (sorted.Any(x => x is pos_verbform))
                {
                    gf.Set((pos_verbform)sorted.First(x => x is pos_verbform));
                }
                if (sorted.Any(x => x is pos_number))
                {
                    gf.Set((pos_number)sorted.First(x => x is pos_number));
                }
                if (sorted.Any(x => x is pos_degree))
                {
                    gf.Set((pos_degree)sorted.First(x => x is pos_degree));
                }
                if (sorted.Any(x => x is pos_person))
                {
                    gf.Set((pos_person)sorted.First(x => x is pos_person));
                }


                if (loger != null)
                {
                    loger.AppendLine("Final gram:" + gf.ToString());
                }
                item.gramSet.Add(gf);
            }
            else
            {
                if (item.inputForm.Length < 4)
                {
                    return(false);
                }
                //item.flags = termExploreItemEnumFlag.none;
                failed = true;
            }

            return(failed);
        }
Example No. 26
        /// <summary>
        /// Explores the definition of an unknown term
        /// </summary>
        /// <param name="term">The term.</param>
        /// <param name="loger">The logger.</param>
        /// <param name="shortExplore">if set to <c>true</c> [short explore].</param>
        /// <param name="debug">if set to <c>true</c> [debug].</param>
        /// <returns></returns>
        public List <termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
        {
            term = term.Trim();
            List <termExploreModel> output = new List <termExploreModel>();


            if (modelRegistry.ContainsKey(term))
            {
                return(modelRegistry[term]);
            }
            if (missing.Contains(term))
            {
                return(GetModels(term));
            }

            if (term.isNumber())
            {
                termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
                tmp.flags = termExploreItemEnumFlag.datapoint;
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] detected to be number.");
                }
                AddModel(tmp);
                return(GetModels(term));
            }

            // <----- second test
            exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);
            List <string> suggests = new List <string>();

            exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));


            //languageManagerDBNamedEntities.manager.exploreEntities(exploreModel.rootWord, exploreModel);

            suggests.Add(exploreModel.rootWord);

            // s



            apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

            if (result.Any())
            {
                List <termExploreItem> gramCheck = new List <termExploreItem>();

                gramFlags gr = null;



                if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
                {
                    exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));

                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
                    }

                    gramCheck.Add(exploreModel);
                    if (debug)
                    {
                        if (loger != null)
                        {
                            loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
                        }
                    }
                }
                else
                {
                    //if (loger != null) loger.AppendLine("Apertium failed to discover [" + exploreModel.inputForm + "]");
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (result.termVsGramFlags.ContainsKey(item.inputForm))
                    {
                        item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));
                        gramCheck.Add(exploreModel);
                        exploreModel.lemmaForm = item.inputForm;
                        if (exploreModel.lemma == null)
                        {
                            exploreModel.lemma = item;
                        }


                        if (debug)
                        {
                            if (loger != null)
                            {
                                loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                            }
                        }
                    }
                    else
                    {
                        //if (loger != null) loger.AppendLine("Apertium failed to discover [" + item.inputForm + "]");
                    }
                }

                exploreModel.translations.AddRange(result.GetEnglish());



                gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

                int disc = 0;
                foreach (var gram in gramCheck)
                {
                    if (discoverGram(gram, loger, debug))
                    {
                        disc++;
                    }
                }

                if (loger != null)
                {
                    loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count() + "]");
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "Apertium_");
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.aper;


                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ APERTIUM ^^

            foreach (string s in suggests)
            {
                languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
            }

            if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
            {
                AddModel(exploreModel);

                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
                        loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }



                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ NAMED ENTITY ^^

            // <----------------- Wordnet
            wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
            bool found = false;

            if (resSrWordnet.Any())
            {
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (resSrWordnet.GetByKey(item.inputForm).Any())
                    {
                        exploreModel.lemma     = item;
                        exploreModel.lemmaForm = item.inputForm;
                        exploreModel.translations.AddRange(resSrWordnet.GetValues());
                        exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                        exploreModel.flags = termExploreItemEnumFlag.srWNet;
                        found = true;

                        item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
                    }
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    discoverGram(item, loger, debug);
                }
            }

            if (found)
            {
                if (loger != null)
                {
                    loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "SrWordNet_");;
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.srWNet;

                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
                    }
                }
            }

            // <------------------ SERBIAN WORD NET ^^

            bool failed = discoverGram(exploreModel, loger, debug);

            exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

            int d = 0;

            List <termExploreItem> lastCheck = new List <termExploreItem>(); // NOTE: never populated in the original, so the loop below finds nothing and d stays 0

            foreach (var gram in lastCheck)
            {
                if (discoverGram(gram, loger, debug))
                {
                    d++;
                }
            }

            if (debug)
            {
                if (loger != null)
                {
                    loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count() + "]");
                }
            }

            if (d == 0)
            {
                failed = true;
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ LAST CHECK ^^



            if (!failed)
            {
                exploreModel.flags = termExploreItemEnumFlag.termExplorer;
                AddModel(exploreModel);
                return(GetModels(term));
            }
            else
            {
                if (debug)
                {
                    if (loger != null)
                    {
                        loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
                    }
                }
                output.Add(makeTempModel(term, pos_type.TEMP));
                missing.Add(term);
                return(output);
            }
        }
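A hedged usage sketch for explore(); explorer stands for whatever instance hosts the method, log for any ILogBuilder, and the sample term is purely illustrative.

// Hedged usage sketch; "explorer", "log" and the sample term are illustrative assumptions.
List<termExploreModel> models = explorer.explore("reka", log, shortExplore: true, debug: false);
foreach (termExploreModel m in models)
{
    m.ToString(log, expanded: false, showInstances: true); // multiline dump, as shown in an earlier example
}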
Example No. 27
        public List <ILexiconItem> exploreAndSave(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, params termExploreItemEnumFlag[] flagsToSave)
        {
            List <ILexiconItem> output = new List <ILexiconItem>();

            if (lexicalItemRegistry.ContainsKey(term))
            {
                return(lexicalItemRegistry[term]);
            }


            if (!manager.settings.doAutoexplore)
            {
                output.Add(makeTempLemma(term));
                if (loger != null)
                {
                    loger.AppendLine("Autoexplore off [" + term + "] is temporarly created.");
                }
                return(output);
            }

            var res = failedList.Search(term, false, 1);

            if (res.getLineContentList().Contains(term))
            {
                output.Add(makeTempLemma(term));
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] is on black list - making temp term.");
                }
                return(output);
            }


            List <termExploreModel> models = explore(term, loger, shortExplore, debug);

            if (flagsToSave == null)
            {
                flagsToSave = new termExploreItemEnumFlag[] { termExploreItemEnumFlag.aper, termExploreItemEnumFlag.namedEntity, termExploreItemEnumFlag.srWNet };
            }

            foreach (termExploreModel model in models)
            {
                if (debug)
                {
                    manager.constructor.saveTermModel(model);
                }
                if (flagsToSave.Contains(model.flags))
                {
                    manager.constructor.output.AppendLine("New term for Lexicon: " + model.inputForm);
                    manager.constructor.addTermModelToLexicon(model);
                    if (manager.settings.doAutoMakeSynonymRelationship)
                    {
                        manager.constructor.addSynonymsAndConceptLinks(model, true);
                    }
                    output.AddRange(manager.getLexiconItems(model.inputForm, loger, false));
                }
                else
                {
                    output.Add(modelToLemma(model));
                }
            }

            if (!output.Any())
            {
                var md = new TempLemma(term);
                output.Add(makeTempLemma(term));
                md.type = pos_type.TEMP.ToString();

                failedList.Append(new string[] { md.name });

                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] not found. Using single-instance spark.");
                }
            }

            lexicalItemRegistry.Add(term, output);


            return(output);
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Describes the sample group to <c>output</c>
        /// </summary>
        /// <param name="output">The output object</param>
        public void describe(ILogBuilder output = null)
        {
            if (output == null)
            {
                return;
            }
            //output.log();
            int tl = output.tabLevel;

            output.rootTabLevel();

            //  output.AppendHeading("SampleGroup description", 1);

            output.open("desc", name, "");

            //  output.AppendHeading(name, 2);
            // output.AppendHorizontalLine();

            int ci = 1;

            foreach (sampleGroupItem item in this)
            {
                item.parent = this;
                output.open("group", item.groupTitle, item.groupDescription);
                //output.AppendHeading(, 3);


//                output.AppendPair("Description", );

                output.AppendPair("ID", ci);
                output.AppendPair("Tag", item.groupTag);
                if (item.groupSizeLimit == -1)
                {
                    output.AppendPair("Size (ratio)", item.weight + " / " + totalWeight.ToString());
                }
                else
                {
                    output.AppendPair("Size (limit)", item.groupSizeLimit);
                }

                output.AppendPair("Count", item.count);
                output.AppendPair("Border", item.groupBorder);
                if (item.isClosed)
                {
                    output.AppendLine("The group is closed for new members");
                }
                else
                {
                    output.AppendLine("The group may receive new members");
                }
                ci++;

                output.close();
            }

            output.AppendHorizontalLine();

            output.open("info", "Info", "");
            output.AppendPair("Counted collection", countedCollectionName);
            output.AppendPair("Items with group tag/s", countHasGroup);
            output.AppendPair("Items without group tag", countNoGroup);
            output.close();

            output.log("-- end");

            output.close();

            output.tabLevel = tl;
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Builds a term model out from Word input
        /// </summary>
        /// <param name="word">The word.</param>
        /// <param name="response">The response.</param>
        /// <returns></returns>
        public static termExploreModel exploreWithUnitex(string word, ILogBuilder response, bool wordIsLemma = false)
        {
            termExploreModel output = new termExploreModel();

            output.modelSource = termExploreModelSource.fromToken;
            output.inputForm   = word;

            string lemma = word;

            var tls = semanticLexiconManager.manager.resolve(word);

            if (tls != null)
            {
                if (Enumerable.Count(tls) > 0)
                {
                    if (response != null)
                    {
                        response.AppendLine("#1 Lemma already defined in the triplestore [" + word + "] ");
                    }
                    output = semanticLexiconManager.manager.constructor.getTermModel(Enumerable.First(tls));
                    return(output);
                }
            }

            if (!wordIsLemma)
            {
                if (response != null)
                {
                    response.AppendLine("#1 Finding Lemma for [" + word + "] ");
                }
                string query = string.Format(posConverter.REGEX_UNITEX_InstanceToLemmaFormat, word);
                fileTextSearchResult reslt = languageManagerUnitex.manager.operatorDelaf.Search(query, true, 1, RegexOptions.IgnoreCase);

                if (response != null)
                {
                    reslt.ToString(response, true);
                }

                Regex instanceToLemmaReg = new Regex(query);


                if (reslt.Count() > 0)
                {
                    var lnp = reslt.First();

                    Match mch = instanceToLemmaReg.Match(lnp.Value);
                    lemma = mch.Groups[1].Value;
                }
            }
            else
            {
                if (response != null)
                {
                    response.AppendLine("#1 The word is trusted to be a lemma [" + word + "] - skipping search");
                }
            }
            // <------------------------------------------------------------------- preparing chache ---------------

            var cache = languageManagerUnitex.manager.operatorDelaf.Search(lemma, false, 300);

            if (response != null)
            {
                response.AppendLine("Cached definitions [" + cache.Count() + "] ");
            }



            // <------------------------------------------------------------  2. finding lemma definition

            output.lemmaForm = lemma;
            output.lemma     = new termExploreItem(lemma);

            if (response != null)
            {
                response.AppendLine("#2 Finding Lemma definition [" + lemma + "] ");
            }

            string lemmaQuery                = string.Format(posConverter.REGEX_UNITEX_DeclarationForLemma, lemma);
            Regex  lemmaQueryRegex           = new Regex(lemmaQuery);
            fileTextSearchResult lemmaResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, lemmaQuery, true, 5, RegexOptions.IgnoreCase);

            if (response != null)
            {
                lemmaResult.ToString(response, true);
            }

            if (lemmaResult.Count() == 0)
            {
                if (response != null)
                {
                    response.consoleAltColorToggle();
                    response.AppendLine("Failed to find lemma definition for [" + word + "]. Aborting exploration.");
                    response.consoleAltColorToggle();
                }
                output.wasExploreFailed = true;
                return(output);
            }

            foreach (var lr_lnp in lemmaResult)
            {
                Match lmch = lemmaQueryRegex.Match(lr_lnp.Value);
                if (lmch.Success)
                {
                    output.lemma.gramSet.Add(lmch.Groups[1].Value);
                }
            }
            if (response != null)
            {
                output.lemma.ToString(response);
            }



            // <------------------------------------------------------------  3. getting all instances for the lemma
            if (response != null)
            {
                response.AppendLine("#3 Extracting all instances for the Lemma [" + lemma + "] ");
            }

            string instanceQuery                = string.Format(posConverter.REGEX_UNITEX_LemmaToInstanceFormat, lemma);
            string instanceUnitexQuery          = "," + lemma + ".";
            Regex  instanceQueryRegex           = new Regex(instanceQuery);
            fileTextSearchResult instanceResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, instanceUnitexQuery, false, 100, RegexOptions.IgnoreCase);

            if (response != null)
            {
                instanceResult.ToString(response, true);
            }

            foreach (var lr_lnp in instanceResult)
            {
                Match lmch = instanceQueryRegex.Match(lr_lnp.Value);
                output.instances.Add(lmch.Groups[1].Value, lmch.Groups[2].Value);
            }

            // <------------------------------------------------------------  4. Resulting term model
            if (response != null)
            {
                response.AppendLine("#4 Resulting term model [" + lemma + "] ");
                output.ToString(response);
            }


            return(output);
        }
Ejemplo n.º 30
0
        public static termExploreModel explore(string word, ILogBuilder response, termExploreMode mode, bool verbose = false)
        {
            termExploreModel    model  = new termExploreModel(word);
            termExploreModelSet outset = semanticLexiconManager.manager.constructor.loadTermModels(word, true);

            if (response != null)
            {
                response.consoleAltColorToggle();
                response.AppendHorizontalLine();
                response.AppendLine("Exploring term[" + model.inputForm + "] with [" + mode.ToString() + "]");
                response.consoleAltColorToggle();
            }

            if (Enumerable.Any(outset))
            {
                model = Enumerable.First(outset);

                if (response != null)
                {
                    response.AppendLine("term[" + model.inputForm + "]->lemma[" + model.lemma.inputForm + "]");
                }
            }
            else
            {
                model.lemmaForm = "";
                if (response != null)
                {
                    response.AppendLine("term[" + word + "]->missingLemma[]");
                }
            }


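            // Descriptive comment (added): keep a reference to the original logger; the per-mode helpers only receive it when verbose output is requested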
            var output = response;

            if (!verbose)
            {
                response = null;
            }

            switch (mode)
            {
            case termExploreMode.apertium_direct:
                model = getSynonymsWithApertium(model, response);
                break;

            case termExploreMode.apertium_wordnet_eng:
                model = getSynonymsWithWordnetViaApertium(model, response);
                break;

            case termExploreMode.apertium_wordnet_srb:
                model = getSynonymsWithSerbianWordNetAndApertium(model, response);
                break;

            case termExploreMode.corpus:
                model = getSynonymsByCorpus(model, response);
                break;

            case termExploreMode.hunspell_srb:
                model = getSynonymsWithHunspell(model, response);
                break;

            case termExploreMode.none:
                break;

            case termExploreMode.wordnet_srb:
                model = getSynonymsWithSerbianWordNet(model, response);
                break;

            case termExploreMode.unitex:
                model = exploreWithUnitex(word, response);
                break;
            }

            model.PostProcess();
            if (output != null)
            {
                model.ToString(output, verbose, false);
            }

            return(model);
        }