示例#1
0
        /// <summary>
        /// Builds a <see cref="termExploreModel"/> for the supplied token using the rule set of this instance.
        /// </summary>
        /// <remarks>
        /// Side effect: the <c>root</c>, <c>stem</c> and <c>sufix</c> members of this instance are overwritten with
        /// capture groups 1, 2 and 3 of the rule set regex match.
        /// </remarks>
        /// <param name="token">The token that is matched against the rule set regex.</param>
        /// <returns>
        /// A model constructed from the form produced by the first rule of the rule set, carrying the gram set,
        /// the instance items, the lemma item and the root word.
        /// </returns>
        public termExploreModel createExploreItem(string token)
        {
            Match tokenMatch = ruleSet.regex.Match(token);

            // These members are assigned before `this` is handed to the rule and to the rule set below.
            root  = tokenMatch.Groups[1].Value;
            stem  = tokenMatch.Groups[2].Value;
            sufix = tokenMatch.Groups[3].Value;

            // Only the first rule of the set is consulted for the lemma form.
            morphRule firstRule = ruleSet.rules.First();
            string    lemmaText = firstRule.GetForm(this);

            termExploreModel result = new termExploreModel(lemmaText)
            {
                gramSet = ruleSet.GetGramSet(firstRule.sufix)
            };

            result.instances.AddRangeUnique(ruleSet.GetItems(this, lemmaText));

            // The lemma item shares the gram set of the model.
            result.lemma         = new termExploreItem(lemmaText);
            result.lemma.gramSet = result.gramSet;

            result.rootWord = root;

            return result;
        }
        /// <summary>
        /// Serializes the term model to an XML file inside the metadata folder of the project.
        /// The prefix is a clean filename prefix, without a spacing character.
        /// </summary>
        /// <param name="termModel">The term model to store; its <c>filename(".xml")</c> result supplies the file name.</param>
        /// <param name="prefix">Text placed directly in front of the file name.</param>
        public void saveTermModel(termExploreModel termModel, string prefix = "")
        {
            var    metadataDir = projectFolderStructure[lexiconConstructorProjectFolder.metadata].path;
            string fileName    = prefix + termModel.filename(".xml");

            // The target file is requested in overwrite mode.
            FileInfo target = (metadataDir + "\\" + fileName).getWritableFile(getWritableFileMode.overwrite);

            objectSerialization.saveObjectToXML(termModel, target.FullName);
        }
示例#3
0
        /// <summary>
        /// Explores every word of <c>state.entryList</c> with Unitex and adds the resulting term models to the lexicon.
        /// </summary>
        /// <remarks>
        /// Failed words end up in <c>state.failedBuffer</c>; successfully stored lemmas in <c>state.processedBuffer</c>.
        /// Every explored word is also recorded in <c>state.shadowBuffer</c>.
        /// </remarks>
        /// <param name="response">Log builder that receives progress, debug and failure messages.</param>
        protected override void stageExecute(ILogBuilder response)
        {
            foreach (string word in state.entryList)
            {
                // The log builder is handed to the exploration only in verbose mode.
                termExploreModel output = termExploreProcedures.exploreWithUnitex(word, state.verbose ? response : null);

                if (output.wasExploreFailed)
                {
                    if (state.debug)
                    {
                        response.consoleAltColorToggle();
                        response.AppendLine("--- running debug search for [" + word + "]");
                        var exp = languageManagerUnitex.manager.operatorDelaf.Search(word, false, 25);
                        exp.ToString(response, true);

                        // The same search result is also written to a per-word file in the logs folder.
                        string debugLines = exp.ToString();
                        string debugPath  = semanticLexiconManager.manager.constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(word + "_failDebug.txt");
                        debugLines.saveStringToFile(debugPath, getWritableFileMode.overwrite);

                        response.consoleAltColorToggle();
                    }

                    state.shadowBuffer.Add(word);
                    state.failedBuffer.Add(word);
                    continue;
                }

                if (state.saveModel)
                {
                    semanticLexiconManager.manager.constructor.saveTermModel(output);
                }

                state.shadowBuffer.Add(word);
                state.shadowBuffer.AddRangeUnique(output.GetShadow());

                try
                {
                    semanticLexiconManager.manager.constructor.addTermModelToLexicon(output);
                    response.AppendLine("Lexicon update: Lemma [" + output.lemma.inputForm + "][" + output.instances.Count() + "]");
                    state.processedBuffer.Add(output.lemma.inputForm);
                }
                catch (Exception ex)
                {
                    // Best effort: one failing term must not stop the whole stage, but the cause is
                    // reported so the failure can be diagnosed (it used to be dropped silently).
                    state.failedBuffer.Add(word);
                    response.AppendLine("Lexicon term update failed for [" + word + "][" + output.lemmaForm + "]");
                    response.AppendLine("Exception: " + ex.GetType().Name + " - " + ex.Message);
                    output.ToString(response, true);
                }
            }
        }
示例#4
0
        /// <summary>
        /// Adds the term model to the lexicon: creates the lemma entry and an instance entry for each item of the
        /// model that is not stored yet.
        /// </summary>
        /// <remarks>
        /// When a lemma named <c>termModel.lemmaForm</c> already exists nothing is created.
        /// <c>SaveChanges</c> is called on the lexicon context in either case.
        /// </remarks>
        /// <param name="termModel">The term model.</param>
        public void addTermModelToLexicon(termExploreModel termModel)
        {
            ITermLemma tl = manager.lexiconContext.TermLemmas.Where(x => x.name.Equals(termModel.lemmaForm)).FirstOrDefault();

            // An existing lemma is left untouched.
            if (tl == null)
            {
                tl = manager.lexiconContext.TermLemmas.Create();

                // NOTE(review): the lookup above uses lemmaForm only, while the stored name goes through
                // .or(inputForm) -- confirm that this asymmetry is intended.
                tl.name    = termModel.lemmaForm.or(termModel.inputForm);
                tl.gramSet = termModel.getGramSet();
                tl.type    = termModel.getPosType().toStringSafe("TEMP");

                foreach (termExploreItem item in termModel.instances)
                {
                    ITermInstance ti = manager.lexiconContext.TermInstances.Where(x => x.name.Equals(item.inputForm)).FirstOrDefault();

                    if (ti == null)
                    {
                        ti = manager.lexiconContext.TermInstances.Create();

                        ti.name    = item.inputForm;
                        ti.gramSet = item.getGramSet();
                        ti.type    = item.getPosType().toStringSafe();
                        ti.lemma   = tl;

                        // Only instances created here are added. An instance returned by the query above is
                        // already stored, and adding it again would re-register an existing entry.
                        manager.lexiconContext.TermInstances.Add(ti);
                    }
                    else if (manager.settings.doResolveWordsInDebugMode)
                    {
                        output.AppendLine("Item [" + item.inputForm + "] of lemma [" + tl.name + "] was already defined.");
                    }
                }
            }

            manager.lexiconContext.SaveChanges();
        }
        /// <summary>
        /// Queries the table for rows whose <c>SRB_COLUMN_TOKEN</c> column matches (LIKE) any of the given tokens and
        /// collects the code/token pairs of the matching rows.
        /// </summary>
        /// <param name="srb_tokens">Tokens to look for; each one is used as the right-hand side of a LIKE filter.</param>
        /// <param name="response">Log builder; not used by this method.</param>
        /// <param name="buildModel">
        /// If set to <c>true</c>, a <see cref="termExploreModel"/> is created (or reused) in <c>output.models</c> for
        /// every collected pair and extended with gram flags and the symset value of the pair.
        /// </param>
        /// <returns>The collected results, with models attached when <paramref name="buildModel"/> is set.</returns>
        public wordnetSymsetResults query_srb(List<String> srb_tokens, ILogBuilder response, Boolean buildModel = true)
        {
            wordnetSymsetResults output = new wordnetSymsetResults();

            getReady();

            List<DataRow> matches = new List<DataRow>();

            foreach (String tkn in srb_tokens)
            {
                // A single quote inside a string literal of a filter expression has to be doubled; otherwise a
                // token containing ' breaks the expression. Wildcards are left as they are, so a caller can
                // still pass a LIKE pattern.
                String literal = (tkn ?? "").Replace("'", "''");
                matches.AddRangeUnique(table.Select(SRB_COLUMN_TOKEN + " LIKE '" + literal + "'"));
            }

            foreach (DataRow dr in matches)
            {
                String token = dr[SRB_COLUMN_TOKEN].toStringSafe();
                String code  = dr[SRB_COLUMN_CODE].toStringSafe();
                output.Add(code, token);
            }

            if (buildModel)
            {
                foreach (var pair in output)
                {
                    termExploreModel md  = null;
                    String           srb = pair.Key;

                    // One model per key: created on first sight, reused afterwards.
                    if (!output.models.ContainsKey(srb))
                    {
                        md = new termExploreModel(srb);
                        output.models.Add(srb, md);
                    }
                    else
                    {
                        md = output.models[srb];
                    }

                    // The first character of the pair value selects the POS type; pos_type.none is passed as
                    // the second argument of getValue.
                    String   symc = pair.Value[0].ToString();
                    pos_type pt   = posConverter.wordNetFirstNumToPosType.getValue(symc, pos_type.none);

                    gramFlags gr = new gramFlags();
                    gr.Set(pt);
                    md.gramSet.Add(gr);

                    md.wordnetPrimarySymsets.AddUnique(pair.Value);
                }
            }

            return output;
        }
        /// <summary>
        /// Gets the model for a Lexicon lemma entry -- loads it from file or, if the file does not exist,
        /// reconstructs it from the TermLemma.
        /// </summary>
        /// <param name="lemma">The lemma; its name and type form the file name <c>name_type.xml</c> in the metadata folder.</param>
        /// <param name="dontLoadFromFile">If set to <c>true</c>, the file is not consulted and the model is always built from <paramref name="lemma"/>.</param>
        /// <returns>
        /// The deserialized model with <c>modelSource</c> set to <c>fromFile</c> when the file exists and loading is
        /// allowed; otherwise a new model constructed from <paramref name="lemma"/>.
        /// </returns>
        public termExploreModel getTermModel(ITermLemma lemma, bool dontLoadFromFile = false)
        {
            if (!dontLoadFromFile)
            {
                string filepath = projectFolderStructure[lexiconConstructorProjectFolder.metadata].pathFor(lemma.name + "_" + lemma.type.ToString() + ".xml");
                if (File.Exists(filepath))
                {
                    termExploreModel loaded = objectSerialization.loadObjectFromXML<termExploreModel>(filepath);
                    loaded.modelSource = termExploreModelSource.fromFile;
                    return loaded;
                }
            }

            // No stored file (or loading disabled): build the model from the lemma entry itself.
            return new termExploreModel(lemma);
        }
        /// <summary>
        /// Creates three kinds of links for the term model: lemma-to-synonym-lemma links, concept-to-lemma links for
        /// the primary WordNet symset codes of the model, and concept-to-concept links between concepts that share a lemma.
        /// </summary>
        /// <remarks>
        /// The counters <c>links_synonym</c>, <c>links_lemmaConcept</c> and <c>links_conceptConcept</c> of the model are
        /// reset and recounted. Changes are saved through the lexicon context and the link log is written into the
        /// links folder of the project.
        /// </remarks>
        /// <param name="termModel">The term model whose lemma form, synonyms and symset codes drive the linking.</param>
        /// <param name="saveModel">If set to <c>true</c>, the model is stored with <c>saveTermModel</c> once linking is done.</param>
        public void addSynonymsAndConceptLinks(termExploreModel termModel, bool saveModel = false)
        {
            var lemmas    = manager.getLemma(termModel.lemmaForm);
            var lemmasyns = manager.getLemmas(termModel.synonyms);

            builderForLog linkLog = new builderForLog();

            // <----------- ADDING SYNONYMS ----------- >
            linkLog.open("Creating synonym-2-lemma links");

            termModel.links_synonym = 0;
            foreach (ITermLemma lemma in lemmas)
            {
                foreach (ITermLemma lemsyn in lemmasyns)
                {
                    // A link is created only when none exists in either direction.
                    bool added = !lemma.relatedTo.Contains(lemsyn) && !lemma.relatedFrom.Contains(lemsyn);
                    if (added)
                    {
                        lemma.relatedTo.Add(lemsyn);
                        termModel.links_synonym++;
                        linkLog.AppendLine("[" + termModel.links_synonym.ToString("D5") + "] " + lemma.name + " -> " + lemsyn.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + lemma.name + " -> " + lemsyn.name);
                    }
                }
            }
            linkLog.close();

            // <----------- ADDING SYNSETS
            linkLog.open("Creating concept 2 lemma links");
            List<Concept> concepts = new List<Concept>();

            termModel.links_lemmaConcept = 0;
            foreach (string code in termModel.wordnetPrimarySymsets)
            {
                // Codes for which isCleanWord() is true are treated as invalid symset codes and skipped.
                if (code.isCleanWord())
                {
                    aceLog.log("wrong symset code -- [" + code + "]  -- ignored!");
                    continue;
                }

                Concept con = manager.getConcept(code, true, "WordNet Code");
                foreach (TermLemma lemma in lemmas)
                {
                    // Evaluated per lemma: the flag used to live outside this loop, so after the first new
                    // link every following lemma was counted and logged as newly linked as well.
                    bool added = !con.lemmas.Contains(lemma);
                    if (added)
                    {
                        con.lemmas.Add(lemma);
                        termModel.links_lemmaConcept++;
                        linkLog.AppendLine("[" + termModel.links_lemmaConcept.ToString("D5") + "] " + con.name + " -> " + lemma.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + con.name + " -> " + lemma.name);
                    }
                }

                concepts.Add(con);
            }
            linkLog.close();

            // <--------------------------- linking SYNSET concepts
            linkLog.open("Creating concept 2 concept links");
            termModel.links_conceptConcept = 0;
            foreach (Concept con in concepts)
            {
                // NOTE(review): both loops run over the same list, so a concept is also paired with itself -- confirm
                // that self links are wanted.
                foreach (Concept con2 in concepts)
                {
                    // Linked only when no link exists in either direction and the two concepts share a lemma.
                    bool added = !con2.relatedTo.Contains(con)
                                 && !con2.relatedFrom.Contains(con)
                                 && con2.lemmas.Where(x => con.lemmas.Contains(x)).Any();
                    if (added)
                    {
                        con2.relatedTo.Add(con);
                        termModel.links_conceptConcept++;
                        linkLog.AppendLine("[" + termModel.links_conceptConcept.ToString("D5") + "] " + con2.name + " -> " + con.name);
                    }
                    else
                    {
                        linkLog.AppendLine("[Link exists] " + con2.name + " -> " + con.name);
                    }
                }
            }
            linkLog.close();

            manager.lexiconContext.SaveChanges();

            string pth = projectFolderStructure[lexiconConstructorProjectFolder.links].pathFor(termModel.filename(".txt"));
            linkLog.ToString().saveStringToFile(pth, getWritableFileMode.overwrite);

            if (saveModel)
            {
                saveTermModel(termModel);
            }
        }
示例#8
0
        /// <summary>
        /// Queries the named-entity source for the given form and, when the query is accepted, tags the item and the
        /// model according to the first recognized named-entity flag of the response.
        /// </summary>
        /// <param name="form">The word form to look up; also the key of the item inside <c>exploreModel.instances</c>.</param>
        /// <param name="exploreModel">The model that is updated (lemma form, symsets, instances, lemma, synonyms, flags).</param>
        /// <returns>The item found in the model for <paramref name="form"/>, or a new item when the model has none.</returns>
        public termExploreItem exploreEntities(String form, termExploreModel exploreModel)
        {
            tokenQuery         query          = new tokenQuery(form, null, tokenQuerySourceEnum.imb_namedentities);
            tokenQueryResponse entityResponse = languageManagerDBNamedEntities.manager.exploreToken(query);

            termExploreItem item = exploreModel.instances[form];
            if (item == null)
            {
                item = new termExploreItem(form);
            }

            // Anything but an accepted query leaves the model untouched.
            if (entityResponse.status != tokenQueryResultEnum.accept)
            {
                return item;
            }

            exploreModel.lemmaForm = item.inputForm;

            String  gset    = "";
            Boolean matched = true; // stays true when the response carries no flags at all

            foreach (namedEntity flag in entityResponse.flags)
            {
                String gramTag    = null;
                String symsetCode = null;

                // Maps a recognized flag to its gram tag and symset code; other flags leave both unset.
                switch (flag)
                {
                    case namedEntity.countryName:
                        gramTag    = "N+Top";
                        symsetCode = "MC04-GEO-C";
                        break;

                    case namedEntity.languageName:
                        gramTag    = "N+Lang";
                        symsetCode = "MC04-GEO-LN";
                        break;

                    case namedEntity.personalName:
                        gramTag    = "N+First";
                        symsetCode = "MC04-HUM-PN";
                        break;

                    case namedEntity.personalPosition:
                        gramTag    = "N+Hum";
                        symsetCode = "MC04-HUM-PP";
                        break;

                    case namedEntity.presonalLastName:
                        gramTag    = "N+Last";
                        symsetCode = "MC04-HUM-LN";
                        break;

                    case namedEntity.title:
                        gramTag    = "N";
                        symsetCode = "MC04-CON-AD-TITLE";
                        break;

                    case namedEntity.townName:
                        gramTag    = "N+PGr1";
                        symsetCode = "MC04-CON-AD-TN";
                        break;

                    case namedEntity.townZip:
                        gramTag    = "NUMnumerical+Top";
                        symsetCode = "MC04-CON-AD-TZ";
                        break;

                    default:
                        break;
                }

                matched = gramTag != null;
                if (matched)
                {
                    // The first recognized flag wins; the remaining flags are not inspected.
                    gset = gset.add(gramTag, "|");
                    exploreModel.wordnetPrimarySymsets.Add(symsetCode);
                    break;
                }
            }

            item.gramSet.SetAll(gset);

            if (matched)
            {
                exploreModel.instances.Add(item);

                if (exploreModel.lemma == null)
                {
                    exploreModel.lemma = item;
                }

                exploreModel.synonyms.AddRange(entityResponse.dataTokens.getTokens());

                exploreModel.flags = termExploreItemEnumFlag.namedEntity;
            }

            return item;
        }