Beispiel #1
0
        /// <summary>
        /// Collects up to four distinct words related to <paramref name="word"/>: at most two are taken
        /// from its noun synsets, then its verb synsets fill the list up to four entries in total.
        /// </summary>
        /// <param name="word">The word to look up in WordNet.</param>
        /// <returns>A dictionary holding the single key 900 mapped to the collected words.</returns>
        private Dictionary <int, string []> GetSemantic(string word)
        {
            List <string> semanticWords = new List <string>();
            WordNetEngine wn            = new WordNetEngine(@"../../", false);

            // Nouns may contribute at most two words ...
            AddSynSetWords(wn.GetSynSets(word, WordNetEngine.POS.Noun), semanticWords, 2);
            // ... and verbs then top the list up to four words overall.
            AddSynSetWords(wn.GetSynSets(word, WordNetEngine.POS.Verb), semanticWords, 4);

            Dictionary <int, string[]> semanticDic = new Dictionary <int, string[]>();
            semanticDic.Add(900, semanticWords.ToArray());
            return(semanticDic);
        }

        /// <summary>
        /// Appends the not-yet-collected words of each synset to <paramref name="target"/> until it
        /// holds <paramref name="maxTotal"/> entries.
        /// </summary>
        /// <param name="synSets">Synsets whose textual form is parsed for words.</param>
        /// <param name="target">List receiving the words; existing entries are never duplicated.</param>
        /// <param name="maxTotal">Upper bound for the total size of <paramref name="target"/>.</param>
        private static void AddSynSetWords(Set <SynSet> synSets, List <string> target, int maxTotal)
        {
            char[] delimiters = { ' ', ',' };

            foreach (SynSet synSet in synSets)
            {
                if (target.Count >= maxTotal)
                {
                    // Cap reached: nothing more can be added, so stop scanning.
                    return;
                }

                // The words are taken from the text before the first ':' with its first and last
                // character stripped (presumably a "{w1, w2}: gloss" layout - confirm against SynSet.ToString()).
                string text  = synSet.ToString();
                int    colon = text.IndexOf(':');
                if (colon < 2)
                {
                    // No ':' or a prefix too short to strip both characters; Substring would throw.
                    continue;
                }

                string[] candidates = text.Substring(1, colon - 2).Split(delimiters);
                foreach (string candidate in candidates)
                {
                    if (target.Count >= maxTotal)
                    {
                        return;
                    }
                    if (candidate.Length != 0 && !target.Contains(candidate))
                    {
                        target.Add(candidate);
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Checks that the measure between the noun synsets at index 4 of "fork" and "car"
        /// rounds to 0.17.
        /// </summary>
        public void JcnMeasureForkSynsets()
        {
            var forkSenses = engine.GetSynSets("fork", WordType.Noun);
            var carSenses  = engine.GetSynSets("car", WordType.Noun);

            // Round to two decimals so the comparison is against a fixed literal.
            var rounded = Math.Round(instance.Measure(forkSenses[4], carSenses[4]), 2);

            Assert.AreEqual(0.17, rounded);
        }
Beispiel #3
0
        /// <summary>
        /// Click handler that clears the result lists and fills <c>synSets</c> with the synsets
        /// selected by the form: a specific synset ID, the most common synset of the entered word,
        /// or all synsets of the entered word for the chosen part of speech.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void getSynSets_Click(object sender, EventArgs e)
        {
            synSets.Items.Clear();
            semanticRelations.Items.Clear();
            lexicalRelations.Items.Clear();
            getRelatedSynSets.Enabled = false;

            // retrieve synsets
            Set <SynSet> synSetsToShow = null;

            if (synsetID.Text != "")
            {
                // an explicit synset ID takes precedence over the word/POS inputs
                try { synSetsToShow = new Set <SynSet>(new SynSet[] { _wordNetEngine.GetSynSet(synsetID.Text) }); }
                catch (Exception)
                {
                    MessageBox.Show("Invalid SynSet ID");
                    return;
                }
            }
            else
            {
                // get single most common synset
                if (mostCommon.Checked)
                {
                    try
                    {
                        SynSet synset = _wordNetEngine.GetMostCommonSynSet(word.Text, (WordNetEngine.POS)pos.SelectedItem);
                        if (synset != null)
                        {
                            synSetsToShow = new Set <SynSet>(new SynSet[] { synset });
                        }
                    }
                    catch (Exception ex) { MessageBox.Show("Error:  " + ex); return; }
                }
                // get all synsets
                else
                {
                    try
                    {
                        synSetsToShow = _wordNetEngine.GetSynSets(word.Text, (WordNetEngine.POS)pos.SelectedItem);
                    }
                    catch (Exception ex) { MessageBox.Show("Error:  " + ex); return; }
                }
            }

            // synSetsToShow is still null when the most-common lookup returned no synset;
            // treat that the same as an empty result instead of dereferencing null.
            if (synSetsToShow == null || synSetsToShow.Count == 0)
            {
                MessageBox.Show("No synsets found");
                return;
            }

            foreach (SynSet synSet in synSetsToShow)
            {
                synSets.Items.Add(synSet);
            }
        }
        /// <summary>
        /// Appends one line per input word to <c>txtHypernyms</c>, listing the words of the hypernym
        /// synsets of every synset of that word in the form <c>word[(a,b)(c)]</c>.
        /// </summary>
        /// <param name="words">Words whose hypernyms are listed.</param>
        /// <param name="pos">Part of speech used for the synset lookup.</param>
        private void OutputHypernyms(IEnumerable <string> words, WordNetEngine.POS pos)
        {
            StringBuilder output       = new StringBuilder();
            int           limitCounter = 0;

            foreach (string word in words)
            {
                limitCounter++;

                //get the synsets
                var synSets = _wordNetEngine.GetSynSets(word, pos);

                //retrieve hypernymSynsets
                output.Append(word + "[");
                foreach (SynSet directSynset in synSets)
                {
                    var hypernymSynSets =
                        directSynset.GetRelatedSynSets(WordNetEngine.SynSetRelation.Hypernym, false);

                    foreach (SynSet hypernymSynSet in hypernymSynSets)
                    {
                        // Materialize once: the filtered query was previously re-run by every Count() call.
                        string[] others =
                            hypernymSynSet.Words.Where(synonym => synonym != word).ToArray();

                        if (others.Length > 0)
                        {
                            // comma-separated, no trailing comma
                            output.Append("(").Append(string.Join(",", others)).Append(")");
                        }
                    }
                }

                output.Append("]" + Environment.NewLine);

                // NOTE(review): the check runs after the word was processed and uses '>', so
                // nupdLimit.Value + 1 words are emitted - confirm whether that is intended.
                if (limitCounter > nupdLimit.Value)
                {
                    break;
                }
            }
            this.txtHypernyms.Text += output.ToString();
        }
        /// <summary>
        /// Builds a parenthesised, " + "-separated list of all words found in the synsets of
        /// <paramref name="concept"/>.
        /// </summary>
        /// <param name="concept">The word to look up.</param>
        /// <returns>
        /// "(w1 + w2 + ...)" for the words of every synset, or "(concept + )" when no synset exists.
        /// </returns>
        public string GetSynonym(string concept)
        {
            // Load the WordNet database. A new engine is created and loaded on every call.
            WordNetEngine wordNetEngine = new WordNetEngine();

            var directory = HttpContext.Current.Server.MapPath("./") + "bin\\WordNet";

            wordNetEngine.LoadFromDirectory(directory);

            // Use the library to fetch the synonyms
            var synSetList = wordNetEngine.GetSynSets(concept);

            if (synSetList.Count == 0)
            {
                return("(" + concept + " + )");
            }

            string synonyms = "";

            foreach (var synSet in synSetList)
            {
                var words = string.Join(" + ", synSet.Words);
                if (words.Length == 0)
                {
                    continue;
                }

                // Separate consecutive synsets as well; previously the last word of one synset
                // was glued to the first word of the next.
                if (synonyms.Length != 0)
                {
                    synonyms += " + ";
                }
                synonyms += words;
            }

            return("(" + synonyms + ")");
        }
        /// <summary>
        /// Looks up the value of the context element in WordNet and renders each matching synset as
        /// a heading of its words followed by its gloss, split into definitions and example sentences.
        /// </summary>
        /// <param name="context">Context whose <c>Element.Value</c> is the search term.</param>
        /// <returns>The rendered text, or an empty string when nothing matched or an error was logged.</returns>
        public string Evaluate(Context context)
        {
            try
            {
                // true  -> every synset of the term is rendered
                // false -> only the most common noun synset is rendered
                // NOTE(review): the setting name "Use-Common" suggests the opposite mapping - confirm.
                var renderAll = _plugin.Settings["Use-Common"].GetValue <bool>();
                var term      = context.Element.Value;

                Set <SynSet> matches = null;
                if (renderAll)
                {
                    matches = _wordNetEngine.GetSynSets(term);
                }
                else
                {
                    var mostCommon = _wordNetEngine.GetMostCommonSynSet(term, WordNetEngine.POS.Noun);
                    if (mostCommon != null)
                    {
                        matches = new Set <SynSet>(new[] { mostCommon });
                    }
                }

                if (matches == null)
                {
                    return(string.Empty);
                }

                var report = "";
                foreach (var synSet in matches)
                {
                    var heading = SynUtility.Text.GetFormattedSentence(synSet.Words);
                    heading = "( " + heading.Replace("_", " ") + " ) ";

                    // Gloss parts are separated by ';'; a part starting with a space and a quote is an example.
                    var gloss = "";
                    foreach (var part in SynUtility.Text.GetWords(synSet.Gloss, ";"))
                    {
                        var label = part.StartsWith(" \"") ? ". Example sentence: " : "Definition: ";
                        gloss += label + part;
                    }

                    report += heading + "\n" + gloss + "\n";
                }

                return(report);
            }
            catch (Exception exception)
            {
                VA.Logger.Error(exception);
            }

            return(string.Empty);
        }
Beispiel #7
0
 /// <summary>
 /// Returns the synsets of <paramref name="word"/>, loading the WordNet data from the
 /// application's App_Data folder first if it has not been loaded yet.
 /// </summary>
 /// <param name="word">The word to look up.</param>
 /// <returns>The synsets reported by the engine for the word.</returns>
 public static List <SynSet> FindDefinition(string word)
 {
     if (!WordNet.IsLoaded)
     {
         // Load only when the engine reports it is not loaded yet.
         var dataDirectory = HostingEnvironment.MapPath("~/App_Data/EBook/Wordnet");
         WordNet.LoadFromDirectory(dataDirectory);
     }

     var synSets = WordNet.GetSynSets(word);
     return(synSets);
 }
Beispiel #8
0
        /// <summary>
        /// Adds one grid row per synset of <paramref name="word"/>: its words, part of speech and
        /// gloss, followed by 27 cells listing the related synsets for relation values 1 through 27.
        /// </summary>
        /// <param name="word">The word to look up; nothing is added when it has no synsets.</param>
        public void InsertInTable(string word)
        {
            const int leadingColumns = 3;
            const int relationCount  = 27;

            var synSets = wordNet.GetSynSets(word);
            if (synSets.Count == 0)
            {
                return;
            }

            foreach (var synSet in synSets)
            {
                object[] row = new object[leadingColumns + relationCount];
                row[0] = string.Join(", ", synSet.Words);
                row[1] = $"{synSet.PartOfSpeech}";
                row[2] = $"{synSet.Gloss}";

                for (int relation = 0; relation < relationCount; relation++)
                {
                    // Every cell starts with a placeholder; related synsets are appended after it.
                    string cell = " --- ";

                    // Relation values are 1-based, hence the +1 on the loop index.
                    foreach (var related in synSet.GetRelatedSynSets((SynSetRelation)relation + 1, true))
                    {
                        cell = cell + ", " + related;
                    }

                    row[leadingColumns + relation] = cell;
                }

                dgv.Rows.Add(row);
            }
        }
 /// <summary>
 /// Adds <paramref name="lemma"/> to <paramref name="baseForms"/> when it is not already listed
 /// and the engine has at least one synset for it with the given part of speech.
 /// </summary>
 /// <param name="lemma">Candidate base form.</param>
 /// <param name="partOfSpeech">Part of speech passed to the synset lookup.</param>
 /// <param name="baseForms">List that receives the lemma.</param>
 /// <returns><c>true</c> when the lemma was added, otherwise <c>false</c>.</returns>
 public bool Execute(string lemma, string partOfSpeech, List <string> baseForms)
 {
     // Already recorded: skip the lookup entirely.
     if (baseForms.Contains(lemma))
     {
         return(false);
     }

     if (mEngine.GetSynSets(lemma, partOfSpeech).Count <= 0)
     {
         return(false);
     }

     baseForms.Add(lemma);
     return(true);
 }
Beispiel #10
0
        //Implementation of advanced system

        //WordNet
        /// <summary>
        /// Loads the WordNet data and index files from the "dict" folder below the current
        /// directory, looks up <paramref name="query"/> and prints every synset to the console.
        /// </summary>
        /// <param name="query">The word to look up.</param>
        /// <returns>One comma-separated string of words per synset found.</returns>
        public static List <string> WordNet(string query)
        {
            var baseDirectory = Directory.GetCurrentDirectory();
            var engine        = new WordNetEngine();

            // Opens a reader on one file inside the dict folder.
            Func <string, StreamReader> open =
                fileName => new StreamReader(Path.Combine(baseDirectory, "./dict/", fileName));

            engine.AddDataSource(open("data.adj"), PartOfSpeech.Adjective);
            engine.AddDataSource(open("data.adv"), PartOfSpeech.Adverb);
            engine.AddDataSource(open("data.noun"), PartOfSpeech.Noun);
            engine.AddDataSource(open("data.verb"), PartOfSpeech.Verb);

            engine.AddIndexSource(open("index.adj"), PartOfSpeech.Adjective);
            engine.AddIndexSource(open("index.adv"), PartOfSpeech.Adverb);
            engine.AddIndexSource(open("index.noun"), PartOfSpeech.Noun);
            engine.AddIndexSource(open("index.verb"), PartOfSpeech.Verb);

            Console.WriteLine("Loading database...");
            engine.Load();
            Console.WriteLine("Load completed.");

            // The interpolated string is never null, so the lookup always runs.
            string word = $"{query}";
            List <Syn.WordNet.SynSet> matches = engine.GetSynSets(word);
            if (matches.Count == 0)
            {
                Console.WriteLine($"No SynSet found for '{word}'");
            }

            var synonyms = new List <string>();
            foreach (var match in matches)
            {
                var joined = string.Join(", ", match.Words);
                synonyms.Add(joined);

                Console.WriteLine($"\nWords: {joined}");
                Console.WriteLine($"POS: {match.PartOfSpeech}");
                Console.WriteLine($"Gloss: {match.Gloss}");
            }

            return(synonyms);
        }
Beispiel #11
0
        /// <summary>
        /// Static initializer: fills the weapon-type keyword table and derives the attack and
        /// defense term lists by expanding a few root words through WordNet.
        /// </summary>
        static InformationExtraction()
        {
            // Keyword lists per weapon type; most terms are listed in lower-case and capitalized form.
            TERMS_CLASSIFICATION = new Dictionary <WeaponType, string[]>();
            TERMS_CLASSIFICATION.Add(WeaponType.Dagger, new string[] { "dagger", "Dagger", "knives", "Knives", "knife", "Knife" });
            TERMS_CLASSIFICATION.Add(WeaponType.Sword, new string[] { "sword", "Sword", "cut", "cutting" });
            TERMS_CLASSIFICATION.Add(WeaponType.Axe, new string[] { "axe", "Axe", "ax", "Ax" });
            TERMS_CLASSIFICATION.Add(WeaponType.PoleWeapon, new string[] { "pole", "Pole", "staff", "Staff", "spear", "Spear" });
            TERMS_CLASSIFICATION.Add(WeaponType.Bow, new string[] { "bow", "Bow" });
            TERMS_CLASSIFICATION.Add(WeaponType.Ranged, new string[] { "throw", "Throw", "projectile", "Projectile", "dart", "Dart", "missile", "Missile", "javelin", "Javelin", "throwing", "Throwing" });
            TERMS_CLASSIFICATION.Add(WeaponType.Club, new string[] { "club", "Club", "blunt", "Blunt", "flail", "Flail", "chain", "Chain" });
            TERMS_CLASSIFICATION.Add(WeaponType.Siege, new string[] { "siege", "Siege", "artillery", "Artillery" });

            // Flattened view of every keyword across all weapon types.
            TERMS_CLASSIFICATION_ALL = TERMS_CLASSIFICATION.SelectMany(kvp => kvp.Value).ToArray();

            // Unlike weapon classification, where the terms are defined up front, the terms that represent Attack and Defense cannot be listed manually.

            // The chosen solution picks a few root words and searches for similar words.
            string[] attackRoots  = { "powerful", "vicious" };
            string[] defenseRoots = { "sturdy", "defensive", "protective", "fortified" };

            // For each root word, scan the synsets WordNet returns and collect their words, skipping synsets that carry an Antonym lexical relation. Words not seen before become the roots of the next pass.
            WordNetEngine wordnet = new WordNetEngine(WeaponGeneratorConstants.FOLDER_WORDNET, true);
            Func <string[], List <string> > scanAndAdd = roots =>
            {
                // The result starts with the root words themselves.
                List <string> terms = new List <string>();
                terms.AddRange(roots);
                // go 3 levels
                for (int i = 3; i > 0; --i)
                {
                    List <string> newRoots = new List <string>();
                    foreach (string root in roots)
                    {
                        Set <SynSet> synonims = wordnet.GetSynSets(root, WordNetEngine.POS.Adjective, WordNetEngine.POS.Adverb, WordNetEngine.POS.Noun);
                        foreach (SynSet synonim in synonims)
                        {
                            // Skip any synset that has an Antonym lexical relation.
                            if (synonim.LexicalRelations.Any(r => r == WordNetEngine.SynSetRelation.Antonym))
                            {
                                continue;
                            }
                            // Debug-only check that Antonym never appears among the semantic relations.
                            Debug.Assert(synonim.SemanticRelations.All(r => r != WordNetEngine.SynSetRelation.Antonym));

                            // Only words not collected so far are queued for the next level;
                            // terms is extended afterwards, so the order of these two lines matters.
                            newRoots.AddRange(synonim.Words.Where(w => !terms.Contains(w)));
                            terms.AddRange(synonim.Words);
                        }
                    }
                    // The next pass expands the words discovered in this pass.
                    roots = newRoots.ToArray();
                }
                // terms may hold repeats; return each word once.
                return(terms.Distinct().ToList());
            };

            TERMS_ATTACK  = scanAndAdd(attackRoots);
            TERMS_DEFENSE = scanAndAdd(defenseRoots);
            // The engine is only needed during initialization; close it and request a garbage collection.
            wordnet.Close();
            GC.Collect();
        }
Beispiel #12
0
        /// <summary>
        /// Returns the synsets of <paramref name="theme"/> as a list, or an empty sequence when the
        /// theme is null, empty or whitespace.
        /// </summary>
        /// <param name="theme">The word to look up; may be null.</param>
        /// <param name="wordNetEngine">Engine used for the lookup.</param>
        private static IEnumerable <SynSet> GetSynSets(string?theme, WordNetEngine wordNetEngine)
        {
            // A blank theme never reaches the engine.
            return(string.IsNullOrWhiteSpace(theme)
                       ? Enumerable.Empty <SynSet>()
                       : wordNetEngine.GetSynSets(theme).ToList());
        }
Beispiel #13
0
        /// <summary>
        /// Looks up the first input term in WordNet and outputs, per synset, its words, part of
        /// speech and gloss. When a second term is supplied, their sentence similarity is output too.
        /// </summary>
        /// <param name="DA">Data access object used to read the input parameters and write the
        /// output parameters.</param>
        protected override void SolveInstance(IGH_DataAccess DA)
        {
            string term1 = null;
            string term2 = null;

            // Abort when the first input is missing, null or a zero-length string.
            if (!DA.GetData(0, ref term1) || string.IsNullOrEmpty(term1))
            {
                return;
            }

            var matches = wordNet.GetSynSets(term1);
            if (matches.Count == 0)
            {
                Console.WriteLine($"No SynSet found for '{term1}'");
            }

            // Three parallel lists: entry i of each describes the same synset.
            var synonymColumn = new List <String>();
            var posColumn     = new List <String>();
            var glossColumn   = new List <String>();

            foreach (var match in matches)
            {
                synonymColumn.Add(string.Join(", ", match.Words));
                posColumn.Add(match.PartOfSpeech.ToString());
                glossColumn.Add(match.Gloss);
            }

            // The similarity output (index 3) is only set when the second term is available.
            if (DA.GetData(1, ref term2))
            {
                DA.SetData(3, wordNet.GetSentenceSimilarity(term1, term2));
            }

            DA.SetDataList(0, synonymColumn);
            DA.SetDataList(1, posColumn);
            DA.SetDataList(2, glossColumn);
        }
Beispiel #14
0
 /// <summary>
 /// Returns the synsets of <paramref name="word"/>, optionally restricted to one part of speech.
 /// </summary>
 /// <param name="word">The word to look up.</param>
 /// <param name="pos">Part-of-speech tags; only the first one is used. When none are given,
 /// all parts of speech are searched.</param>
 /// <returns>The matching synsets, or an empty set when the lookup throws (the error is logged).</returns>
 public Set <SynSet> GetSynSets(string word, params string[] pos)
 {
     try
     {
         bool unrestricted = pos == null || pos.Length == 0;
         if (unrestricted)
         {
             // No tag supplied: search every part of speech.
             return(wordNetEngine.GetSynSets(word));
         }

         // Only the first tag is translated and applied.
         return(wordNetEngine.GetSynSets(word, TransformPOS(pos[0])));
     }
     catch (Exception ex)
     {
         Logger.LogError("Unable to get synset. " + ex);
         return(new Set <SynSet>());
     }
 }
Beispiel #15
0
        /// <summary>
        /// Collects the distinct words of the synsets of <paramref name="word"/> and, one level
        /// further, the words of the synsets of each of those words.
        /// </summary>
        /// <param name="word">The starting word.</param>
        /// <param name="type">Part of speech used for every lookup.</param>
        /// <returns>The distinct collected words, or <c>null</c> when any lookup throws.</returns>
        private static List <string> getSynSets(string word, WordNetEngine.POS type)
        {
            Set <SynSet> lookup;

            try
            {
                lookup = _wordNetEngine.GetSynSets(word, type);
            }
            catch (Exception)
            {
                return(null);
            }

            // First level: words that share a synset with the input word.
            var seedWords = new List <string>();
            foreach (var synSet in lookup)
            {
                seedWords.AddRange(synSet.Words);
            }
            seedWords = seedWords.Distinct().ToList();

            // Second level: expand every first-level word once; the seeds stay at the front.
            var expanded = new List <string>(seedWords);
            foreach (var seed in seedWords)
            {
                try
                {
                    lookup = _wordNetEngine.GetSynSets(seed, type);
                }
                catch (Exception)
                {
                    return(null);
                }

                foreach (var synSet in lookup)
                {
                    expanded.AddRange(synSet.Words);
                }
            }

            return(expanded.Distinct().ToList());
        }
        /// <summary>
        /// Picks, among the noun synsets of the entry's word, the one whose gloss has the smallest
        /// Levenshtein distance to the entry's definition.
        /// </summary>
        /// <param name="entry">Glossary entry supplying the word and its definition.</param>
        /// <param name="wordnet">Engine used for the synset lookup.</param>
        /// <returns>The closest synset, or <c>null</c> when the word has no noun synsets.</returns>
        private SynSet FindSynset(IGlossaryEntry entry, WordNetEngine wordnet)
        {
            // TODO: find POS, use only nouns for now
            int?   minDistance = null;
            SynSet result      = null;

            foreach (SynSet synset in wordnet.GetSynSets(entry.Word, WordNetEngine.POS.Noun))
            {
                int distance = LevenshteinDistance.Compute(entry.Definition, synset.Gloss);
                if (minDistance == null || distance < minDistance.Value)
                {
                    // Remember the best distance so far; without this the last synset always won.
                    minDistance = distance;
                    result      = synset;
                }
            }
            return(result);
        }
Beispiel #17
0
        /// <summary>
        /// Replaces the <c>wordNet</c> engine with a new one loaded from the current directory and
        /// returns the synsets of <paramref name="word"/>.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <returns>The synsets reported by the engine.</returns>
        public List <SynSet> GettingSynSets(string word)
        {
            var dataDirectory = System.IO.Directory.GetCurrentDirectory();

            // A new engine is created and loaded on every call.
            wordNet = new WordNetEngine();
            wordNet.LoadFromDirectory(dataDirectory);

            return(wordNet.GetSynSets(word));
        }
Beispiel #18
0
        /// <summary>
        /// Reads one word per line from "words.txt" and writes a CSV line
        /// (<c>word,relatedWord,relation</c>) to "relaciones.csv" for every Hypernym or Hyponym
        /// semantic relation of the last noun synset of that word. Does nothing when "words.txt" is missing.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            if (!File.Exists("words.txt"))
            {
                return;
            }

            // Both files are disposed even when opening the output file or processing throws.
            using (StreamReader reader = new StreamReader("words.txt"))
            using (var file = File.CreateText("relaciones.csv"))
            {
                string line;

                while ((line = reader.ReadLine()) != null)
                {
                    try
                    {
                        Set <SynSet> synSetsToShow = _wordNetEngine.GetSynSets(line, LAIR.ResourceAPIs.WordNet.WordNetEngine.POS.Noun);
                        //main SynSet; Last() throws for a word without noun synsets, which is reported below
                        SynSet s1 = synSetsToShow.Last();

                        foreach (WordNetEngine.SynSetRelation synSetRelation in s1.SemanticRelations)
                        {
                            if (synSetRelation.ToString().Equals("Hypernym") || synSetRelation.ToString().Equals("Hyponym"))
                            {
                                //related SynSet
                                SynSet s2 = s1.GetRelatedSynSets(synSetRelation, false).First();
                                //fill the line
                                String csv_line = s1.Words[0] + "," + s2.Words[0] + "," + synSetRelation;

                                file.WriteLine(csv_line);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failing word is reported and skipped; the remaining lines are still processed.
                        Console.WriteLine(ex.ToString());
                    }
                }
            }
        }
        /*--------------------------------------------------------------------------------------------*/
        /// <summary>
        /// Inserts the synsets and words for every word known to the engine, then inserts the
        /// lexical and semantic relations in batches, printing progress to the console.
        /// </summary>
        /// <param name="pSess">Session passed through to the insert calls.</param>
        private static void BuildBaseDbInserts(ISession pSess)
        {
            int  processed  = 0;
            int  expected   = 0;
            long startTicks = DateTime.UtcNow.Ticks;

            // First pass: count all words so progress can be reported as "x of total".
            foreach (WordNetEngine.POS partOfSpeech in Engine.AllWords.Keys)
            {
                Set <string> wordSet = Engine.AllWords[partOfSpeech];
                expected += wordSet.Count;
            }

            // Second pass: insert synsets and words, reporting every 5000 words.
            foreach (WordNetEngine.POS partOfSpeech in Engine.AllWords.Keys)
            {
                Set <string> wordSet = Engine.AllWords[partOfSpeech];

                foreach (string word in wordSet)
                {
                    var group = new SynSetGroup(word, Engine.GetSynSets(word));
                    group.InsertSynSetsAndWords(pSess);
                    processed++;

                    if (processed % 5000 != 0)
                    {
                        continue;
                    }

                    // Ticks are 100 ns: integer-divide to milliseconds, then to fractional seconds.
                    double elapsedSeconds = (DateTime.UtcNow.Ticks - startTicks) / 10000 / 1000.0;
                    Console.WriteLine("Syn/Word: \t" + processed + " of " + expected +
                                      " \t" + elapsedSeconds + " sec");
                }
            }

            const int batchSize = 5000;
            processed = 0;
            expected  = SynSetGroup.GetCachedSynsetCount();

            // Relations go in batches until the insert call returns false.
            while (SynSetGroup.InsertLexicalsAndSemantics(pSess, Engine, processed, batchSize))
            {
                processed += batchSize;

                double elapsedSeconds = (DateTime.UtcNow.Ticks - startTicks) / 10000 / 1000.0;
                Console.WriteLine("Lex/Sem: \t" + processed + " of " + expected +
                                  " \t" + elapsedSeconds + " sec");
            }
        }
Beispiel #20
0
        /// <summary>
        /// Loads WordNet from the "Artifacts" folder below the current directory and returns every
        /// word of every synset of the requested word.
        /// </summary>
        /// <param name="linguisticRequest">Request body carrying the word to look up.</param>
        /// <returns>
        /// <c>Ok</c> with the word list; <c>BadRequest</c> when no synset is found or an exception occurs.
        /// </returns>
        public ActionResult Login([FromBody] LinguisticRequest linguisticRequest)
        {
            var artifactsDirectory = Path.Combine(Directory.GetCurrentDirectory(), "Artifacts");

            var engine = new WordNetEngine();
            var result = new List <String>();

            try
            {
                // Opens a reader on one file inside the artifacts folder.
                Func <string, StreamReader> open =
                    fileName => new StreamReader(Path.Combine(artifactsDirectory, fileName));

                engine.AddDataSource(open("data.adj"), PartOfSpeech.Adjective);
                engine.AddDataSource(open("data.adv"), PartOfSpeech.Adverb);
                engine.AddDataSource(open("data.noun"), PartOfSpeech.Noun);
                engine.AddDataSource(open("data.verb"), PartOfSpeech.Verb);

                engine.AddIndexSource(open("index.adj"), PartOfSpeech.Adjective);
                engine.AddIndexSource(open("index.adv"), PartOfSpeech.Adverb);
                engine.AddIndexSource(open("index.noun"), PartOfSpeech.Noun);
                engine.AddIndexSource(open("index.verb"), PartOfSpeech.Verb);

                engine.Load();

                var matches = engine.GetSynSets(linguisticRequest.Word);
                if (matches.Count == 0)
                {
                    return(BadRequest(new { message = "No Words Found." }));
                }

                // Flatten: every word of every synset, duplicates included.
                foreach (var match in matches)
                {
                    foreach (string synonym in match.Words)
                    {
                        result.Add(synonym);
                    }
                }

                return(Ok(result));
            }
            catch (Exception ex)
            {
                return(BadRequest(new { message = ex.Message }));
            }
        }
Beispiel #21
0
        /// <summary>
        /// Expands the lower-cased tokens of <paramref name="query"/> with the words of their
        /// synsets for the configured parts of speech and passes the result to
        /// <c>SetWeightOriginalTrems</c>.
        /// </summary>
        /// <param name="query">The query text.</param>
        /// <returns>
        /// The unchanged query when no part of speech is configured; otherwise the value returned
        /// by <c>SetWeightOriginalTrems</c> for the tokens and the distinct expanded words.
        /// </returns>
        protected string GetSynonyms(string query)
        {
            var partsOfSpeech = SettingsViewModel.Instance.PosLists;
            if (partsOfSpeech.Length == 0)
            {
                return(query);
            }

            string[] tokens = TokeniseString(query.ToLower());

            List <string> expanded = new List <string>();
            foreach (var token in tokens)
            {
                var matches = wordNet.GetSynSets(token, SettingsViewModel.Instance.PosLists);

                if (matches.Count == 0)
                {
                    // Tokens without synsets are kept as they are.
                    expanded.Add(token);
                    Console.WriteLine($"No SynSet found for '{token}'");
                    continue;
                }

                foreach (var match in matches)
                {
                    if (match.PartOfSpeech != PartOfSpeech.None)
                    {
                        expanded.AddRange(match.Words);
                    }
                }
            }

            return(SetWeightOriginalTrems(tokens, expanded.Distinct().ToList()));
        }
Beispiel #22
0
        /// <summary>
        /// Replaces the contents of <c>definitions</c> with one entry per synset of the trimmed
        /// search text, or with a single "no entries" placeholder when nothing is found.
        /// </summary>
        private void Define()
        {
            definitions.Clear();

            var matches = wordNet.GetSynSets(SearchText.Text.Trim());

            if (matches.Count == 0)
            {
                // Placeholder row: the message goes into the second field, the others stay empty.
                definitions.Add(new Definition("", noEntries, ""));
                return;
            }

            foreach (var match in matches)
            {
                var joinedWords  = string.Join(", ", match.Words);
                var partOfSpeech = $"{match.PartOfSpeech}";

                definitions.Add(new Definition(joinedWords, partOfSpeech, match.Gloss));
            }
        }
Beispiel #23
0
        /// <summary>
        /// Gets the unique wordnet synonyms for a word, skipping words that contain '_' or '-'.
        /// </summary>
        /// <param name="word">The word to find synonyms for</param>
        /// <param name="limit">The max amount of unique synonyms to return; a value of zero or less
        /// yields no synonyms</param>
        /// <returns>
        /// up to <paramref name="limit"/> unique synonyms for the word, or an array holding only
        /// <paramref name="word"/> when none were collected
        /// </returns>
        public string[] GetSynonyms(string word, int limit)
        {
            var synSetList = wordNet.GetSynSets(word);

            var words = new HashSet <string>();

            // add each unique word to the set
            foreach (SynSet set in synSetList)
            {
                // dont go over limit; the set size is used so duplicates do not consume the budget
                if (words.Count >= limit)
                {
                    break;
                }

                foreach (string setWord in set.Words)
                {
                    if (words.Count >= limit)
                    {
                        break;
                    }

                    // dont add compound entries (ex. test_test)
                    if (!setWord.Contains('_') && !setWord.Contains('-'))
                    {
                        words.Add(setWord);
                    }
                }
            }

            string[] result = new string[words.Count];
            words.CopyTo(result);

            // fall back to the input word when nothing was collected
            return((result.Length != 0) ? result : new string[] { word });
        }
Beispiel #24
0
        /// <summary>
        /// Collects the words of every synset found for the input word.
        /// </summary>
        /// <param name="word">The word whose synsets are looked up</param>
        /// <returns>
        /// The words of the first synset followed by the not-yet-seen words of
        /// each further synset; when no synset is found, an array containing
        /// only <paramref name="word"/>.
        /// </returns>
        public string[] GetSynnetList(string word)
        {
            var synSets = wordnet.GetSynSets(word);

            // No synset at all: echo the input word back.
            if (synSets.Count == 0)
            {
                return(new[] { word });
            }

            // Start from the first synset and fold the remaining ones in;
            // Union keeps first-seen order and drops repeats.
            List <string> merged = synSets[0].Words;
            foreach (var synSet in synSets.Skip(1))
            {
                merged = merged.Union(synSet.Words).ToList();
            }

            return(merged.ToArray());
        }
Beispiel #25
0
        /// <summary>
        /// Builds a query expansion string from the WordNet synsets of each token.
        /// </summary>
        /// <param name="query_token">The query tokens to expand; null is treated as no tokens.</param>
        /// <returns>
        /// Every synset word that differs from its originating token, separated
        /// by single spaces, with underscores replaced by spaces. Empty when
        /// there are no tokens or no expansion words.
        /// </returns>
        public string Expansion_query(string[] query_token)
        {
            if (query_token == null)
            {
                return("");
            }

            var expandquery = new System.Text.StringBuilder();

            foreach (string t in query_token)
            {
                foreach (var synSet in wordNet.GetSynSets(t))
                {
                    foreach (var word in synSet.Words)
                    {
                        // the token itself adds nothing to the expansion
                        if (word == t)
                        {
                            continue;
                        }

                        // separate terms; without this they fuse into one unusable token
                        if (expandquery.Length > 0)
                        {
                            expandquery.Append(' ');
                        }
                        expandquery.Append(word);
                    }
                }
            }

            // multi-word entries use '_' as the word separator
            return(expandquery.Replace('_', ' ').ToString());
        }
        /// <summary>
        /// Interactive console loop: reads a word, looks up its synsets and
        /// prints the words, part of speech and gloss of each one. The loop
        /// ends when the input stream is exhausted.
        /// </summary>
        public void run()
        {
            while (true)
            {
                Console.Write("\nType first word:");

                var word = Console.ReadLine();

                // ReadLine returns null once no more input is available (closed or
                // redirected stdin); stop instead of looping forever on null.
                if (word == null)
                {
                    break;
                }

                var synSetList = wordNet.GetSynSets(word);
                if (synSetList.Count == 0)
                {
                    Console.WriteLine($"No SynSet found for '{word}'");
                }

                foreach (var synSet in synSetList)
                {
                    var words = string.Join(", ", synSet.Words);

                    Console.WriteLine($"\nWords: {words}");
                    Console.WriteLine($"Part Of Speech: {synSet.PartOfSpeech}");
                    Console.WriteLine($"Gloss: {synSet.Gloss}");
                }
            }
        }
Beispiel #27
0
        /// <summary>
        /// Expands a space-separated input string with the words found on the
        /// shortest hyponym / instance-hyponym paths from the noun synsets of
        /// "entity" to the noun synsets of each input word.
        /// </summary>
        /// <param name="inputString">The text to expand; split on single spaces.</param>
        /// <param name="queryhook">
        /// When true, question words ("who", "what", "where", ...) are appended
        /// if the expanded text contains certain concept names.
        /// </param>
        /// <returns>
        /// The input followed by the expansion words, with a fixed list of
        /// "stop concept" strings removed and the result trimmed. If the
        /// "entity" lookup throws, the input plus one trailing space is returned.
        /// </returns>
        public string WordNetExpand0(string inputString, bool queryhook)
        {
            string[] tokens   = inputString.Split(' ');
            string   expanded = inputString + " ";

            if (userCachedPending == false)
            {
                WNUser2Cache();
            }

            WordNetEngine.POS nounPos = WordNetEngine.POS.Noun;

            // Relations followed when walking from the root concept to a word.
            List <WordNetEngine.SynSetRelation> downwardRelations = new List <WordNetEngine.SynSetRelation>
            {
                WordNetEngine.SynSetRelation.Hyponym,
                WordNetEngine.SynSetRelation.InstanceHyponym
            };

            // Root of every path: the noun synsets of "entity".
            Set <SynSet> rootSynSets;

            try
            {
                rootSynSets = wordNetEngine.GetSynSets("entity", nounPos);
            }
            catch (Exception)
            {
                writeToLog("Invalid Start SynSet ID");
                return(expanded);
            }

            foreach (string token in tokens)
            {
                string tokenExpansion = "";

                if (!WNExpandCache.Contains(token))
                {
                    Set <SynSet> tokenSynSets;
                    try
                    {
                        tokenSynSets = wordNetEngine.GetSynSets(token, nounPos);
                    }
                    catch (Exception)
                    {
                        // A failed lookup skips the token entirely (nothing cached, nothing appended).
                        writeToLog("Invalid Dest SynSet ID");
                        continue;
                    }

                    if (rootSynSets.Count > 0)
                    {
                        foreach (SynSet rootSynSet in rootSynSets)
                        {
                            foreach (SynSet tokenSynSet in tokenSynSets)
                            {
                                List <SynSet> path = rootSynSet.GetShortestPathTo(tokenSynSet, downwardRelations);
                                if ((path == null) || (path.Count == 0))
                                {
                                    continue;
                                }

                                // Append the comma-separated words of every synset on the path.
                                foreach (SynSet step in path)
                                {
                                    StringBuilder members   = new StringBuilder();
                                    string        separator = "";
                                    foreach (string member in step.Words)
                                    {
                                        members.Append(separator + member);
                                        separator = ", ";
                                    }

                                    tokenExpansion = tokenExpansion + " " + members.ToString() + " ";
                                }
                            }
                        }
                    }

                    // The cache stores the trimmed form; this call still uses the untrimmed one.
                    WNExpandCache.Add(token, tokenExpansion.Trim());
                }
                else
                {
                    tokenExpansion = (string)WNExpandCache[token];
                }

                expanded = expanded + " " + tokenExpansion;
            }
            expanded = expanded.Trim();

            if (queryhook)
            {
                // { concept to look for, text appended when it is present }; checked in order.
                string[][] questionHooks =
                {
                    new[] { "person", " who" },
                    new[] { "imaginary_being", " who" },
                    new[] { "causal_agent", " who" },
                    new[] { "object", " what" },
                    new[] { "location", " where" },
                    new[] { "time_period", " when" },
                    new[] { "amount", " how much how many" },
                    new[] { "measure", "  how much how many" },
                    new[] { "quantity", "  how much how many" }
                };

                foreach (string[] hook in questionHooks)
                {
                    if (expanded.Contains(hook[0]))
                    {
                        expanded = expanded + hook[1];
                    }
                }
            }

            // Remove "stop concepts" (per the original author: very frequent, low information).
            // Note: Replace works on substrings, so occurrences inside longer words are removed too.
            string[] stopConcepts = { "entity", "abstraction", "abstract", "unit", "physical", "yes" };
            foreach (string stopConcept in stopConcepts)
            {
                expanded = expanded.Replace(stopConcept, "");
            }

            return(expanded.Trim());
        }
Beispiel #28
0
        /// <summary>
        /// Decides whether <paramref name="with"/> can be unified with this pattern.
        /// First the pattern (used as a regular expression) is matched against the
        /// word's entry in <c>Proc.wordAttributeHash</c>; failing that, WordNet is
        /// searched for a relation path from the word's synsets to the pattern's synsets.
        /// </summary>
        /// <param name="with">The value to test against this pattern.</param>
        /// <returns>
        /// <c>AND_TRUE</c> when the attribute text matches or a non-empty path is
        /// found; <c>AND_FALSE</c> otherwise, including when the WordNet engine is
        /// not loaded or a synset lookup throws.
        /// </returns>
        public override float CanUnify(Unifiable with)
        {
            string re = ComputeInner();

            string wnPos      = GetAttribValue("wnpos", "").ToLower();
            string wnRelation = GetAttribValue("wnrel", "").ToLower();

            // Attribute text recorded for the candidate word: exact key first, then lower-cased key.
            string key            = (string)with.ToValue(query).Trim();
            string wordAttributes = "";

            if (Proc.wordAttributeHash.Contains(key))
            {
                wordAttributes = (string)Proc.wordAttributeHash[key];
            }
            else if (Proc.wordAttributeHash.Contains(key.ToLower()))
            {
                key            = key.ToLower();
                wordAttributes = (string)Proc.wordAttributeHash[key];
            }

            if (new Regex(re).IsMatch(wordAttributes))
            {
                return(AND_TRUE);
            }

            // No direct match: fall back to WordNet.
            WordNetEngine ourWordNetEngine = Proc.wordNetEngine;

            // Part of speech defaults to noun; "wnpos" may name another one by prefix.
            // The loop does not stop early, so the last matching value wins.
            WordNetEngine.POS ourPOS = WordNetEngine.POS.Noun;
            if (wnPos.Length > 0)
            {
                foreach (WordNetEngine.POS candidate in Enum.GetValues(typeof(WordNetEngine.POS)))
                {
                    if (candidate == WordNetEngine.POS.None)
                    {
                        continue;
                    }

                    string candidateName = candidate.ToString().ToLower();
                    if (candidateName.Equals(wnPos) || candidateName.StartsWith(wnPos))
                    {
                        ourPOS = candidate;
                    }
                }
            }

            if (ourWordNetEngine == null)
            {
                writeDebugLine("Wordnet engine not loaded .. returning AND_FALSE");
                return(AND_FALSE);
            }

            Set <SynSet> synPatternSet;
            if (!TryGetSynSetsWithLowercaseFallback(ourWordNetEngine, re, ourPOS, out synPatternSet))
            {
                return(AND_FALSE);
            }

            Set <SynSet> synInputSet;
            if (!TryGetSynSetsWithLowercaseFallback(ourWordNetEngine, key, ourPOS, out synInputSet))
            {
                return(AND_FALSE);
            }

            // Relations to follow: hypernyms by default, otherwise every relation
            // whose lower-cased name contains the "wnrel" text.
            List <WordNetEngine.SynSetRelation> vlist = new List <WordNetEngine.SynSetRelation>();
            if (wnRelation.Length == 0)
            {
                vlist.Add(WordNetEngine.SynSetRelation.Hypernym);
                vlist.Add(WordNetEngine.SynSetRelation.InstanceHypernym);
            }
            else
            {
                foreach (WordNetEngine.SynSetRelation relation in Enum.GetValues(typeof(WordNetEngine.SynSetRelation)))
                {
                    if (relation == WordNetEngine.SynSetRelation.None)
                    {
                        continue;
                    }

                    if (relation.ToString().ToLower().Contains(wnRelation))
                    {
                        vlist.Add(relation);
                    }
                }
            }

            if ((synInputSet.Count == 0) || (synPatternSet.Count == 0))
            {
                return(AND_FALSE);
            }

            // Any non-empty path from an input synset to a pattern synset is a match.
            foreach (SynSet inputSynSet in synInputSet)
            {
                foreach (SynSet patternSynSet in synPatternSet)
                {
                    List <SynSet> path = inputSynSet.GetShortestPathTo(patternSynSet, vlist);
                    if ((path != null) && (path.Count > 0))
                    {
                        return(AND_TRUE);
                    }
                }
            }

            return(AND_FALSE);
        }

        /// <summary>
        /// Looks up the synsets of <paramref name="word"/>; when that yields an
        /// empty set, retries with the lower-cased word.
        /// </summary>
        /// <returns>false when either lookup throws; true otherwise.</returns>
        private static bool TryGetSynSetsWithLowercaseFallback(WordNetEngine engine, string word, WordNetEngine.POS pos, out Set <SynSet> synSets)
        {
            synSets = null;

            try
            {
                synSets = engine.GetSynSets(word, pos);
            }
            catch (Exception)
            {
                return(false);
            }

            if (synSets.Count == 0)
            {
                try
                {
                    synSets = engine.GetSynSets(word.ToLower(), pos);
                }
                catch (Exception)
                {
                    return(false);
                }
            }

            return(true);
        }
Beispiel #29
0
        public void Extract(string text_segment)
        {
            if (!string.IsNullOrEmpty(text_segment))
            {
                #region Local Variables

                int           i = 0;
                int           j;
                int           k;
                int           d;
                int           l;
                int           chunkLength;
                int           chunksLength;
                string        curToken;
                List <SynSet> Senses, tmpSenses;
                SynSet        tmpSense;

                List <SentenceChunk>        Chunks         = new List <SentenceChunk>(); // This list of all chunks
                List <SentenceChunk>        tmpChunks      = new List <SentenceChunk>(); // This list of all chunks
                Dictionary <string, SynSet> CachedConcepts = new Dictionary <string, SynSet>();
                TextVectors = new List <TaggedWord>();                                   // The list that will hold all mappable terms with thier information
                List <string> MiscTerms = new List <string>();                           // The list of unmapped terms in the text
                string[]      tokens;
                string[]      sentences = _sentenceDetector.SentenceDetect(text_segment);

                #endregion

                #region Section 3.1.

                // Extract all chunks from the given text segment
                for (k = 0; k < sentences.Length; k++)
                {
                    tokens    = _tokenizer.Tokenize(sentences[k]);
                    tmpChunks = _chunker.GetChunks(tokens, _posTagger.Tag(tokens));
                    tmpChunks.RemoveAll(predicate => predicate.TaggedWords.Count == 0);
                    Chunks.AddRange(tmpChunks);
                }

                tmpChunks = null;
                tokens    = null;
                sentences = null;

                // Extract elements that will be used for Similarity Matrix Generation as the input of clustering
                chunksLength = Chunks.Count;
                while (i < chunksLength)
                {
                    // Look only inside NP chunks
                    if (Chunks[i].Tag == "NP")
                    {
                        #region Rectify NP Chunks
                        if (i + 1 < chunksLength)
                        {
                            if (Chunks[i + 1].Tag == "NP")
                            {
                                if (Chunks[i + 1].TaggedWords[0].Tag.StartsWith("NNP") || AllowedDTList.ContainsKey(Chunks[i + 1].TaggedWords[0].Word))
                                {
                                    int length = Chunks[i].TaggedWords.Count;
                                    foreach (TaggedWord w in Chunks[i + 1].TaggedWords)
                                    {
                                        w.Index = length;
                                        Chunks[i].TaggedWords.Add(w);
                                        length++;
                                    }

                                    Chunks.RemoveRange(i + 1, 1);
                                    chunksLength = chunksLength - 1;
                                }
                            }
                            else
                            if (Chunks[i + 1].Tag == "PP" && i + 2 < chunksLength)
                            {
                                if (Chunks[i + 2].TaggedWords[0].Tag.StartsWith("NNP") || AllowedDTList.ContainsKey(Chunks[i + 1].TaggedWords[0].Word))
                                {
                                    int length = Chunks[i].TaggedWords.Count;
                                    Chunks[i + 1].TaggedWords[0].Index = length;
                                    Chunks[i].TaggedWords.Add(Chunks[i + 1].TaggedWords[0]);
                                    length++;
                                    foreach (TaggedWord w in Chunks[i + 2].TaggedWords)
                                    {
                                        w.Index = length;
                                        length++;
                                        Chunks[i].TaggedWords.Add(w);
                                    }

                                    Chunks.RemoveRange(i + 1, 2);
                                    chunksLength = chunksLength - 2;
                                }
                            }
                        }
                        #endregion

                        #region Find N-Gram NNPs

                        // This part is very important:
                        // 1- Rectify any linguistic errors generated as side effect of the previous step (such as "Belly the")
                        // 2- Eliminate any syntactic errors such as Texas Rangers (sports) --> Texas Ranger (Police)
                        // since we don't alter the value of a NNP(s)

                        chunkLength = Chunks[i].TaggedWords.Count;
                        j           = 0;
                        // Loop through all chunk words
                        while (j < chunkLength)
                        {
                            if (Chunks[i].TaggedWords[j].Tag[0] == 'N')
                            {
                                // Needed for fast access to the last element in SemanticElements
                                d = TextVectors.Count() - 1;

                                // Check the probability of merging N-gram Named Entities (NNP(S)* || NNP(S)*|DT*|NNP(S)*)
                                if (Chunks[i].TaggedWords[j].Tag.StartsWith("NNP"))
                                {
                                    k = 0;

                                    // First scan to see if the pattern is satisfied
                                    for (l = j + 1; l < chunkLength; l++)
                                    {
                                        // Here to define any patterns the user may wish to apply
                                        if (
                                            Chunks[i].TaggedWords[l].Tag.StartsWith("NNP") || // allow N-Gram NNP
                                            AllowedDTList.ContainsKey(Chunks[i].TaggedWords[l].Word) || // allow adding stop words inside the NNP
                                            Chunks[i].TaggedWords[l].Tag == "CD"    // allow adding numbers inside NNP
                                            )
                                        {
                                            k++;
                                        }
                                        else
                                        {
                                            break;
                                        }
                                    }
                                    // k-value changing means a pattern has been found
                                    // if k is changed and the scanned pattern does not end with a stop word
                                    if (k > 0 && !AllowedDTList.ContainsKey(Chunks[i].TaggedWords[j + k].Word))
                                    {
                                        // Concatenate all the pattern parts ans store them in temp variable
                                        curToken = Chunks[i].TaggedWords[j].Word;
                                        for (l = j + 1; l <= j + k; l++)
                                        {
                                            curToken = curToken + " " + Chunks[i].TaggedWords[l].Word;
                                        }

                                        // Delete all the parts added in temp
                                        Chunks[i].TaggedWords.RemoveRange(j + 1, k);

                                        // rectify the sequence length after deletion
                                        chunkLength = chunkLength - k;


                                        // Check if the perv token is a capitalized JJ
                                        if (d > -1 && j > 0 && TextVectors[d].Tag == "JJ" && char.IsUpper(TextVectors[d].Word[0]))
                                        {
                                            // Replace current j with its previous j-1 word, and allocate special tag NNP*J
                                            Chunks[i].TaggedWords[j - 1].Tag  = Chunks[i].TaggedWords[j].Tag + "J";
                                            Chunks[i].TaggedWords[j - 1].Word = TextVectors[d].Word + " " + curToken;
                                            // Remove the previous word from all lists
                                            TextVectors.RemoveAt(d);
                                            Chunks[i].TaggedWords.RemoveRange(j, 1);
                                            chunkLength--;
                                            j--;
                                        }
                                        else
                                        {
                                            // Only update the current word
                                            Chunks[i].TaggedWords[j].Word = curToken;
                                        }

                                        TextVectors.Add(Chunks[i].TaggedWords[j]);
                                        // Skip the loop by k steps
                                        j = j + k;
                                    }
                                    else
                                    {
                                        // If there is no pattern match --> add singular NNP(S)
                                        // Before addition check JJ pattern
                                        if (d > -1 && j > 0 && TextVectors[d].Tag == "JJ" && char.IsUpper(TextVectors[d].Word[0]))
                                        {
                                            // Replace current j with its previous j-1 word, and allocate special tag NNP*J
                                            Chunks[i].TaggedWords[j - 1].Tag  = Chunks[i].TaggedWords[j].Tag + "J";
                                            Chunks[i].TaggedWords[j - 1].Word = TextVectors[d].Word + " " + Chunks[i].TaggedWords[j].Word;
                                            // Remove the previous word from all lists
                                            TextVectors.RemoveAt(d);
                                            Chunks[i].TaggedWords.RemoveRange(j, 1);
                                            chunkLength--;
                                            j--;
                                        }

                                        TextVectors.Add(Chunks[i].TaggedWords[j]);
                                        j++;
                                    }
                                }
                                else
                                {
                                    // If the current word is NN(S)
                                    if (Chunks[i].TaggedWords[j].Tag == "NNS")
                                    {
                                        Chunks[i].TaggedWords[j].Word = _wn.Lemmatize(Chunks[i].TaggedWords[j].Word, "noun");
                                    }

                                    // Find if the current token forms bigram WordNet concept with the previous token
                                    if (j > 0)
                                    {
                                        if (Chunks[i].TaggedWords[j - 1].Tag == "NN" || Chunks[i].TaggedWords[j - 1].Tag == "NNS" || Chunks[i].TaggedWords[j - 1].Tag == "JJ")
                                        {
                                            if (_wn.GetSynSets(Chunks[i].TaggedWords[j - 1].Word + "_" + Chunks[i].TaggedWords[j].Word, "noun").Count > 0)
                                            {
                                                Chunks[i].TaggedWords[j].Word  = Chunks[i].TaggedWords[j - 1].Word + "_" + Chunks[i].TaggedWords[j].Word;
                                                Chunks[i].TaggedWords[j].Index = Chunks[i].TaggedWords[j - 1].Index;
                                                Chunks[i].TaggedWords.RemoveRange(j - 1, 1);
                                                TextVectors.RemoveAt(d);
                                                j--;
                                                chunkLength--;
                                            }
                                        }
                                    }

                                    TextVectors.Add(Chunks[i].TaggedWords[j]);
                                    j++;
                                }
                            }
                            else
                            {
                                if (Chunks[i].TaggedWords[j].Tag[0] == 'J')
                                {
                                    // We add adjectives to increase the disambiguation accuracy
                                    TextVectors.Add(Chunks[i].TaggedWords[j]);
                                }
                                // Skip any chunk element that is not NNP(S),NN(S), or JJ(*)
                                j++;
                            }
                        }
                        #endregion

                        i++;
                    }
                    else
                    {
                        // Remove the current Chunk since it was checked during rectification phase of the previous step
                        // Keeping only NPs is for efficiency reason during the last step of the algorithm
                        Chunks.RemoveRange(i, 1);
                        chunksLength--;
                    }
                }

                #region Disambiguatation

                d = TextVectors.Count;
                // Normalize NNP* vectors before the actual disambiguatation
                // Performing normalization after disambiguatation may affects caching the concepts since the keys may change
                for (i = 0; i < d; i++)
                {
                    if (TextVectors[i].Tag.StartsWith("NNP"))
                    {
                        for (j = 0; j < d; j++)
                        {
                            if (TextVectors[j].Tag.StartsWith("NNP"))
                            {
                                if (TextVectors[i].Word.Contains(TextVectors[j].Word))
                                {
                                    TextVectors[j].Word = TextVectors[i].Word;
                                    TextVectors[j].Tag  = TextVectors[i].Tag;
                                }
                                else
                                if (TextVectors[j].Word.Contains(TextVectors[i].Word))
                                {
                                    TextVectors[i].Word = TextVectors[j].Word;
                                    TextVectors[i].Tag  = TextVectors[j].Tag;
                                }
                            }
                        }
                    }
                }



                for (i = 0; i < d; i++)
                {
                    // For limiting access to the list -- Efficiency
                    curToken = TextVectors[i].Word;
                    if (TextVectors[i].Tag == "NN" || TextVectors[i].Tag == "NNS")
                    {
                        if (CachedConcepts.ContainsKey(curToken))
                        {
                            TextVectors[i].Sense = CachedConcepts[curToken];
                        }
                        else
                        {
                            // Check availability in WordNet
                            Senses = _wn.GetSynSets(curToken, false, WordNetEngine.POS.Noun);
                            if (Senses.Count > 0)
                            {
                                tmpSense = Disambiguate(Senses, GenerateContextWindow(i, d));
                                CachedConcepts.Add(curToken, tmpSense);
                                TextVectors[i].Sense = CachedConcepts[curToken];
                            }
                        }
                    }
                    else
                    if (TextVectors[i].Tag.StartsWith("NNP"))
                    {
                        if (CachedConcepts.ContainsKey(curToken))
                        {
                            TextVectors[i].Sense = CachedConcepts[curToken];
                            continue;
                        }

                        Senses = _wn.GetSynSets(curToken.Replace(" ", "_"), false, WordNetEngine.POS.Noun);
                        if (Senses.Count > 0)
                        {
                            tmpSense = Disambiguate(Senses, GenerateContextWindow(i, d));
                            CachedConcepts.Add(curToken, tmpSense);
                            TextVectors[i].Sense = CachedConcepts[curToken];
                            continue;
                        }

                        if (PlugInsNumber > 0)
                        {
                            Senses.Clear();
                            for (l = 0; l < PlugInsNumber; l++)
                            {
                                KBDriverQueryArgs[1] = curToken;
                                tmpSenses            = KBDriversQueryPointers[l].Invoke(KBDrivers[l], KBDriverQueryArgs) as List <SynSet>;
                                if (tmpSenses != null)
                                {
                                    Senses.AddRange(tmpSenses);
                                }
                            }

                            if (Senses.Count > 0)
                            {
                                tmpSense = Disambiguate(Senses, GenerateContextWindow(i, d));
                                CachedConcepts.Add(curToken, tmpSense);
                                TextVectors[i].Sense = CachedConcepts[curToken];
                                continue;
                            }
                        }

                        if (TextVectors[i].Tag.EndsWith("J"))
                        {
                            TextVectors[i].Word = curToken.Substring(curToken.IndexOf(" ") + 1);
                            TextVectors[i].Tag  = TextVectors[i].Tag.Substring(0, TextVectors[i].Tag.Length - 1);
                            i--;
                            continue;
                        }
                    }
                }


                // Prepare the vectors for semantic similarity measurement
                // hence, any vector does not hold valid sense must be excluded from the list in temp list
                i = 0;
                while (i < d)
                {
                    if (TextVectors[i].Sense == null)
                    {
                        if (TextVectors[i].Tag.StartsWith("NNP") && !MiscTerms.Contains(TextVectors[i].Word))
                        {
                            MiscTerms.Add(TextVectors[i].Word);
                        }
                        TextVectors.RemoveAt(i);
                        d--;
                    }
                    else
                    {
                        i++;
                    }
                }
                #endregion
                // [Implicit-Dispose]
                tmpSense  = null;
                tmpSenses = null;
                Senses    = null;

                #endregion

                #region Section 3.2.

                // Row * Col - Diagonal / 2 (above or under the Diagonal)
                double[] S = new double[((d * d) - d) / 2];
                // Dummy counter
                k = 0;
                for (i = 0; i < d; i++)
                {
                    for (j = i + 1; j < d; j++)
                    {
                        S[k] = Math.Round(wupMeasure(TextVectors[i].Sense, TextVectors[j].Sense), 4);
                        k++;
                    }
                }

                // Perform clustering on S
                int[] res = ap.Run(S, d, 1, 0.9, 1000, 50);

                // Optimized Clustering information collection
                // We collect clustering information and at the same time filter out all terms that are not close to their exemplars
                Dictionary <int, List <int> > ClusRes = new Dictionary <int, List <int> >();
                // ===================================
                for (i = 0; i < res.Length; i++)
                {
                    if (!ClusRes.ContainsKey(res[i]))
                    {
                        ClusRes.Add(res[i], new List <int>());
                    }

                    if (i == res[i])
                    {
                        ClusRes[res[i]].Add(i);
                        continue;
                    }

                    if (Math.Round(wupMeasure(TextVectors[res[i]].Sense, TextVectors[i].Sense), 4) >= ClosenessToCentroid)
                    {
                        ClusRes[res[i]].Add(i);
                    }
                }

                Console.WriteLine("-> Clustering Information:\n");
                foreach (KeyValuePair <int, List <int> > kv in ClusRes)
                {
                    Console.Write("\t[" + TextVectors[kv.Key].Word + "] " + TextVectors[kv.Key].Sense.ID + " : ");
                    foreach (var item in kv.Value)
                    {
                        Console.Write(TextVectors[item].Word + ",");
                    }
                    Console.WriteLine();
                    Console.WriteLine();
                }

                // Manual averaging of exemplars (Sec. 3.2)
                Console.WriteLine("-> Remove unimportant clusters:");
                bool delFlag;
                while (true)
                {
                    delFlag = false;
                    Console.Write("\tEnter Seed:");
                    curToken = Console.ReadLine();
                    if (curToken == "$")
                    {
                        break;
                    }

                    foreach (var key in ClusRes.Keys)
                    {
                        if (TextVectors[key].Word == curToken)
                        {
                            delFlag = ClusRes.Remove(key);
                            break;
                        }
                    }
                    if (delFlag)
                    {
                        Console.WriteLine("\tCluster deleted");
                    }
                    else
                    {
                        Console.WriteLine("\tSeed is not found");
                    }
                    Console.WriteLine();
                }

                // ESA-Based averaging of exemplars
                // Insert here local server API

                #endregion

                #region Section 3.3.

                // Flatten ClusRes into List
                List <int> Seeds = ClusRes.Values
                                   .SelectMany(x => x) // Flatten
                                   .ToList();

                // Final seeds list must be sorted in case of using candidate phrase selection from a window
                //Seeds.Sort();

                List <string> CandidatePhrases    = new List <string>();
                List <string> CandidatePhraseSeed = new List <string>();

                SelectionWindowSize = Chunks.Count;
                for (i = 0; i < Chunks.Count; i++)
                {
                    if (Chunks[i].Tag == "NP")
                    {
                        d = Chunks[i].TaggedWords.Count;
                        for (l = 0; l < Seeds.Count; l++)
                        {
                            for (j = 0; j < d; j++)
                            {
                                if (Chunks[i].TaggedWords[j].Word == TextVectors[Seeds[l]].Word)
                                {
                                    if (TextVectors[Seeds[l]].Tag.StartsWith("NNP") && !CandidatePhrases.Contains(TextVectors[Seeds[l]].Word) && i < SelectionWindowSize)
                                    {
                                        CandidatePhrases.Add(TextVectors[Seeds[l]].Word);
                                        if (TextVectors[Seeds[l]].Sense.URI != null)
                                        {
                                            CandidatePhraseSeed.Add(TextVectors[Seeds[l]].Sense.URI);
                                        }
                                        else
                                        {
                                            CandidatePhraseSeed.Add("http://www.pdl.io/core_onto/" + TextVectors[Seeds[l]].Sense.ID);
                                        }
                                    }
                                    else
                                    if (TextVectors[Seeds[l]].Tag == "NN" || TextVectors[Seeds[l]].Tag == "NNS")
                                    {
                                        curToken = TextVectors[Seeds[l]].Word;
                                        if (j > 0 && Chunks[i].TaggedWords[j - 1].Tag == "JJ")
                                        {
                                            curToken = Chunks[i].TaggedWords[j - 1].Word + " " + curToken;
                                        }

                                        for (k = j + 1; k < d; k++)
                                        {
                                            if (Chunks[i].TaggedWords[k].Tag != "NN")
                                            {
                                                break;
                                            }
                                            else
                                            {
                                                curToken = curToken + " " + Chunks[i].TaggedWords[k].Word;
                                            }
                                        }

                                        if (curToken.Contains(" ") || curToken.Contains("_"))
                                        {
                                            if (!CandidatePhrases.Contains(curToken))
                                            {
                                                CandidatePhrases.Add(curToken);
                                                if (TextVectors[Seeds[l]].Sense.URI != null)
                                                {
                                                    CandidatePhraseSeed.Add(TextVectors[Seeds[l]].Sense.URI);
                                                }
                                                else
                                                {
                                                    CandidatePhraseSeed.Add("http://www.pdl.io/core_onto/" + TextVectors[Seeds[l]].Sense.ID);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                #endregion

                // Print results
                Console.WriteLine("\n-> Candidate Keyphrases:\n");
                for (i = 0; i < CandidatePhrases.Count; i++)
                {
                    Console.WriteLine("\t" + CandidatePhrases[i].Replace("_", " ") + " , URI:" + CandidatePhraseSeed[i]);
                }

                Console.WriteLine("\n-> MISC Entities:\n");
                for (i = 0; i < MiscTerms.Count; i++)
                {
                    Console.WriteLine("\t" + MiscTerms[i]);
                }
            }
        }
Beispiel #30
0
        // This method will initiate the search process.
        /// <summary>
        /// Runs a search for the text in <c>QueryBox</c>, displays the hits and reports the elapsed time.
        /// </summary>
        /// <remarks>
        /// The query string depends on the check boxes: query expansion takes precedence over
        /// multi-term (phrase) search; with neither checked the text is searched as typed.
        /// The reported time covers building the query (including loading WordNet when
        /// expansion is enabled) as well as the search itself.
        /// </remarks>
        private void searchQuery()
        {
            if (!LuceneSearch.isIndexAvailale())
            {
                MessageBox.Show("Please create/load an index first", "Index Error");
                return;
            }

            // A whitespace-only query carries no terms, so it is rejected like an empty one.
            if (String.IsNullOrWhiteSpace(QueryBox.Text))
            {
                MessageBox.Show("Please enter a query to search.", "Retry");
                return;
            }

            resultsTable.Rows.Clear();
            resultsTable.Refresh();

            Stopwatch stopwatch = Stopwatch.StartNew();

            // Only the query string differs between the three search modes.
            string queryText;
            if (queryExpansionCheckBox.Checked)
            {
                // Lucene search with query expansion
                queryText = BuildSynonymExpandedQuery(QueryBox.Text);
            }
            else if (multiTermCheckBox.Checked)
            {
                // Quote the whole input so it is parsed as a single phrase
                queryText = "\"" + QueryBox.Text + "\"";
            }
            else
            {
                // Lucene search without query expansion
                queryText = QueryBox.Text;
            }

            LuceneSearch.CreateSearcher();
            LuceneSearch.CreateParser(LuceneSearch.FIELDS_FN[2]);
            Query parsedQuery = LuceneSearch.ParseQuery(queryText);
            queryTextBox.Text = parsedQuery.ToString();
            TopDocs resultDocs = LuceneSearch.SearchText(parsedQuery);
            DisplaySearch(resultDocs);

            stopwatch.Stop();
            TimeSpan elapsed     = stopwatch.Elapsed;
            string   elapsedTime = elapsed.ToString(@"hh\:mm\:ss\.fff");

            SearchTimeLabel.Text = "Search time elapsed: " + elapsedTime;
        }

        /// <summary>
        /// Builds the expanded query: every token of the lower-cased input followed by
        /// <c>^5</c>, then every word of every synset WordNet returns for those tokens
        /// (excluding words equal to the token itself).
        /// </summary>
        /// <param name="rawQuery">The query text as typed by the user.</param>
        /// <returns>The expanded query string to hand to the query parser.</returns>
        private string BuildSynonymExpandedQuery(string rawQuery)
        {
            string directory   = Directory.GetCurrentDirectory() + "/wordnet";
            var    openReaders = new List<StreamReader>();

            try
            {
                var wordNet = new WordNetEngine();

                wordNet.AddDataSource(OpenTrackedReader(directory, "data.adj", openReaders), PartOfSpeech.Adjective);
                wordNet.AddDataSource(OpenTrackedReader(directory, "data.adv", openReaders), PartOfSpeech.Adverb);
                wordNet.AddDataSource(OpenTrackedReader(directory, "data.noun", openReaders), PartOfSpeech.Noun);
                wordNet.AddDataSource(OpenTrackedReader(directory, "data.verb", openReaders), PartOfSpeech.Verb);

                wordNet.AddIndexSource(OpenTrackedReader(directory, "index.adj", openReaders), PartOfSpeech.Adjective);
                wordNet.AddIndexSource(OpenTrackedReader(directory, "index.adv", openReaders), PartOfSpeech.Adverb);
                wordNet.AddIndexSource(OpenTrackedReader(directory, "index.noun", openReaders), PartOfSpeech.Noun);
                wordNet.AddIndexSource(OpenTrackedReader(directory, "index.verb", openReaders), PartOfSpeech.Verb);

                wordNet.Load();

                var boostedTokens = new System.Text.StringBuilder();
                var synonyms      = new System.Text.StringBuilder();

                foreach (string token in LuceneSearch.TokenizeString(rawQuery.ToLower()))
                {
                    // The user's own terms are boosted so they outweigh the synonyms.
                    boostedTokens.Append(token).Append("^5 ");

                    foreach (SynSet synSet in wordNet.GetSynSets(token))
                    {
                        foreach (string word in synSet.Words)
                        {
                            if (!word.Equals(token))
                            {
                                synonyms.Append(word).Append(' ');
                            }
                        }
                    }
                }

                return boostedTokens.Append(synonyms.ToString()).ToString();
            }
            finally
            {
                // Readers stay open until every WordNet lookup is done (it is not visible
                // here whether the engine reads lazily), then their file handles are released.
                foreach (StreamReader reader in openReaders)
                {
                    reader.Dispose();
                }
            }
        }

        /// <summary>
        /// Opens <paramref name="fileName"/> inside <paramref name="directory"/> and records the
        /// reader in <paramref name="openReaders"/> so the caller can dispose it later.
        /// </summary>
        private static StreamReader OpenTrackedReader(string directory, string fileName, List<StreamReader> openReaders)
        {
            var reader = new StreamReader(Path.Combine(directory, fileName));
            openReaders.Add(reader);
            return reader;
        }