Beispiel #1
0
        /// <summary>
        /// Initializes the designer components, creates the WordNet engine and the similarity
        /// model built on top of it, and fills the part-of-speech list.
        /// </summary>
        public TestForm()
        {
            InitializeComponent();

            // create wordnet engine (use disk-based retrieval by default)
            // NOTE(review): this is an absolute, machine-specific path; it should come from
            // configuration or be resolved relative to the application directory.
            _wordNetEngine = new WordNetEngine(@"C:\Users\danie\Documents\Visual Studio 2015\Projects\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources", false);

            if (!_wordNetEngine.InMemory)
            {
                test.Text += " (will take a while)";
            }

            // populate POS list with every enum value except the None placeholder
            foreach (WordNetEngine.POS p in Enum.GetValues(typeof(WordNetEngine.POS)))
            {
                if (p != WordNetEngine.POS.None)
                {
                    pos.Items.Add(p);
                }
            }

            pos.SelectedIndex = 0;

            // allow scrolling of synset list
            synSets.HorizontalScrollbar = true;

            _semSimSs1 = _semSimSs2 = null;
            _origSsLbl = ss1.Text; // keep the initial text of ss1
            _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
        }
Beispiel #2
0
        /// <summary>
        /// Initializes the designer components, creates the WordNet engine and the similarity
        /// model built on top of it, and fills the part-of-speech list.
        /// </summary>
        public TestForm()
        {
            InitializeComponent();

            // create wordnet engine (use disk-based retrieval by default)
            // The resource directory is given relative to the process working directory.
            _wordNetEngine = new WordNetEngine(@"..\..\..\..\resources", false);

            if (!_wordNetEngine.InMemory)
            {
                test.Text += " (will take a while)";
            }

            // populate POS list with every enum value except the None placeholder
            foreach (WordNetEngine.POS p in Enum.GetValues(typeof(WordNetEngine.POS)))
            {
                if (p != WordNetEngine.POS.None)
                {
                    pos.Items.Add(p);
                }
            }

            pos.SelectedIndex = 0;

            // allow scrolling of synset list
            synSets.HorizontalScrollbar = true;

            _semSimSs1 = _semSimSs2 = null;
            _origSsLbl = ss1.Text; // keep the initial text of ss1
            _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
        }
Beispiel #3
0
 /// <summary>
 /// Returns the terms whose part of speech fits the given WordNet part of speech.
 /// </summary>
 /// <param name="pos">WordNet part of speech to match against.</param>
 /// <returns>A lazily evaluated sequence of the matching terms.</returns>
 public IEnumerable<Term> FindTermsByWordnetPOS(WordNetEngine.POS pos)
 {
     // The filter is deferred: it runs each time the returned sequence is enumerated.
     return terms.Where(candidate => candidate.PoS.FitsWordnetPOS(pos));
 }
Beispiel #4
0
 /// <summary>
 /// Sets up the fixture with the engine exposed by TestHelper and a similarity model on top of it.
 /// </summary>
 public General()
 {
     // One WordNetEngine is reused for all tests in the hope of better efficiency;
     // an alternative would be an engine created with inMemory set to false.
     var sharedEngine = TestHelper.WordNetEngine;

     _wordNetEngine = sharedEngine;
     _semanticSimilarityModel = new WordNetSimilarityModel(sharedEngine);
 }
 /// <summary>
 /// Maps a WordNetEngine part of speech onto the corresponding PartsOfSpeech value.
 /// </summary>
 /// <param name="pos">WordNetEngine part of speech.</param>
 /// <returns>The matching PartsOfSpeech value, or PartsOfSpeech.Unknown for any other input.</returns>
 public static PartsOfSpeech GetWnlibPOSFromWordNetEnginePOS(WordNetEngine.POS pos)
 {
     switch (pos)
     {
         case WordNetEngine.POS.Adjective:
             return PartsOfSpeech.Adj;
         case WordNetEngine.POS.Adverb:
             return PartsOfSpeech.Adv;
         case WordNetEngine.POS.Noun:
             return PartsOfSpeech.Noun;
         case WordNetEngine.POS.Verb:
             return PartsOfSpeech.Verb;
         default:
             return PartsOfSpeech.Unknown;
     }
 }
        /// <summary>
        /// Creates the classifier with its own WordNet engine, a similarity model based on
        /// that engine, and an empty category list.
        /// </summary>
        public Classifier()
        {
            // Resource directory is relative to the working directory; second argument is false.
            var engine = new WordNetEngine(@"..\resources", false);

            wordNetEngine = engine;
            semanticSimilarityModel = new WordNetSimilarityModel(engine);

            CategoriesNew = new List<Category>();
        }
Beispiel #7
0
        /// <summary>
        /// Creates the WordNet engine, the NLP service, the bag of words, the term repository
        /// and the execution list (seeded with one FirstExecution).
        /// </summary>
        /// <param name="wordnetDir">Directory passed to the WordNetEngine constructor.</param>
        /// <param name="inMemory">Flag passed to the WordNetEngine constructor.</param>
        /// <param name="modelDir">Directory passed to the OpenNLPService constructor.</param>
        public void Init(string wordnetDir, bool inMemory, string modelDir)
        {
            wordnet = new WordNetEngine(wordnetDir, inMemory);
            nlp = new OpenNLPService(modelDir);
            bow = new BagOfWords();
            terms = new FlatRepository();

            executions = new List<IExecution>();

            // The first execution works on the engine and NLP service created above.
            var firstPass = new FirstExecution(wordnet, nlp);
            executions.Add(firstPass);
        }
Beispiel #8
0
        private List<string> _words; // words must be ordered in order to use lexical relation indexes

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates an uninstantiated SynSet shell that only knows its POS and offset. Words, gloss and
        /// related SynSets become available after SynSet.Instantiate has been called.
        /// </summary>
        /// <param name="pos">POS of SynSet</param>
        /// <param name="offset">Byte location of SynSet definition within data file</param>
        /// <param name="wordNetEngine">WordNet engine used to instantiate this synset. Must be null when the
        /// engine uses in-memory storage; a non-null in-memory engine is rejected with an exception.</param>
        internal SynSet(WordNetEngine.POS pos, int offset, WordNetEngine wordNetEngine)
        {
            _pos = pos;
            _offset = offset;
            _wordNetEngine = wordNetEngine;
            _instantiated = false;

            // an engine reference is only meaningful for disk-based retrieval
            bool engineIsInMemory = wordNetEngine != null && wordNetEngine.InMemory;
            if (engineIsInMemory)
            {
                throw new Exception("Don't need to pass a non-null WordNetEngine when using in-memory storage");
            }

            // the ID ("POS:offset") and its hash code are computed once, up front
            _id = string.Concat(_pos, ":", _offset);
            _hashCode = _id.GetHashCode();
        }
Beispiel #9
0
        /// <summary>
        /// Builds the page: opens every dictionary file as an application resource stream, creates the
        /// WordNet engine from those streams and wires up the background worker.
        /// </summary>
        public MainPage()
        {
            InitializeComponent();

            // Map each dictionary file name to its resource stream.
            var streamsByName = new Dictionary<string, Stream>(dictFileNames.Length);
            foreach (string name in dictFileNames)
            {
                var location = new Uri(string.Concat(resPrefix, name), UriKind.Relative);
                var resource = Application.GetResourceStream(location);
                streamsByName.Add(name, resource.Stream);
            }

            wn = new WordNet.WordNetEngine(streamsByName);

            // Worker that runs QueryWNIAndPopulate and reports back through OnListLoadComplete.
            bgWorker = new BackgroundWorker();
            bgWorker.WorkerReportsProgress = true;
            bgWorker.DoWork += QueryWNIAndPopulate;
            bgWorker.RunWorkerCompleted += OnListLoadComplete;
        }
Beispiel #10
0
        public static IEnumerable <Pun> GetPuns(
            PunCategory category,
            string theme,
            IReadOnlyCollection <SynSet> synSets,
            WordNetEngine wordNetEngine,
            PronunciationEngine pronunciationEngine,
            SpellingEngine spellingEngine,
            IReadOnlyList <PunStrategyFactory> strategies)
        {
            var sw = Stopwatch.StartNew();

#if Debug
            Console.WriteLine(@"Getting Puns");
#endif

            var resultCount = 0;

            var phrases = GetPhrases(category);

            var themeWords =
                synSets.SelectMany(
                    synSet => GetRelatedWords(theme, synSet, wordNetEngine)
                    .Select(x => x.Word)
                    )
                .Where(x => !x.Contains('_'))
                .Prepend(theme)
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .Except(CommonWords.Value, StringComparer.OrdinalIgnoreCase)
                .Where(x => x.Length > 1)
                .Select(pronunciationEngine.GetPhoneticsWord)
                .Where(x => x is not null)
                .Cast <PhoneticsWord>()
                .Where(x => x.Syllables.Count > 1 || x.Syllables[0].Symbols.Count > 1)
                .Distinct(WordPronunciationComparer.Instance)
                .ToList();
#if Debug
            Console.WriteLine($@"Got Theme Words ({sw.Elapsed}");
#endif

            var cache = new Dictionary <PhoneticsWord, PunReplacement>();

            var punStrategies =
                strategies.Select(x => x.GetStrategy(spellingEngine, themeWords)).ToList();

        #if Debug
            Console.WriteLine($@"Built Strategies ({sw.Elapsed}");
#endif

            //TODO run in parallel
            foreach (var phrase in phrases)
            {
                var words = phrase
                            .Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

                var wordList         = new List <string>();
                var punWords         = new HashSet <string>();
                var containsOriginal = false;
                var containsPun      = false;

                foreach (var word in words)
                {
                    var bestReplacement = BestReplacement(
                        word,
                        pronunciationEngine,
                        cache,
                        punStrategies
                        );

                    if (bestReplacement != null)
                    {
                        var casing    = DetectCasing(word);
                        var newString = ToCase(bestReplacement.Value.ReplacementString, casing);
                        wordList.Add(newString);
                        containsOriginal |= bestReplacement.Value.IsAmalgam;
                        containsPun       = true;
                        punWords.Add(bestReplacement.Value.PunWord);
                    }
                    else
                    {
                        wordList.Add(word);
                        containsOriginal = true;
                    }
                }

                if (containsPun && (words.Length > 1 || containsOriginal))
                {
                    var pun = new Pun(wordList.ToDelimitedString(" "), phrase, punWords);

                #if Debug
                    if (resultCount == 0)
                    {
                        Console.WriteLine($@"{pun.NewPhrase} ({sw.Elapsed})");
                    }
                #endif

                    yield return(pun);

                    resultCount++;
                }
            }


        #if Debug
            Console.WriteLine($@"{resultCount} Puns Got ({sw.Elapsed})");
        /// <summary>
        /// Gets the similarity of two strings. Depending on the strategy, the score is the average, maximum
        /// or minimum over all synset pairs of the two string/POS pairs, or the score of their most common synsets.
        /// </summary>
        /// <param name="string1">First string</param>
        /// <param name="pos1">First POS</param>
        /// <param name="string2">Second string</param>
        /// <param name="pos2">Second POS</param>
        /// <param name="strategy">Similarity strategy to use</param>
        /// <param name="relations">Relations to use when computing similarity</param>
        /// <returns>Similarity in [0, 1]; zero when no synset pair is available</returns>
        public float GetSimilarity(string string1, WordNetEngine.POS pos1, string string2, WordNetEngine.POS pos2, Strategy strategy, params WordNetEngine.SynSetRelation[] relations)
        {
            float similarity = 0;

            switch (strategy)
            {
                case Strategy.WuPalmer1994Average:
                {
                    // mean score over every synset pair
                    int pairCount = 0;
                    foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                    {
                        foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                        {
                            similarity += GetSimilarity(first, second, strategy, relations);
                            ++pairCount;
                        }
                    }

                    if (pairCount > 0)
                    {
                        similarity /= pairCount;
                    }

                    break;
                }

                case Strategy.WuPalmer1994Maximum:
                {
                    // best score over every synset pair
                    foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                    {
                        foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                        {
                            float pairScore = GetSimilarity(first, second, strategy, relations);
                            if (pairScore > similarity)
                            {
                                similarity = pairScore;
                            }
                        }
                    }

                    break;
                }

                case Strategy.WuPalmer1994Minimum:
                {
                    // worst score over every synset pair; -1 marks "nothing seen yet"
                    similarity = -1;
                    foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                    {
                        foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                        {
                            float pairScore = GetSimilarity(first, second, strategy, relations);
                            if (similarity == -1 || pairScore < similarity)
                            {
                                similarity = pairScore;
                            }
                        }
                    }

                    // no synset pairs at all: report zero
                    if (similarity == -1)
                    {
                        similarity = 0;
                    }

                    break;
                }

                case Strategy.WuPalmer1994MostCommon:
                {
                    // compare only the most common synset of each string
                    SynSet commonFirst = _wordNetEngine.GetMostCommonSynSet(string1, pos1);
                    SynSet commonSecond = _wordNetEngine.GetMostCommonSynSet(string2, pos2);

                    if (commonFirst != null && commonSecond != null)
                    {
                        similarity = GetSimilarity(commonFirst, commonSecond, strategy, relations);
                    }

                    break;
                }

                default:
                    throw new NotImplementedException("Unimplemented strategy:  " + strategy);
            }

            // sanity check on the final score
            if (similarity < 0 || similarity > 1)
            {
                throw new Exception("Invalid similarity:  " + similarity);
            }

            return similarity;
        }
Beispiel #12
0
 /// <summary>
 /// Creates a relation entry for the given relation type with no word list (Words is null).
 /// </summary>
 /// <param name="relation">Relation type stored in Relation.</param>
 public WordNetRelation(WordNetEngine.SynSetRelation relation)
 {
     this.Relation = relation;
     this.Words = null;
 }
 /// <summary>
 /// Creates the operation and stores the engine it works with.
 /// </summary>
 /// <param name="engine">WordNet engine kept in mEngine.</param>
 public LookupIndexWordOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
Beispiel #14
0
 /// <summary>
 /// Creates the execution, forwarding both services to the base constructor and starting
 /// with a fresh bag of words.
 /// </summary>
 /// <param name="wordnet">WordNet engine passed to the base class.</param>
 /// <param name="nlp">NLP service passed to the base class.</param>
 public FirstExecution(WordNetEngine wordnet, INLPService nlp)
     : base(wordnet, nlp)
 {
     this.bow = new BagOfWords();
 }
Beispiel #15
0
 /// <summary>
 /// Creates the tokenizer operation and stores the engine it works with.
 /// </summary>
 /// <param name="engine">WordNet engine kept in mEngine.</param>
 public TokenizerOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
Beispiel #16
0
        /// <summary>
        /// Instantiates the current synset by parsing its definition line: lexicographer file, words, gloss and
        /// relations. If idSynset is non-null, related synsets references are set to those from 
        /// idSynset; otherwise, related synsets are created as shells.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created as shells.</param>
        /// <exception cref="Exception">Thrown when the synset is already instantiated or when the definition line
        /// fails one of the sanity checks below (lexicographer number, space in a word, pipe in the gloss).</exception>
        internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
        {
            // don't re-instantiate
            if (_instantiated)
                throw new Exception("Synset has already been instantiated");

            /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
             * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */
            int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1;
            if (lexicographerFileNumber <= 0)
                throw new Exception("Invalid lexicographer file name number. Should be >= 1.");

            _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber;

            // get number of words in the synset (field 3, hexadecimal) and the start character of the word list
            int wordStart;
            int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber);
            // advance past the word-count field to the first character of the first word
            wordStart = definition.IndexOf(' ', wordStart) + 1;

            // get words in synset
            _words = new List<string>(numWords);
            for (int i = 0; i < numWords; ++i)
            {
                // the word runs from wordStart up to the character before the next space
                int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
                int wordLen = wordEnd - wordStart + 1;
                string word = definition.Substring(wordStart, wordLen);
                if (word.Contains(' '))
                    throw new Exception("Unexpected space in word:  " + word);

                _words.Add(word);

                // skip lex_id field (the field that follows each word) to reach the next word
                wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
            }

            // get gloss: everything after the first pipe, trimmed; a second pipe is treated as malformed input
            _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
            if (_gloss.Contains('|'))
                throw new Exception("Unexpected pipe in gloss");

            // get number and start of relations...the count sits right after the word list, where every word
            // occupies two fields (word and lex_id)
            int relationCountField = 3 + (_words.Count * 2) + 1;
            int relationFieldStart;
            int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart));
            // advance past the relation-count field to the first relation field
            relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

            // grab each related synset
            _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>();
            _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>();
            for (int relationNum = 0; relationNum < numRelations; ++relationNum)
            {
                // placeholders, overwritten by the four fields parsed below
                string relationSymbol = null;
                int relatedSynSetOffset = -1;
                WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None;
                int sourceWordIndex = -1;
                int targetWordIndex = -1;

                // each relation has four columns
                for (int relationField = 0; relationField <= 3; ++relationField)
                {
                    int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1;
                    int fieldLen = fieldEnd - relationFieldStart + 1;
                    string fieldValue = definition.Substring(relationFieldStart, fieldLen);

                    // relation symbol
                    if (relationField == 0)
                        relationSymbol = fieldValue;
                    // related synset offset
                    else if (relationField == 1)
                        relatedSynSetOffset = int.Parse(fieldValue);
                    // related synset POS
                    else if (relationField == 2)
                        relatedSynSetPOS = GetPOS(fieldValue);
                    // source/target word for lexical relation: first two hex digits are the source index,
                    // the remaining hex digits are the target index
                    else if (relationField == 3)
                    {
                        sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber);
                        targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber);
                    }
                    // defensive only: not reachable with the loop bounds above
                    else
                        throw new Exception();

                    // move to the start of the next space-delimited field
                    relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
                }

                // get related synset...create shell if we don't have a lookup
                SynSet relatedSynSet;
                if (idSynset == null)
                    relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine);
                // look up related synset directly (dictionary indexer, so an unknown ID throws)
                else
                    relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset];

                // get relation
                WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol);

                // add semantic relation if we have neither a source nor a target word index (both parsed as zero)
                if (sourceWordIndex == 0 && targetWordIndex == 0)
                {
                    _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>));
                    _relationSynSets[relation].Add(relatedSynSet);
                }
                // add lexical relation: relation -> related synset -> source word index -> set of target word indexes
                else
                {
                    _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>));
                    _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>));
                    _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set<int>));

                    if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex))
                        _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex);
                }
            }

            // release the wordnet engine if we have one...don't need it anymore
            if (_wordNetEngine != null)
                _wordNetEngine = null;

            _instantiated = true;
        }
Beispiel #17
0
        /// <summary>
        /// Queries the DBpedia lookup service for <paramref name="term"/> and maps every returned sense
        /// that overlaps with the term onto a WordNet synset.
        /// </summary>
        /// <remarks>
        /// A sense with hypernyms is mapped through SchemaMap (the entry with the highest depth marker wins);
        /// a sense without hypernyms but with a parenthesised qualifier in its synonym is mapped to the first
        /// noun synset of that qualifier. Matched synsets get the sense's cleaned gloss and its ID as URI.
        /// </remarks>
        /// <param name="_wn">WordNet engine used to resolve synsets.</param>
        /// <param name="term">Search term; it is URL-escaped before being sent.</param>
        /// <returns>The mapped synsets (possibly empty), or null when the request or the mapping fails.</returns>
        public List <SynSet> Query(WordNetEngine _wn, string term)
        {
            try
            {
                ExternalSet   obj;
                List <SynSet> Syns = new List <SynSet>();

                // Escape the term so that spaces, '&', '#', '+' etc. cannot corrupt the query string.
                HttpWebRequest request = WebRequest.Create(
                    "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=" + Uri.EscapeDataString(term)) as HttpWebRequest;
                request.Method = "GET"; // HTTP method tokens are case-sensitive
                request.Accept = "application/json";
                using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
                    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                        obj = JsonConvert.DeserializeObject <ExternalSet>(reader.ReadToEnd());

                int SynsCount = obj.SynSets.Count;
                for (int k = 0; k < SynsCount; k++)
                {
                    var candidate = obj.SynSets[k];
                    if (!MeasureOverlap(term, candidate.Synonym))
                    {
                        continue;
                    }

                    SynSet match = null;

                    // If the Sense has classes, map directly to WordNet
                    if (candidate.Hypernyms.Count > 0)
                    {
                        int    tmpDepth         = 0;
                        string InternalSynSetId = "";
                        foreach (ExternalHypernym hypernym in candidate.Hypernyms)
                        {
                            // if the class RDF Identifier has URL, extract only the concept
                            if (hypernym.URI.StartsWith("ht", StringComparison.Ordinal))
                            {
                                hypernym.URI = hypernym.URI.Substring(hypernym.URI.LastIndexOf('/') + 1);
                            }
                            // Invariant lower-casing: the result is a dictionary key and must not depend
                            // on the current culture (e.g. Turkish dotless i).
                            hypernym.URI = hypernym.URI.Trim().ToLowerInvariant();

                            string HypernymInfo;
                            if (SchemaMap.TryGetValue(hypernym.URI, out HypernymInfo))
                            {
                                // NOTE(review): Convert.ToInt32(char) yields the character code, not the digit
                                // value; the ordering between digit characters is still preserved, so the
                                // "deepest entry wins" comparison works. Kept as-is to preserve behavior.
                                int depth = Convert.ToInt32(HypernymInfo[0]);
                                if (tmpDepth < depth)
                                {
                                    tmpDepth         = depth;
                                    InternalSynSetId = HypernymInfo.Substring(1);
                                }
                            }
                        }

                        if (InternalSynSetId != "")
                        {
                            match = _wn.GetSynSet("Noun:" + InternalSynSetId);
                        }
                    }
                    else if (candidate.Synonym.Contains("("))
                    {
                        // Use the text between the parentheses as a noun lookup.
                        List <SynSet> tmpSyns = _wn.GetSynSets(candidate.Synonym.Split('(', ')')[1], "noun");
                        if (tmpSyns.Count > 0)
                        {
                            match = tmpSyns[0];
                        }
                    }
                    // else: a class would have to be inferred from categories (not implemented)

                    // Skip senses that could not be mapped instead of failing the whole query.
                    if (match == null)
                    {
                        continue;
                    }

                    if (candidate.Gloss != null)
                    {
                        match.Gloss = rx.Replace(candidate.Gloss, string.Empty);
                    }
                    match.URI = candidate.ID;
                    Syns.Add(match);
                }

                return(Syns);
            }
            catch (Exception)
            {
                // Deliberate best-effort: any network, parsing or lookup failure is reported as null.
                return(null);
            }
        }
 /// <summary>
 /// Initializes the designer components and creates the WordNet engine from the
 /// "WordnetDicts" directory (relative to the working directory), passing true as the second argument.
 /// </summary>
 public WordnetForm()
 {
     const string dictionaryDir = @"WordnetDicts\";

     InitializeComponent();
     _wordNetEngine = new WordNetEngine(dictionaryDir, true);
 }
Beispiel #19
0
 /// <summary>
 /// Creates the WordNet engine used for rewording, passing false as the second argument.
 /// </summary>
 public Reword()
 {
     // NOTE(review): absolute, machine-specific location of the WordNet resources.
     const string resourcesDir = @"C:\Users\danie\Documents\Visual Studio 2015\Projects\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources\";

     _wordNetEngine = new WordNetEngine(resourcesDir, false);
 }
Beispiel #20
0
 /// <summary>
 /// Creates the fixture with default-constructed spelling, WordNet and pronunciation engines.
 /// </summary>
 public WordFixture()
 {
     // Created in this order: spelling, WordNet, pronunciation.
     SpellingEngine = new SpellingEngine();
     WordNetEngine = new WordNetEngine();
     PronunciationEngine = new PronunciationEngine();
 }
Beispiel #21
0
        /// <summary>
        /// Console entry point: lets the user pick a JSON file, indexes every passage of every record
        /// in the current directory's index, then runs an interactive search loop.
        /// </summary>
        /// <remarks>
        /// The first query is expanded with the words of all WordNet synsets found for it; later
        /// queries are searched as typed. Entering "q" (or end of input) leaves the loop.
        /// NOTE(review): showing an OpenFileDialog requires an STA thread; confirm that this method
        /// carries [STAThread].
        /// </remarks>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string curPath  = Directory.GetCurrentDirectory();
            string filepath = null;

            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = "JSON|*.json";
                if (ofd.ShowDialog() == DialogResult.OK)
                {
                    // FileName is the full path. SafeFileName is only the name and extension and would
                    // be resolved against whatever the current directory happens to be.
                    filepath = ofd.FileName;
                }
            }

            // Dialog cancelled: there is nothing to index.
            if (string.IsNullOrEmpty(filepath))
            {
                WriteLine("No JSON file selected.");
                return;
            }

            string         json         = File.ReadAllText(filepath);
            List <records> storeRecords = DeserializeJSON(json);

            WriteLine("Hello Lucene.Net");

            LuceneApplication LuceneApp = new LuceneApplication();

            LuceneApp.CreateIndex(curPath);
            WriteLine(curPath);
            WriteLine("Adding Documents to Index");

            // Stopwatch is monotonic; wall-clock differences can jump when the system clock changes.
            System.Diagnostics.Stopwatch indexingTimer = System.Diagnostics.Stopwatch.StartNew();

            for (int x = 0; x < storeRecords.Count; x++)
            {
                WriteLine("Adding record no #{0}", x + 1);
                for (int y = 0; y < storeRecords[x].passages.Count; y++)
                {
                    // Each passage is indexed as its URL immediately followed by its text.
                    string single_text = (storeRecords[x].passages[y].url + storeRecords[x].passages[y].passage_text);

                    LuceneApp.IndexText(single_text);
                }
            }

            indexingTimer.Stop();

            WriteLine("Total time for indexing >> {0}", indexingTimer.Elapsed);

            WriteLine("All documents added.");

            // clean up
            LuceneApp.CleanUpIndexer();

            LuceneApp.CreateSearcher();

            // NOTE(review): absolute, machine-specific WordNet location; should be configurable.
            var directory = @"C:\Users\Suprith Kangokar\Desktop\LuceneTest\LuceneTest\LuceneTest\bin\Debug\Wordnet";
            var wordNet   = new WordNetEngine();
            const string QUIT = "q";

            Write("Enter your query >>");
            string queryText = ReadLine();

            // Expand only a real query: expanding "q" could append synonyms and defeat the quit check,
            // and a null (end of input) cannot be looked up.
            if (queryText != null && queryText != QUIT)
            {
                wordNet.LoadFromDirectory(directory);
                var synSetList = wordNet.GetSynSets(queryText);

                if (synSetList.Count == 0)
                {
                    Console.WriteLine("No SynSet found");
                }

                // Append the words of every synset, tab-separated from the query.
                foreach (var synSet in synSetList)
                {
                    queryText += ("\t" + string.Join(", ", synSet.Words));
                }
            }

            // ReadLine returns null at end of input; treat that like QUIT instead of looping forever.
            while (queryText != null && queryText != QUIT)
            {
                LuceneApp.DisplayResults(LuceneApp.SearchIndex(queryText));
                Write("Enter your query or press 'q' to exit >>");
                queryText = ReadLine();
            }
        }
Beispiel #22
0
 /// <summary>
 /// Creates the manager's WordNet engine from the directory exposed by ReferringManager,
 /// passing false as the second argument.
 /// </summary>
 public WordNetManager()
 {
     var dictionaryDir = ReferringManager.Instance.WordNetDirectory;
     wordNetEngine = new WordNetEngine(dictionaryDir, false);
 }
Beispiel #23
0
 /// <summary>
 /// Creates the operation and stores the engine it works with.
 /// </summary>
 /// <param name="engine">WordNet engine kept in mEngine.</param>
 public LookupExceptionsOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
Beispiel #24
0
        /// <summary>
        /// Counts the synsets that are linked to this synset through the given relation.
        /// </summary>
        /// <param name="relation">Relation to check</param>
        /// <returns>Number of related synsets, or zero when the relation has no entry</returns>
        public int GetRelatedSynSetCount(WordNetEngine.SynSetRelation relation)
        {
            // a relation without an entry simply has no related synsets
            return _relationSynSets.ContainsKey(relation)
                ? _relationSynSets[relation].Count
                : 0;
        }
Beispiel #25
0
 /// <summary>
 /// Tells whether this part of speech corresponds to the given WordNet part of speech.
 /// </summary>
 /// <param name="pos">WordNet part of speech to compare with.</param>
 /// <returns>True when ForWordnet() yields <paramref name="pos"/>.</returns>
 public bool FitsWordnetPOS(WordNetEngine.POS pos)
 {
     var wordnetEquivalent = ForWordnet();
     return wordnetEquivalent == pos;
 }
 /// <summary>
 /// Creates the tokenizer operation with the engine and the delimiters it works with.
 /// </summary>
 /// <param name="engine">WordNet engine kept in mEngine.</param>
 /// <param name="delimiters">Delimiter strings kept in mDelimiters.</param>
 public TokenizerOperation(WordNetEngine engine, string[] delimiters)
 {
     this.mEngine = engine;
     this.mDelimiters = delimiters;
 }
Beispiel #27
0
        /// <summary>
        /// Parses a category from its text representation: a "key=name" line, a "key=wordcount" line,
        /// then one "synset-id|count" line per synset.
        /// </summary>
        /// <param name="wordNetEngine">Engine used to resolve each synset ID.</param>
        /// <param name="text">Text representation; lines may end with CRLF, CR or LF.</param>
        /// <returns>
        /// The parsed category, or null when the text is null, has fewer than two lines, has a malformed
        /// header line, contains an unparsable count, or references a synset the engine does not return.
        /// </returns>
        public static Category FromString(WordNetEngine wordNetEngine, String text)
        {
            // Malformed input is reported as null throughout; treat missing text the same way.
            if (text == null)
            {
                return null;
            }

            //  split into lines
            var lines = Regex.Split(text, "\r\n|\r|\n");

            // Both header lines are required; without this check lines[1] below would throw.
            if (lines.Length < 2)
            {
                return null;
            }

            char[] keyValueSeparator = {'='};

            //  first line "name=.."
            var firstLineSplit = lines[0].Split(keyValueSeparator, StringSplitOptions.RemoveEmptyEntries);
            if (firstLineSplit.Length != 2 || firstLineSplit[0] != STRING_REPRESENTATION_FIRST_LINE_KEY)
            {
                return null;
            }
            string categoryName = firstLineSplit[1];

            //  second line "wordcount=.."
            var secondLineSplit = lines[1].Split(keyValueSeparator, StringSplitOptions.RemoveEmptyEntries);
            if (secondLineSplit.Length != 2)
            {
                return null;
            }
            int wordCount;
            if (secondLineSplit[0] != STRING_REPRESENTATION_SECOND_LINE_KEY || !Int32.TryParse(secondLineSplit[1], out wordCount))
            {
                return null;
            }

            var synSetDictionary = new Dictionary<SynSet, int>();
            char[] entrySeparator = {'|'};

            //  rest of the lines - dictionary
            for (int i = 2; i < lines.Length; ++i)
            {
                var lineSplit = lines[i].Split(entrySeparator, StringSplitOptions.RemoveEmptyEntries);

                // The first line that is not "id|count" (e.g. a trailing blank line) ends the dictionary.
                if (lineSplit.Length != 2)
                {
                    break;
                }

                //  synset word count
                int synSetWordCount;
                if (!Int32.TryParse(lineSplit[1], out synSetWordCount))
                {
                    return null;
                }

                //  synset
                var synSet = wordNetEngine.GetSynSet(lineSplit[0]);
                if (synSet == null)
                {
                    return null;
                }

                synSetDictionary.Add(synSet, synSetWordCount);
            }

            Utility.Log("loaded " + categoryName + " (" + wordCount + ") with " + synSetDictionary.Count + " synsets");

            return new Category(categoryName, wordCount, synSetDictionary);
        }
Beispiel #28
0
 /// <summary>
 /// Creates the tokenizer operation with the engine and the delimiters it works with.
 /// </summary>
 /// <param name="engine">WordNet engine kept in mEngine.</param>
 /// <param name="delimiters">Delimiter strings kept in mDelimiters.</param>
 public TokenizerOperation(WordNetEngine engine, string[] delimiters)
 {
     this.mEngine = engine;
     this.mDelimiters = delimiters;
 }
Beispiel #29
0
        /// <summary>
        /// Loads WordNet data into the given engine from the "wordnet" folder
        /// located under the current working directory.
        /// </summary>
        /// <param name="wordNet">Engine whose <c>LoadFromDirectory</c> method is called.</param>
        private static void LoadWordnet(WordNetEngine wordNet)
        {
            string workingDirectory = Directory.GetCurrentDirectory();
            wordNet.LoadFromDirectory(Path.Combine(workingDirectory, "wordnet"));
        }
Beispiel #30
0
 /// <summary>
 /// Creates a relation entry pairing a synset relation with a list of words.
 /// </summary>
 /// <param name="relation">Relation stored in <c>Relation</c>.</param>
 /// <param name="words">Word list stored in <c>Words</c> (the same list instance, not a copy).</param>
 public WordNetRelation(WordNetEngine.SynSetRelation relation, List<string> words)
 {
     this.Words = words;
     this.Relation = relation;
 }
Beispiel #31
0
 /// <summary>
 /// Stores the services this execution works with.
 /// </summary>
 /// <param name="wordnet">WordNet engine kept in the <c>wordnet</c> field.</param>
 /// <param name="nlp">NLP service kept in the <c>nlp</c> field.</param>
 public ExecutionBase(WordNetEngine wordnet, INLPService nlp)
 {
     this.nlp = nlp;
     this.wordnet = wordnet;
 }
Beispiel #32
0
 /// <summary>
 /// Static initializer: creates the shared WordNet engine from the "WordNet" directory.
 /// </summary>
 static TestHelper()
 {
     const string dataDirectory = "WordNet";

     // NOTE(review): the second argument is presumably the in-memory flag — confirm against WordNetEngine.
     WordNetEngine = new WordNetEngine(dataDirectory, true);
 }
Beispiel #33
0
 /// <summary>
 /// Stores the services this execution works with.
 /// </summary>
 /// <param name="wordnet">WordNet engine kept in the <c>wordnet</c> field.</param>
 /// <param name="nlp">NLP service kept in the <c>nlp</c> field.</param>
 public ExecutionBase(WordNetEngine wordnet, INLPService nlp)
 {
     this.nlp = nlp;
     this.wordnet = wordnet;
 }
 /// <summary>
 /// Creates a similarity model backed by the given WordNet engine.
 /// </summary>
 /// <param name="wordNetEngine">Engine stored in <c>_wordNetEngine</c> for later use by the model.</param>
 public WordNetSimilarityModel(WordNetEngine wordNetEngine)
 {
     this._wordNetEngine = wordNetEngine;
 }
Beispiel #35
0
 /// <summary>
 /// Creates the WordNet engine from a fixed resources directory.
 /// </summary>
 public Reword()
 {
     // NOTE(review): absolute, user-specific path — this only resolves on the original author's machine.
     const string resourcesDirectory = @"C:\Users\Daniel\Google Drive\Programming\C#\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources\";

     _wordNetEngine = new WordNetEngine(resourcesDirectory, false);
 }
Beispiel #36
0
        /// <summary>
        /// Expands a query with WordNet terms, marking each original term with "^5".
        /// </summary>
        /// <param name="level">
        /// "Synonym" to use only the words of each term's synsets, or the name of a
        /// <c>SynSetRelation</c> member whose (recursively) related synsets are added as well.
        /// </param>
        /// <param name="query">Query text; terms are separated by spaces or semicolons.</param>
        /// <returns>
        /// For every distinct term that has at least one synset: a space, the term,
        /// "^5" and the space-prefixed expansion words. Returns an empty string when
        /// the query is null or empty, when WordNet could not be loaded, or when no
        /// term has synsets.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="level"/> is neither "Synonym" nor a <c>SynSetRelation</c> name
        /// and at least one term has synsets.
        /// </exception>
        public string ExpandWeightedQuery(string level, string query)
        {
            if (string.IsNullOrEmpty(query))
            {
                return "";
            }

            WordNetEngine wordnet = new WordNetEngine();

            // Same location as before (<current directory>\wordnet\), built without
            // hard-coding the separator character.
            string path = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "wordnet")
                          + System.IO.Path.DirectorySeparatorChar;

            wordnet.LoadFromDirectory(path);
            if (!wordnet.IsLoaded)
            {
                // The database directory is fixed; if it is missing nothing can be expanded.
                return "";
            }

            char[] delimiters = { ' ', ';' };
            string[] terms = query.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

            // Parsed on first use only: an unknown level still throws exactly when a
            // relation is actually needed, but the parse is no longer repeated per synset.
            SynSetRelation? relation = null;

            var processedTerms = new HashSet<string>();
            var expandedQuery = new System.Text.StringBuilder();

            foreach (string term in terms)
            {
                // A repeated term previously crashed on a duplicate dictionary key;
                // each distinct term is expanded once.
                if (!processedTerms.Add(term))
                {
                    continue;
                }

                var synSets = wordnet.GetSynSets(term);
                if (synSets.Count == 0)
                {
                    // Terms without synsets are left out of the result.
                    continue;
                }

                var seenWords = new HashSet<string>();
                var expansion = new System.Text.StringBuilder();

                foreach (SynSet synSet in synSets)
                {
                    AppendNewWords(synSet.Words, term, seenWords, expansion);

                    if (level != "Synonym")
                    {
                        if (relation == null)
                        {
                            relation = (SynSetRelation)Enum.Parse(typeof(SynSetRelation), level);
                        }

                        foreach (SynSet related in synSet.GetRelatedSynSets(relation.Value, true))
                        {
                            AppendNewWords(related.Words, term, seenWords, expansion);
                        }
                    }
                }

                expandedQuery.Append(' ').Append(term).Append("^5").Append(expansion.ToString());
            }

            return expandedQuery.ToString();
        }

        /// <summary>
        /// Appends each word not seen before (and different from the query term) to
        /// <paramref name="target"/>, prefixed with a space, in first-seen order.
        /// </summary>
        private static void AppendNewWords(IEnumerable<string> words, string excludedTerm,
                                           HashSet<string> seenWords, System.Text.StringBuilder target)
        {
            foreach (string word in words)
            {
                if (seenWords.Add(word) && word != excludedTerm)
                {
                    target.Append(' ').Append(word);
                }
            }
        }
Beispiel #37
0
 /// <summary>
 /// Creates the fixture's WordNet engine and pronunciation engine, each with
 /// its parameterless constructor.
 /// </summary>
 public WordFixture()
 {
     var wordNet = new WordNetEngine();
     WordNetEngine = wordNet;

     var pronunciation = new PronunciationEngine();
     PronunciationEngine = pronunciation;
 }
Beispiel #38
0
        /// <summary>
        /// Handles the search button: builds the query text from the search box
        /// according to the checked options, runs the search through the
        /// <c>lucene</c> helper, and updates the paging, hit-count and timing controls.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        public void button1_Click(object sender, EventArgs e)
        {
            querydiaplay.Clear();
            if (searchBox1.Text == "")
            {
                MessageBox.Show("Please Enter Your Query");
                return;
            }

            // The options are evaluated in order; when several are checked, the last
            // one that applies determines the query actually searched.
            if (PhrasecheckBox1.Checked)
            {
                input = ("\"" + searchBox1.Text + "\"");
                querydiaplay.AppendText("Searching for\n");
                querydiaplay.AppendText(input);
            }
            if (preprocessing.Checked) // preprocessing is delegated to lucene.preprocessing
            {
                input = lucene.preprocessing(searchBox1.Text);
                querydiaplay.AppendText("Searching for\n");
                querydiaplay.AppendText(input);
            }

            if (synonym.Checked)
            {
                var directory = System.IO.Directory.GetCurrentDirectory(); // WordNet directory
                wordNet = new WordNetEngine();
                wordNet.LoadFromDirectory(directory);
                MessageBox.Show("loading wornet successful");

                string[] separators  = { ",", ".", "!", "?", ";", ":", "-", " ", "\n", "\"", "'" };
                string[] input_array = searchBox1.Text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                // Start from an empty string on every search; the field was previously only
                // appended to, so terms from earlier searches leaked into later queries.
                weighted_text = "";
                for (int i = 0; i < input_array.Length; i++)
                {
                    weighted_text += input_array[i] + "^5";
                }

                input = weighted_text + Expansion_query(input_array);
                querydiaplay.AppendText("Searching for\n");
                querydiaplay.AppendText(input);
            }
            if (!preprocessing.Checked && !synonym.Checked && !PhrasecheckBox1.Checked)
            {
                input = searchBox1.Text;
                querydiaplay.AppendText("Searching for\n");
                querydiaplay.AppendText(input);
            }

            stopwatch.Restart();
            lucene.CreatParser();
            results = lucene.Searching(input);

            // Number of 10-hit pages, rounded up.
            if (results.TotalHits % 10 == 0)
            {
                totalpage = results.TotalHits / 10;
            }
            else
            {
                totalpage = results.TotalHits / 10 + 1;
            }
            searchBypage(results, currentpage, pagesize);
            stopwatch.Stop();
            label3.Text   = (currentpage + " / " + totalpage);
            textBox1.Text = results.TotalHits.ToString();
            textBox2.Text = stopwatch.Elapsed.ToString();
        }
 /// <summary>
 /// Initializes the form's components and creates the WordNet engine from the
 /// relative "WordnetDicts\" directory.
 /// </summary>
 public WordnetForm()
 {
     InitializeComponent();

     const string dictionaryDirectory = @"WordnetDicts\";

     // NOTE(review): the second argument is presumably the in-memory flag — confirm against WordNetEngine.
     _wordNetEngine = new WordNetEngine(dictionaryDirectory, true);
 }
 /// <summary>
 /// Creates the operation and keeps the engine it will use.
 /// </summary>
 /// <param name="engine">WordNet engine stored in <c>mEngine</c>.</param>
 public LookupIndexWordOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
 /// <summary>
 /// Creates the operation and keeps the engine it will use.
 /// </summary>
 /// <param name="engine">WordNet engine stored in <c>mEngine</c>.</param>
 public LookupExceptionsOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
Beispiel #42
0
 /// <summary>
 /// Single-relation convenience overload: wraps the relation in a one-element
 /// array and forwards to the multi-relation overload.
 /// </summary>
 /// <param name="relation">Synset relation to follow</param>
 /// <param name="recursive">Passed through unchanged; whether the relation is followed recursively for all related synsets</param>
 /// <returns>Whatever the multi-relation overload returns for this single relation</returns>
 public Set<SynSet> GetRelatedSynSets(WordNetEngine.SynSetRelation relation, bool recursive)
 {
     WordNetEngine.SynSetRelation[] singleRelation = new WordNetEngine.SynSetRelation[1];
     singleRelation[0] = relation;

     return GetRelatedSynSets(singleRelation, recursive);
 }
Beispiel #43
0
        // Constructors and finalizers:
        /// <summary>
        /// Builds the resource paths and loads the NLP components held by the
        /// repository, in this order: WordNet engine, OpenNLP sentence detector,
        /// tokenizer, name finder, POS tagger, chunker, (optionally) OpenNLP parser,
        /// Stanford parser and Porter stemmer. Progress is written to the console.
        /// </summary>
        private Repository()
        {
            // Short assembly name: everything before the first comma of the full name.
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            // The literals below contain no backslashes, so Replace leaves them as written.
            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..", "..", "..", "data"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            Console.Write("Loading OpenNLP sentence detector.... ");
            _sentenceModel = LoadOpenNlpModel<SentenceModel>(
                OpenNlpModelsPath + "en-sent.bin", stream => new SentenceModel(stream));
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            _tokenizerModel = LoadOpenNlpModel<opennlp.tools.tokenize.TokenizerModel>(
                OpenNlpModelsPath + "en-token.bin", stream => new opennlp.tools.tokenize.TokenizerModel(stream));
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            _tokenNameFinderModel = LoadOpenNlpModel<TokenNameFinderModel>(
                OpenNlpModelsPath + "en-ner-person.bin", stream => new TokenNameFinderModel(stream));
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            _posModel = LoadOpenNlpModel<POSModel>(
                OpenNlpModelsPath + "en-pos-maxent.bin", stream => new POSModel(stream));
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            _chunkerModel = LoadOpenNlpModel<ChunkerModel>(
                OpenNlpModelsPath + "en-chunker.bin", stream => new ChunkerModel(stream));
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser (only when enabled via _loadParser):
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                _parserModel = LoadOpenNlpModel<ParserModel>(
                    OpenNlpModelsPath + "en-parser-chunking.bin", stream => new ParserModel(stream));
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser (loadModel replaces the obsolete LexicalizedParser constructor):
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }

        /// <summary>
        /// Opens the file at <paramref name="modelPath"/>, hands the stream to
        /// <paramref name="createModel"/> and closes the stream afterwards, even
        /// when model creation throws.
        /// </summary>
        /// <param name="modelPath">Path of the model file to read.</param>
        /// <param name="createModel">Builds the model from the open stream.</param>
        /// <returns>The model produced by <paramref name="createModel"/>.</returns>
        private static TModel LoadOpenNlpModel<TModel>(string modelPath, Func<java.io.InputStream, TModel> createModel)
        {
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(modelPath);
            try
            {
                return createModel(modelInputStream);
            }
            finally
            {
                modelInputStream.close();
            }
        }
        /// <summary>
        /// Appends to the hypernyms text box, for each word, the words of its direct
        /// hypernym synsets (hypernyms are the more general terms for a word).
        /// </summary>
        /// <remarks>
        /// One line is written per word in the form <c>word[(a,b)(c)]</c>: one
        /// parenthesised, comma-separated group per hypernym synset that contains at
        /// least one word other than the input word. At most <c>nupdLimit.Value</c>
        /// words are processed.
        /// </remarks>
        /// <param name="words">Words to look up.</param>
        /// <param name="pos">Part of speech passed to the synset lookup.</param>
        private void OutputHypernyms(IEnumerable<string> words, WordNetEngine.POS pos)
        {
            StringBuilder output = new StringBuilder();
            int processedWords = 0;
            foreach (string word in words)
            {
                // Check the limit before processing; the former check after processing
                // let one word more than the limit through.
                if (processedWords >= nupdLimit.Value)
                    break;
                processedWords++;

                var synSets = _wordNetEngine.GetSynSets(word, pos);

                output.Append(word).Append("[");
                foreach (SynSet directSynset in synSets)
                {
                    // Non-recursive: only the immediate hypernyms of this synset.
                    var hypernymSynSets =
                        directSynset.GetRelatedSynSets(WordNetEngine.SynSetRelation.Hypernym, false);

                    foreach (SynSet hypernymSynSet in hypernymSynSets)
                    {
                        // Materialize once so the filter is not re-run for every count/iteration.
                        string[] otherWords =
                            hypernymSynSet.Words.Where(synonym => synonym != word).ToArray();

                        if (otherWords.Length > 0)
                            output.Append("(").Append(string.Join(",", otherWords)).Append(")");
                    }
                }

                output.Append("]").Append(Environment.NewLine);
            }
            this.txtHypernyms.Text += output.ToString();
        }
 /// <summary>
 /// Creates a tokenizer operation that keeps the supplied engine.
 /// </summary>
 /// <param name="engine">WordNet engine stored in <c>mEngine</c>.</param>
 public TokenizerOperation(WordNetEngine engine)
 {
     this.mEngine = engine;
 }
        /// <summary>
        /// Appends to the synonyms text box, for each word, the other words found in
        /// the synsets that word belongs to.
        /// </summary>
        /// <remarks>
        /// One line is written per word in the form <c>word[(a,b)(c)]</c>: one
        /// parenthesised, comma-separated group per synset that contains at least one
        /// word other than the input word. At most <c>nupdLimit.Value</c> words are
        /// processed.
        /// </remarks>
        /// <param name="words">Words to look up.</param>
        /// <param name="pos">Part of speech passed to the synset lookup.</param>
        private void OutputSynonyms(IEnumerable<string> words, WordNetEngine.POS pos)
        {
            StringBuilder synonymOutput = new StringBuilder();
            int processedWords = 0;
            foreach (string word in words)
            {
                // Check the limit before processing; the former check after processing
                // let one word more than the limit through.
                if (processedWords >= nupdLimit.Value)
                    break;
                processedWords++;

                // Words are synonyms when they share a synset, so collect the synsets
                // of the word and then the other words attached to each of them.
                var synSets = _wordNetEngine.GetSynSets(word, pos);

                synonymOutput.Append(word).Append("[");
                foreach (SynSet synSet in synSets)
                {
                    // Materialize once so the filter is not re-run for every count/iteration.
                    string[] otherWords = synSet.Words
                        .Where(synonym => synonym != word)
                        .ToArray();

                    if (otherWords.Length > 0)
                        synonymOutput.Append("(").Append(string.Join(",", otherWords)).Append(")");
                }

                synonymOutput.Append("]").Append(Environment.NewLine);
            }
            this.txtSynonyms.Text += synonymOutput.ToString();
        }
Beispiel #47
0
 /// <summary>
 /// Passes both services to the base constructor and creates an empty bag of words.
 /// </summary>
 /// <param name="wordnet">WordNet engine forwarded to the base class.</param>
 /// <param name="nlp">NLP service forwarded to the base class.</param>
 public FirstExecution(WordNetEngine wordnet, INLPService nlp)
     : base(wordnet, nlp)
 {
     this.bow = new BagOfWords();
 }