/// <summary>
        ///     Populates this synset from its definition line: lexicographer file name, words, gloss, and both the
        ///     semantic and the lexical relations. Related synsets are taken from <paramref name="idSynset" /> when it is
        ///     supplied; otherwise each related synset is created as a shell from its POS and offset.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">
        ///     Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created
        ///     as shells.
        /// </param>
        internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset)
        {
            // a synset may only be populated once
            if (Instantiated)
            {
                throw new Exception("Synset has already been instantiated");
            }

            /* the lexicographer file enumeration follows the wordnet spec (see the lexnames file) but begins with an
             * extra None member, so the number read from the definition line is shifted up by one */
            int lexFileNumber = 1 + int.Parse(GetField(definition, 1));

            if (lexFileNumber < 1)
            {
                throw new Exception("Invalid lexicographer file name number. Should be >= 1.");
            }

            LexicographerFileName = (LexicographerFileName)lexFileNumber;

            // field 3 is the word count (hexadecimal); GetField also reports a position, from which we move to
            // the character following the next space, i.e., the first word
            string wordCountText = GetField(definition, 3, out int cursor);
            int    wordCount     = int.Parse(wordCountText, NumberStyles.HexNumber);

            cursor = definition.IndexOf(' ', cursor) + 1;

            // read the words...each one is followed by a lex_id column that we do not keep
            Words = new List <string>(wordCount);

            int wordsRead = 0;

            while (wordsRead < wordCount)
            {
                int    spaceAfterWord = definition.IndexOf(' ', cursor + 1);
                string word           = definition.Substring(cursor, spaceAfterWord - cursor);

                if (word.Contains(' '))
                {
                    throw new Exception("Unexpected space in word:  " + word);
                }

                Words.Add(word);

                // jump over the lex_id column to the start of the next column
                cursor = definition.IndexOf(' ', spaceAfterWord + 1) + 1;
                ++wordsRead;
            }

            // the gloss is everything after the pipe character
            int pipeIndex = definition.IndexOf('|');

            Gloss = definition.Substring(pipeIndex + 1).Trim();
            if (Gloss.Contains('|'))
            {
                throw new Exception("Unexpected pipe in gloss");
            }

            // the relation count sits right after the (word, lex_id) pairs
            int    relationCountIndex = 4 + 2 * Words.Count;
            string relationCountText  = GetField(definition, relationCountIndex, out int relationCursor);
            int    relationCount      = int.Parse(relationCountText);

            relationCursor = definition.IndexOf(' ', relationCursor) + 1;

            // reads the space-terminated column at the cursor and moves the cursor to the start of the next column
            string NextRelationColumn()
            {
                int    spaceAfterColumn = definition.IndexOf(' ', relationCursor + 1);
                string column           = definition.Substring(relationCursor, spaceAfterColumn - relationCursor);

                relationCursor = spaceAfterColumn + 1;

                return(column);
            }

            relationSynSets  = new Dictionary <SynSetRelation, List <SynSet> >();
            lexicalRelations =
                new Dictionary <SynSetRelation, Dictionary <SynSet, Dictionary <int, List <int> > > >();

            for (int remaining = relationCount; remaining > 0; --remaining)
            {
                // each relation has four columns:  symbol, related synset offset, related synset POS, and a
                // source/target column holding two hexadecimal word indexes (two digits for the source, rest for the target)
                string   relationSymbol      = NextRelationColumn();
                int      relatedSynSetOffset = int.Parse(NextRelationColumn());
                WordType relatedSynSetPOS    = GetPOS(NextRelationColumn());
                string   sourceTarget        = NextRelationColumn();
                int      sourceWordIndex     = int.Parse(sourceTarget.Substring(0, 2), NumberStyles.HexNumber);
                int      targetWordIndex     = int.Parse(sourceTarget.Substring(2), NumberStyles.HexNumber);

                // without a lookup we can only create a shell; otherwise use the synset registered under "POS:offset"
                SynSet relatedSynSet = idSynset == null
                                       ? new SynSet(relatedSynSetPOS, relatedSynSetOffset)
                                       : idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset];

                SynSetRelation relation = WordNetEngine.GetSynSetRelation(POS, relationSymbol);

                bool isLexical = sourceWordIndex != 0 || targetWordIndex != 0;

                if (isLexical)
                {
                    // lexical relation:  relation -> related synset -> source word index -> distinct target word indexes
                    var targetWordIndexes = lexicalRelations.GetItemCreate(relation)
                                            .GetItemCreate(relatedSynSet)
                                            .GetItemCreate(sourceWordIndex);

                    if (!targetWordIndexes.Contains(targetWordIndex))
                    {
                        targetWordIndexes.Add(targetWordIndex);
                    }
                }
                else
                {
                    // both word indexes are zero, so the relation holds between the synsets as a whole
                    relationSynSets.GetItemCreate(relation).Add(relatedSynSet);
                }
            }

            Instantiated = true;
        }
        /// <summary>
        ///     Loads the WordNet distribution found in WordNetDirectory. The method checks that the data and index files
        ///     exist, re-sorts the index files once (recorded by a flag file in the directory), fills idSynset with fully
        ///     instantiated synsets using three passes over the data files, and finally fills posWordSynSets from the
        ///     index files.
        /// </summary>
        /// <exception cref="DirectoryNotFoundException">WordNetDirectory does not exist.</exception>
        /// <exception cref="FileNotFoundException">One of the data.* or index.* files is missing.</exception>
        /// <exception cref="InvalidDataException">An index file contains a non-empty word line without any space.</exception>
        private void Load()
        {
            if (!Directory.Exists(WordNetDirectory))
            {
                throw new DirectoryNotFoundException("Non-existent WordNet directory:  " + WordNetDirectory);
            }

            // get data and index paths
            string[] dataPaths =
            {
                Path.Combine(WordNetDirectory, "data.adj"),
                Path.Combine(WordNetDirectory, "data.adv"),
                Path.Combine(WordNetDirectory, "data.noun"),
                Path.Combine(WordNetDirectory, "data.verb")
            };

            string[] indexPaths =
            {
                Path.Combine(WordNetDirectory, "index.adj"),
                Path.Combine(WordNetDirectory, "index.adv"),
                Path.Combine(WordNetDirectory, "index.noun"),
                Path.Combine(WordNetDirectory, "index.verb")
            };

            // make sure all files exist
            foreach (string path in dataPaths.Union(indexPaths))
            {
                if (!File.Exists(path))
                {
                    throw new FileNotFoundException("Failed to find WordNet file:  " + path);
                }
            }

            string sortFlagPath = Path.Combine(WordNetDirectory, ".sorted_for_dot_net");

            if (!File.Exists(sortFlagPath))
            {
                /* make sure the index files are sorted according to the current sort order. the index files in the
                 * wordnet distribution are sorted in the order needed for (presumably) the java api, which uses
                 * a different sort order than the .net runtime. thus, unless we resort the lines in the index
                 * files, we won't be able to do a proper binary search over the data. */
                foreach (string indexPath in indexPaths)
                {
                    SortIndexFileByWord(indexPath);
                }

                // create flag file, indicating that we've sorted the data (disposing the writer flushes and closes it)
                using (StreamWriter sortFlagFile = new StreamWriter(sortFlagPath))
                {
                    sortFlagFile.WriteLine(
                        "This file serves no purpose other than to indicate that the WordNet distribution data in the current directory has been sorted for use by the .NET API.");
                }
            }

            // pass 1:  get total number of synsets, used only to presize the lookup below
            int totalSynsets = 0;

            foreach (string dataPath in dataPaths)
            {
                using (StreamReader dataFile = new StreamReader(dataPath))
                {
                    string line;
                    while ((line = dataFile.ReadLine()) != null)
                    {
                        // lines that start with a space (index 0) or contain no space (-1) are not synset definitions
                        if (line.IndexOf(' ') > 0)
                        {
                            ++totalSynsets;
                        }
                    }
                }
            }

            // pass 2:  create synset shells (pos and offset only)
            idSynset = new Dictionary <string, SynSet>(totalSynsets, StringComparer.OrdinalIgnoreCase);
            foreach (string dataPath in dataPaths)
            {
                WordType pos = GetFilePOS(dataPath);

                using (StreamReader dataFile = new StreamReader(dataPath))
                {
                    string line;
                    while ((line = dataFile.ReadLine()) != null)
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // the first column is the synset offset
                            int    offset = int.Parse(line.Substring(0, firstSpace));
                            SynSet synset = new SynSet(pos, offset);

                            idSynset.Add(synset.ID, synset);
                        }
                    }
                }
            }

            // pass 3:  instantiate synsets (hooks up relations, set glosses, etc.)
            foreach (string dataPath in dataPaths)
            {
                WordType pos = GetFilePOS(dataPath);

                using (StreamReader dataFile = new StreamReader(dataPath))
                {
                    string line;
                    while ((line = dataFile.ReadLine()) != null)
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // instantiate synset defined on current line, using the shells from pass 2 for all references
                            idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, idSynset);
                        }
                    }
                }
            }

            // organize synsets by pos and words...also set most common synset for word-pos pairs that have multiple synsets
            posWordSynSets = new Dictionary <WordType, Dictionary <string, List <SynSet> > >();
            foreach (string indexPath in indexPaths)
            {
                WordType pos = GetFilePOS(indexPath);

                // make sure an entry exists for this pos, then fetch it once instead of once per word and synset
                posWordSynSets.GetItemCreate(pos);
                var wordSynSets = posWordSynSets[pos];

                // scan word index file, skipping header lines
                using (StreamReader indexFile = new StreamReader(indexPath))
                {
                    string line;
                    while ((line = indexFile.ReadLine()) != null)
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // grab word and synset shells, along with the most common synset
                            string        word    = line.Substring(0, firstSpace);
                            List <SynSet> synsets = GetSynSetShells(line, pos, out SynSet mostCommonSynSet);

                            // set flag on most common synset if it's ambiguous
                            if (synsets.Count > 1)
                            {
                                idSynset[mostCommonSynSet.ID].SetAsMostCommonSynsetFor(word);
                            }

                            // use reference to the synsets that we instantiated in our three-pass routine above
                            List <SynSet> instantiatedSynsets = new List <SynSet>(synsets.Count);
                            wordSynSets.Add(word, instantiatedSynsets);
                            foreach (SynSet synset in synsets)
                            {
                                instantiatedSynsets.Add(idSynset[synset.ID]);
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        ///     Rewrites an index file so that its word lines are ordered by their first column (the word). Lines that
        ///     start with a space are treated as header lines and are written first, in their original order. Word lines
        ///     are trimmed and empty lines are dropped. The ordering is the default one used by List&lt;string&gt;.Sort().
        /// </summary>
        /// <param name="indexPath">Path of the index file to replace with its sorted version</param>
        /// <exception cref="InvalidDataException">A non-empty word line contains no space.</exception>
        private static void SortIndexFileByWord(string indexPath)
        {
            // the sorted copy is built in a temporary file and only swapped in once it is complete
            string tempPath          = Path.GetTempFileName();
            bool   sortedCopyWritten = false;

            try
            {
                using (StreamWriter tempFile = new StreamWriter(tempPath))
                {
                    // map each word to its line...Add throws if a word appears twice (ignoring case)
                    Dictionary <string, string> wordLine = new Dictionary <string, string>(StringComparer.OrdinalIgnoreCase);
                    using (StreamReader indexFile = new StreamReader(indexPath))
                    {
                        string line;
                        while ((line = indexFile.ReadLine()) != null)
                        {
                            // write header lines to temp file immediately
                            if (line.StartsWith(" ", StringComparison.Ordinal))
                            {
                                tempFile.WriteLine(line);
                                continue;
                            }

                            // trim useless blank spaces from line
                            line = line.Trim();

                            // an empty line carries no word and would only break the ordering of the sorted file
                            if (line.Length == 0)
                            {
                                continue;
                            }

                            int firstSpace = line.IndexOf(' ');
                            if (firstSpace < 0)
                            {
                                throw new InvalidDataException("Malformed line in WordNet index file " + indexPath + ":  " + line);
                            }

                            // map line to first column
                            wordLine.Add(line.Substring(0, firstSpace), line);
                        }
                    }

                    // NOTE(review): Sort() uses the default string comparison; whatever searches the index files
                    // presumably relies on the same ordering - confirm before changing the comparer.
                    List <string> sortedWords = new List <string>(wordLine.Keys);
                    sortedWords.Sort();

                    // write lines sorted by word
                    foreach (string word in sortedWords)
                    {
                        tempFile.WriteLine(wordLine[word]);
                    }
                }

                sortedCopyWritten = true;
            }
            finally
            {
                // don't leave a stray temporary file behind if the sorted copy could not be produced
                if (!sortedCopyWritten)
                {
                    File.Delete(tempPath);
                }
            }

            // replace original index file with properly sorted one
            File.Delete(indexPath);
            File.Move(tempPath, indexPath);
        }
        /// <summary>
        ///     Gets the shortest path from the current synset to another, following the given synset relations. The search
        ///     is breadth-first and temporarily stores a back-pointer on each synset it reaches; all of these back-pointers
        ///     are reset to null before the method exits, including when the search fails with an exception.
        /// </summary>
        /// <param name="destination">Destination synset</param>
        /// <param name="relations">Relations to follow, or null for all relations.</param>
        /// <returns>Synset path, starting at the current synset and ending at the destination, or null if none exists.</returns>
        public List <SynSet> GetShortestPathTo(SynSet destination, IEnumerable <SynSetRelation> relations)
        {
            if (relations == null)
            {
                relations = (SynSetRelation[])Enum.GetValues(typeof(SynSetRelation));
            }

            // make sure the backpointer on the current synset is null - can't predict what other functions might do
            SearchBackPointer = null;

            /* synsets already reached, used to avoid cycles. a hash set keeps the membership test constant-time instead
             * of scanning a list for every candidate.
             * NOTE(review): assumes SynSet.GetHashCode is consistent with SynSet.Equals - confirm. */
            HashSet <SynSet> synsetsEncountered = new HashSet <SynSet> { this };

            // start search queue
            Queue <SynSet> searchQueue = new Queue <SynSet>();

            searchQueue.Enqueue(this);

            List <SynSet> path = null;

            try
            {
                // run search
                while (searchQueue.Count > 0 &&
                       path == null)
                {
                    SynSet currSynSet = searchQueue.Dequeue();

                    // see if we've finished the search
                    if (currSynSet == destination)
                    {
                        // gather synsets along path by walking the backpointers to the start (whose backpointer is null)
                        path = new List <SynSet>();
                        while (currSynSet != null)
                        {
                            path.Add(currSynSet);
                            currSynSet = currSynSet.SearchBackPointer;
                        }

                        // reverse for the correct order
                        path.Reverse();
                    }

                    // expand the search one level
                    else
                    {
                        foreach (SynSet synset in currSynSet.GetRelatedSynSets(relations, false))
                        {
                            // Add returns false for a synset that was already reached
                            if (synsetsEncountered.Add(synset))
                            {
                                synset.SearchBackPointer = currSynSet;
                                searchQueue.Enqueue(synset);
                            }
                        }
                    }
                }
            }
            finally
            {
                // null-out all search backpointers, even if the search threw, so no stale pointers are left behind
                foreach (SynSet synset in synsetsEncountered)
                {
                    synset.SearchBackPointer = null;
                }
            }

            return(path);
        }
        /// <summary>
        ///     Creates a synset shell for every synset offset listed on a word index line. A shell is a SynSet built from
        ///     a POS and an offset only.
        /// </summary>
        /// <param name="wordIndexLine">
        ///     Word index line, split on spaces. The third column holds the number of synsets and the offsets are
        ///     the trailing columns of the line.
        /// </param>
        /// <param name="pos">POS given to every shell that is created</param>
        /// <param name="mostCommonSynSet">Receives the shell for the first offset listed on the line, which is treated as the most common synset for the word</param>
        /// <returns>Synset shells, in the reverse of the order in which their offsets appear on the line</returns>
        private static List <SynSet> GetSynSetShells(string wordIndexLine, WordType pos, out SynSet mostCommonSynSet)
        {
            mostCommonSynSet = null;

            // split the line into columns, ignoring runs of spaces
            char[]   separators = { ' ' };
            string[] columns    = wordIndexLine.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // the offsets occupy the last <synset count> columns
            int synsetCount       = int.Parse(columns[2]);
            int firstOffsetColumn = columns.Length - synsetCount;

            List <SynSet> shells = new List <SynSet>();

            // walk the offset columns backwards, from the end of the line to the first offset
            int column = columns.Length;

            while (--column >= firstOffsetColumn)
            {
                SynSet shell = new SynSet(pos, int.Parse(columns[column]));

                shells.Add(shell);

                // the first offset on the line is reached last and identifies the most common synset
                if (column == firstOffsetColumn)
                {
                    mostCommonSynSet = shell;
                }
            }

            // no offset column was visited
            if (mostCommonSynSet == null)
            {
                throw new Exception("Failed to get most common synset");
            }

            return(shells);
        }