/// <summary>
        /// Derives a weight for every class number that appears in a stream of training instances.
        /// </summary>
        /// <param name="trainingInstancesReader">Reader over training instances, one per line. The text before the first
        /// space (or the entire line, if it has no space) is parsed as the integer class number.</param>
        /// <returns>Mapping from class number to (instances of all other classes) / (instances of that class). Class numbers
        /// whose unmapped label equals PointPrediction.NullLabel are left out.</returns>
        private Dictionary<int, float> GetPerClassWeights(StreamReader trainingInstancesReader)
        {
            // tally how many training instances carry each class number
            Dictionary<int, int> instancesPerClass = new Dictionary<int, int>();
            string instance;
            while (trainingInstancesReader.TryReadLine(out instance))
            {
                int labelEnd = instance.IndexOf(' ');
                string labelText = labelEnd == -1 ? instance : instance.Substring(0, labelEnd);
                int label = int.Parse(labelText);

                instancesPerClass.EnsureContainsKey(label, typeof(int));
                instancesPerClass[label] += 1;
            }

            int totalInstances = instancesPerClass.Values.Sum();

            // weight = instances outside the class divided by instances inside the class
            Dictionary<int, float> weights = new Dictionary<int, float>();
            foreach (KeyValuePair<int, int> labelCount in instancesPerClass)
            {
                if (_libLinear.GetUnmappedLabel(labelCount.Key.ToString()) == PointPrediction.NullLabel)
                {
                    continue;
                }

                weights.Add(labelCount.Key, (totalInstances - labelCount.Value) / (float)labelCount.Value);
            }

            return weights;
        }
Exemple #2
0
        /// <summary>
        /// Counts incidents per time slice and per grid cell of the prediction area.
        /// </summary>
        /// <param name="incidents">Incidents to count</param>
        /// <param name="prediction">Prediction that supplies the model, the bounding box of the prediction area and the point spacing</param>
        /// <returns>Mapping from slice number to a mapping from "row-col" location key to the number of incidents there</returns>
        public static Dictionary<long, Dictionary<string, int>> GetSliceLocationTrueCount(IEnumerable<Incident> incidents, Prediction prediction)
        {
            Dictionary<long, Dictionary<string, int>> counts = new Dictionary<long, Dictionary<string, int>>();

            // only a time slice model defines a slice length; with any other model every incident lands in slice 1
            DiscreteChoiceModel model = prediction.Model;
            long ticksPerSlice = model is TimeSliceDCM ? ((TimeSliceDCM)model).TimeSliceTicks : -1;

            foreach (Incident incident in incidents)
            {
                long slice = ticksPerSlice > 0 ? incident.Time.Ticks / ticksPerSlice : 1;

                // grid cell of the incident, measured from the minimum corner of the bounding box
                int row = (int)((incident.Location.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
                int col = (int)((incident.Location.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
                string location = row + "-" + col;

                counts.EnsureContainsKey(slice, typeof(Dictionary<string, int>));
                Dictionary<string, int> locationCounts = counts[slice];
                locationCounts.EnsureContainsKey(location, typeof(int));
                locationCounts[location] += 1;
            }

            return counts;
        }
Exemple #3
0
        /// <summary>
        /// Collects the words that are lexically related to words of this synset. Lexical relations hold between individual
        /// words of synsets, in contrast to semantic relations (e.g., hypernym), which hold between whole synsets.
        /// </summary>
        /// <returns>For each lexical relation, a mapping from a word of this synset to the related words found in the related synsets</returns>
        public Dictionary<SynSetRelation, Dictionary<string, List<string>>> GetLexicallyRelatedWords()
        {
            var result = new Dictionary<SynSetRelation, Dictionary<string, List<string>>>();

            foreach (var relation in lexicalRelations.Keys)
            {
                result.EnsureContainsKey(relation, typeof(Dictionary<string, List<string>>));
                var wordsForRelation = result[relation];
                var synSetsForRelation = lexicalRelations[relation];

                foreach (var relatedSynSet in synSetsForRelation.Keys)
                {
                    // the related synset's words are read below, so it has to be instantiated first
                    if (!relatedSynSet.Instantiated)
                    {
                        relatedSynSet.Instantiate(wordNet.Provider);
                    }

                    var wordIndexLinks = synSetsForRelation[relatedSynSet];
                    foreach (var sourceWordIndex in wordIndexLinks.Keys)
                    {
                        // stored word indexes are 1-based
                        var sourceWord = Words[sourceWordIndex - 1];

                        wordsForRelation.EnsureContainsKey(sourceWord, typeof(List<string>), false);
                        var targets = wordsForRelation[sourceWord];

                        foreach (var targetWordIndex in wordIndexLinks[sourceWordIndex])
                        {
                            targets.Add(relatedSynSet.Words[targetWordIndex - 1]);
                        }
                    }
                }
            }

            return result;
        }
Exemple #4
0
        /// <summary>
        /// Collects the words that are lexically related to words of this synset. Lexical relations hold between individual
        /// words of synsets, in contrast to semantic relations (e.g., hypernym), which hold between whole synsets.
        /// </summary>
        /// <returns>For each lexical relation, a mapping from a word of this synset to the set of related words found in the related synsets</returns>
        public Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> GetLexicallyRelatedWords()
        {
            Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> result = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>>();

            foreach (WordNetEngine.SynSetRelation relation in _lexicalRelations.Keys)
            {
                result.EnsureContainsKey(relation, typeof(Dictionary<string, Set<string>>));
                Dictionary<string, Set<string>> wordsForRelation = result[relation];
                var synSetsForRelation = _lexicalRelations[relation];

                foreach (SynSet relatedSynSet in synSetsForRelation.Keys)
                {
                    // the related synset's words are read below, so it has to be instantiated first
                    if (!relatedSynSet.Instantiated)
                    {
                        relatedSynSet.Instantiate();
                    }

                    var wordIndexLinks = synSetsForRelation[relatedSynSet];
                    foreach (int sourceWordIndex in wordIndexLinks.Keys)
                    {
                        // stored word indexes are 1-based
                        string sourceWord = _words[sourceWordIndex - 1];

                        wordsForRelation.EnsureContainsKey(sourceWord, typeof(Set<string>), false);
                        Set<string> targets = wordsForRelation[sourceWord];

                        foreach (int targetWordIndex in wordIndexLinks[sourceWordIndex])
                        {
                            string targetWord = relatedSynSet.Words[targetWordIndex - 1];
                            targets.Add(targetWord);
                        }
                    }
                }
            }

            return result;
        }
        /// <summary>
        /// Registers a frame element with this set: in the element collection, in the ID lookup, and in the
        /// lookup keyed by lower-cased name.
        /// </summary>
        /// <param name="frameElement">Frame element to register</param>
        public void Add(FrameElement frameElement)
        {
            _frameElements.Add(frameElement);
            _idFrameElement.Add(frameElement.ID, frameElement);

            // several frame elements may share a name, so the name lookup holds a set per lower-cased name
            string nameKey = frameElement.Name.ToLower();
            _nameFrameElements.EnsureContainsKey(nameKey, typeof(Set<FrameElement>));

            var sameNameElements = _nameFrameElements[nameKey];
            sameNameElements.Add(frameElement);
        }
        /// <summary>
        /// Groups everything returned by GetVerbInfo for a verb by sense (role set ID).
        /// </summary>
        /// <param name="verb">Verb to look up information for</param>
        /// <returns>Mapping from role set ID to the VerbInfo objects that carry that role set ID</returns>
        public Dictionary<int, List<VerbInfo>> GetVerbInfoBySense(string verb)
        {
            Dictionary<int, List<VerbInfo>> infoBySense = new Dictionary<int, List<VerbInfo>>();

            foreach (VerbInfo verbInfo in GetVerbInfo(verb))
            {
                int sense = verbInfo.RoleSetId;

                infoBySense.EnsureContainsKey(sense, typeof(List<VerbInfo>));
                List<VerbInfo> senseInfo = infoBySense[sense];
                senseInfo.Add(verbInfo);
            }

            return infoBySense;
        }
        /// <summary>
        /// Saves the PropBank-to-VerbNet mapping to file. The output groups the links by predicate (verb), then by
        /// PropBank role set and VerbNet class, with one role line per PropBank-argument/VerbNet-theta pair.
        /// </summary>
        /// <param name="path">Path to file</param>
        public void SavePropBankVerbNetLinking(string path)
        {
            // gather propbank-verbnet linking - organized by verb, role set, verbnet class, then tuples of pb-vn argument links
            Dictionary<string, Dictionary<int, Dictionary<string, List<Tuple<int, string>>>>> pbVnLinking = new Dictionary<string, Dictionary<int, Dictionary<string, List<Tuple<int, string>>>>>();

            foreach (string propBankRole in PropBankRoles)
            {
                // a propbank role is read as three period-separated parts:  verb, role set number, argument number
                string[] parts = propBankRole.Split('.');
                string verb = parts[0];
                int roleSet = int.Parse(parts[1]);
                int arg = int.Parse(parts[2]);

                pbVnLinking.EnsureContainsKey(verb, typeof(Dictionary<int, Dictionary<string, List<Tuple<int, string>>>>));
                pbVnLinking[verb].EnsureContainsKey(roleSet, typeof(Dictionary<string, List<Tuple<int, string>>>));

                foreach (string verbNetRole in GetVerbNetRolesForPropBank(propBankRole))
                {
                    // everything before the last period is the verbnet class, everything after it is the theme role
                    int lastPeriod = verbNetRole.LastIndexOf('.');
                    string verbNetClass = verbNetRole.Substring(0, lastPeriod);
                    string themeRole = verbNetRole.Substring(lastPeriod + 1);

                    pbVnLinking[verb][roleSet].EnsureContainsKey(verbNetClass, typeof(List<Tuple<int, string>>));
                    pbVnLinking[verb][roleSet][verbNetClass].Add(new Tuple<int, string>(arg, themeRole));
                }
            }

            // write linking file...the using block closes the file even when one of the writes throws
            using (StreamWriter file = new StreamWriter(path))
            {
                file.WriteLine("<pbvn-typemap>");
                foreach (string predicate in pbVnLinking.Keys)
                {
                    file.WriteLine("  <predicate lemma=\"" + predicate + "\">");
                    foreach (int roleSet in pbVnLinking[predicate].Keys)
                    {
                        foreach (string vnClass in pbVnLinking[predicate][roleSet].Keys)
                        {
                            file.WriteLine("    <argmap pb-roleset=\"" + predicate + "." + roleSet + "\" vn-class=\"" + vnClass + "\">");
                            foreach (Tuple<int, string> map in pbVnLinking[predicate][roleSet][vnClass])
                            {
                                file.WriteLine("      <role pb-arg=\"" + map.Item1 + "\" vn-theta=\"" + map.Item2 + "\" />");
                            }

                            file.WriteLine("    </argmap>");
                        }
                    }

                    file.WriteLine("  </predicate>");
                }

                file.WriteLine("</pbvn-typemap>");
            }
        }
        /// <summary>
        /// Links a FrameNet frame element and a VerbNet role to each other. Both lookup directions are updated, and a
        /// link that is already present is not added again.
        /// </summary>
        /// <param name="frameElement">Frame element</param>
        /// <param name="verbNetRole">VerbNet role to associate with the frame element</param>
        public void AddVerbNetRoleForFrameNet(string frameElement, string verbNetRole)
        {
            // forward direction:  frame element -> verbnet roles
            _frameElementVerbNetRoles.EnsureContainsKey(frameElement, typeof(Set<string>));
            var rolesForFrameElement = _frameElementVerbNetRoles[frameElement];
            if (!rolesForFrameElement.Contains(verbNetRole))
            {
                rolesForFrameElement.Add(verbNetRole);
            }

            // reverse direction:  verbnet role -> frame elements
            _verbNetRoleFrameElements.EnsureContainsKey(verbNetRole, typeof(Set<string>));
            var frameElementsForRole = _verbNetRoleFrameElements[verbNetRole];
            if (!frameElementsForRole.Contains(frameElement))
            {
                frameElementsForRole.Add(frameElement);
            }
        }
        /// <summary>
        /// Links a PropBank role and a VerbNet role to each other. Both lookup directions are updated, and a
        /// link that is already present is not added again.
        /// </summary>
        /// <param name="propBankRole">PropBank role</param>
        /// <param name="verbNetRole">VerbNet role to associate with the PropBank role</param>
        public void AddVerbNetRoleForPropBank(string propBankRole, string verbNetRole)
        {
            // forward direction:  propbank role -> verbnet roles
            _propBankRoleVerbNetRoles.EnsureContainsKey(propBankRole, typeof(Set<string>));
            var verbNetRolesForPropBank = _propBankRoleVerbNetRoles[propBankRole];
            if (!verbNetRolesForPropBank.Contains(verbNetRole))
            {
                verbNetRolesForPropBank.Add(verbNetRole);
            }

            // reverse direction:  verbnet role -> propbank roles
            _verbNetRolePropBankRoles.EnsureContainsKey(verbNetRole, typeof(Set<string>));
            var propBankRolesForVerbNet = _verbNetRolePropBankRoles[verbNetRole];
            if (!propBankRolesForVerbNet.Contains(propBankRole))
            {
                propBankRolesForVerbNet.Add(propBankRole);
            }
        }
        /// <summary>
        /// Saves the FrameNet-to-VerbNet mapping to file. Frames, VerbNet classes and role pairs are written in sorted
        /// order so that successive versions of the file diff cleanly.
        /// </summary>
        /// <param name="path">Path to file</param>
        public void SaveFrameNetVerbNetLinking(string path)
        {
            // gather framenet-verbnet linking - organized by frame, verbnet class, then tuples of fn-vn links
            Dictionary<string, Dictionary<string, List<Tuple<string, string>>>> fnVnLinking = new Dictionary<string, Dictionary<string, List<Tuple<string, string>>>>();

            foreach (string frameElement in FrameElements)
            {
                // a frame element is read as "frame.element"; both parts go through InitialCharactersToUpper before being written
                string[] parts = frameElement.Split('.');
                string frame = InitialCharactersToUpper(parts[0], 1);
                string fe = InitialCharactersToUpper(parts[1], 1);

                fnVnLinking.EnsureContainsKey(frame, typeof(Dictionary<string, List<Tuple<string, string>>>));

                // gather roles for frame element
                foreach (string verbNetRole in GetVerbNetRolesForFrameNet(frameElement))
                {
                    // everything before the last period is the verbnet class, everything after it is the theme role
                    int lastPeriod = verbNetRole.LastIndexOf('.');
                    string verbNetClass = verbNetRole.Substring(0, lastPeriod);
                    string themeRole = verbNetRole.Substring(lastPeriod + 1);

                    fnVnLinking[frame].EnsureContainsKey(verbNetClass, typeof(List<Tuple<string, string>>));
                    fnVnLinking[frame][verbNetClass].Add(new Tuple<string, string>(fe, themeRole));
                }
            }

            // write linking file...sort everything to make version control more informative. the using block
            // closes the file even when one of the writes throws.
            using (StreamWriter file = new StreamWriter(path))
            {
                file.WriteLine("<verbnetRoles-framenetFEs_RoleMappingData>");
                foreach (string frame in new SortedSet<string>(fnVnLinking.Keys))
                {
                    foreach (string vnClass in new SortedSet<string>(fnVnLinking[frame].Keys))
                    {
                        file.WriteLine("  <vncls class='" + vnClass + "' fnframe='" + frame + "'>" + Environment.NewLine +
                                       "    <roles>");

                        // the sorted set also collapses duplicate (frame element, theme role) pairs
                        foreach (Tuple<string, string> map in new SortedSet<Tuple<string, string>>(fnVnLinking[frame][vnClass]))
                        {
                            file.WriteLine("      <role fnrole='" + map.Item1 + "' vnrole='" + map.Item2 + "'/>");
                        }

                        file.WriteLine("    </roles>" + Environment.NewLine +
                                       "  </vncls>");
                    }
                }

                file.WriteLine("</verbnetRoles-framenetFEs_RoleMappingData>");
            }
        }
Exemple #11
0
        /// <summary>
        /// Collects the total threat of every point prediction, grouped by time slice and by grid cell of the prediction area.
        /// </summary>
        /// <param name="prediction">Prediction that supplies the model, points, point predictions, bounding box and point spacing</param>
        /// <returns>Mapping from slice number to a mapping from "row-col" location key to the total threat values recorded there</returns>
        public static Dictionary<long, Dictionary<string, List<double>>> GetSliceLocationThreats(Prediction prediction)
        {
            Dictionary<long, Dictionary<string, List<double>>> threats = new Dictionary<long, Dictionary<string, List<double>>>();

            // only a time slice model defines a slice length; with any other model every prediction lands in slice 1
            DiscreteChoiceModel model = prediction.Model;
            long ticksPerSlice = model is TimeSliceDCM ? ((TimeSliceDCM)model).TimeSliceTicks : -1;

            // index the points by ID so that each point prediction can find its location
            Dictionary<int, Point> pointsById = new Dictionary<int, Point>();
            foreach (Point predictionPoint in prediction.Points)
            {
                pointsById.Add(predictionPoint.Id, predictionPoint);
            }

            foreach (PointPrediction pointPrediction in prediction.PointPredictions)
            {
                long slice = ticksPerSlice > 0 ? pointPrediction.Time.Ticks / ticksPerSlice : 1;

                // grid cell of the predicted point, measured from the minimum corner of the bounding box
                PostGIS.Point position = pointsById[pointPrediction.PointId].Location;
                int row = (int)((position.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
                int col = (int)((position.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
                string location = row + "-" + col;

                threats.EnsureContainsKey(slice, typeof(Dictionary<string, List<double>>));
                Dictionary<string, List<double>> locationThreats = threats[slice];
                locationThreats.EnsureContainsKey(location, typeof(List<double>));
                locationThreats[location].Add(pointPrediction.TotalThreat);
            }

            return threats;
        }
Exemple #12
0
        /// <summary>
        /// Retrieves every lexical relation of this synset together with the words it relates. A lexical relation links
        /// specific words of two synsets, whereas a semantic relation (e.g., hypernym) links the synsets as a whole.
        /// </summary>
        /// <returns>For each lexical relation, a mapping from a word of this synset to the set of related words found in the related synsets</returns>
        public Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> GetLexicallyRelatedWords()
        {
            Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> wordsByRelation = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>>();

            foreach (WordNetEngine.SynSetRelation relation in _lexicalRelations.Keys)
            {
                wordsByRelation.EnsureContainsKey(relation, typeof(Dictionary<string, Set<string>>));
                Dictionary<string, Set<string>> relatedBySourceWord = wordsByRelation[relation];
                var linksBySynSet = _lexicalRelations[relation];

                foreach (SynSet relatedSynSet in linksBySynSet.Keys)
                {
                    // words of the related synset are only read after it has been instantiated
                    if (!relatedSynSet.Instantiated)
                    {
                        relatedSynSet.Instantiate();
                    }

                    var linksBySourceIndex = linksBySynSet[relatedSynSet];
                    foreach (int sourceWordIndex in linksBySourceIndex.Keys)
                    {
                        // word indexes are stored 1-based
                        string sourceWord = _words[sourceWordIndex - 1];
                        relatedBySourceWord.EnsureContainsKey(sourceWord, typeof(Set<string>), false);

                        Set<string> relatedWords = relatedBySourceWord[sourceWord];
                        foreach (int targetWordIndex in linksBySourceIndex[sourceWordIndex])
                        {
                            string targetWord = relatedSynSet.Words[targetWordIndex - 1];
                            relatedWords.Add(targetWord);
                        }
                    }
                }
            }

            return wordsByRelation;
        }
Exemple #13
0
        /// <summary>
        /// Constructor. Validates the WordNet directory, re-sorts the index files once per directory (recorded by a
        /// flag file), and then either loads all synsets into memory or opens search streams and readers for
        /// on-disk lookup.
        /// </summary>
        /// <param name="wordNetDirectory">Path to WordNet directory (the one with the data and index files in it)</param>
        /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space
        /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient
        /// binary search algorithm.</param>
        /// <exception cref="DirectoryNotFoundException">The WordNet directory does not exist</exception>
        /// <exception cref="FileNotFoundException">One of the data.* or index.* files (adj, adv, noun, verb) is missing</exception>
        public WordNetEngine(string wordNetDirectory, bool inMemory)
        {
            _wordNetDirectory         = wordNetDirectory;
            _inMemory                 = inMemory;
            // these two lookups are only created in the on-disk branch of the engine init region below
            _posIndexWordSearchStream = null;
            _posSynSetDataFile        = null;

            if (!System.IO.Directory.Exists(_wordNetDirectory))
            {
                throw new DirectoryNotFoundException("Non-existent WordNet directory:  " + _wordNetDirectory);
            }

            // get data and index paths
            string[] dataPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "data.adj"),
                Path.Combine(_wordNetDirectory, "data.adv"),
                Path.Combine(_wordNetDirectory, "data.noun"),
                Path.Combine(_wordNetDirectory, "data.verb")
            };

            string[] indexPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "index.adj"),
                Path.Combine(_wordNetDirectory, "index.adv"),
                Path.Combine(_wordNetDirectory, "index.noun"),
                Path.Combine(_wordNetDirectory, "index.verb")
            };

            // make sure all files exist
            foreach (string path in dataPaths.Union(indexPaths))
            {
                if (!System.IO.File.Exists(path))
                {
                    throw new FileNotFoundException("Failed to find WordNet file:  " + path);
                }
            }

            #region index file sorting
            // the flag file marks a directory whose index files have already been re-sorted, so this runs once per directory
            string sortFlagPath = Path.Combine(_wordNetDirectory, ".sorted_for_dot_net");
            if (!System.IO.File.Exists(sortFlagPath))
            {
                /* make sure the index files are sorted according to the current sort order. the index files in the
                 * wordnet distribution are sorted in the order needed for (presumably) the java api, which uses
                 * a different sort order than the .net runtime. thus, unless we resort the lines in the index
                 * files, we won't be able to do a proper binary search over the data. */
                foreach (string indexPath in indexPaths)
                {
                    // create temporary file for sorted lines
                    string       tempPath = Path.GetTempFileName();
                    StreamWriter tempFile = new StreamWriter(tempPath);

                    // get number of words (lines) in file...lines that start with a space are header lines and are not counted
                    // NOTE(review): neither reader opened in this loop is closed explicitly; presumably TryReadLine closes
                    // the reader at end of stream - confirm, because the index file is deleted further down
                    int          numWords  = 0;
                    StreamReader indexFile = new StreamReader(indexPath);
                    string       line;
                    while (indexFile.TryReadLine(out line))
                    {
                        if (!line.StartsWith(" "))
                        {
                            ++numWords;
                        }
                    }

                    // get lines in file, sorted by first column (i.e., the word)...the count from the first pass presizes the lookup
                    Dictionary <string, string> wordLine = new Dictionary <string, string>(numWords);
                    indexFile = new StreamReader(indexPath);
                    while (indexFile.TryReadLine(out line))
                    {
                        // write header lines to temp file immediately
                        if (line.StartsWith(" "))
                        {
                            tempFile.WriteLine(line);
                        }
                        else
                        {
                            // trim useless blank spaces from line and map line to first column. this expects a space after
                            // the word and a distinct word on every line (Dictionary.Add throws on a repeated key).
                            line = line.Trim();
                            wordLine.Add(line.Substring(0, line.IndexOf(' ')), line);
                        }
                    }

                    // get sorted words...Sort() without arguments uses the default string comparison, as does the
                    // CompareTo call in the binary search delegate of the on-disk branch below
                    List <string> sortedWords = new List <string>(wordLine.Count);
                    sortedWords.AddRange(wordLine.Keys);
                    sortedWords.Sort();

                    // write lines sorted by word
                    foreach (string word in sortedWords)
                    {
                        tempFile.WriteLine(wordLine[word]);
                    }

                    tempFile.Close();

                    // replace original index file with properly sorted one
                    System.IO.File.Delete(indexPath);
                    System.IO.File.Move(tempPath, indexPath);
                }

                // create flag file, indicating that we've sorted the data
                StreamWriter sortFlagFile = new StreamWriter(sortFlagPath);
                sortFlagFile.WriteLine("This file serves no purpose other than to indicate that the WordNet distribution data in the current directory has been sorted for use by the .NET API.");
                sortFlagFile.Close();
            }
            #endregion

            #region engine init
            if (inMemory)
            {
                // pass 1:  get total number of synsets (used to presize the synset lookup in pass 2)
                int totalSynsets = 0;
                foreach (string dataPath in dataPaths)
                {
                    // scan synset data file for lines that don't start with a space...these are synset definition lines
                    StreamReader dataFile = new StreamReader(dataPath);
                    string       line;
                    while (dataFile.TryReadLine(out line))
                    {
                        // firstSpace > 0 means the line contains a space and does not begin with one
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            ++totalSynsets;
                        }
                    }
                }

                // pass 2:  create synset shells (pos and offset only)
                _idSynset = new Dictionary <string, SynSet>(totalSynsets);
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    StreamReader dataFile = new StreamReader(dataPath);
                    string       line;
                    while (dataFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // get offset (the text before the first space) and create synset shell
                            int    offset = int.Parse(line.Substring(0, firstSpace));
                            SynSet synset = new SynSet(pos, offset, null);

                            _idSynset.Add(synset.ID, synset);
                        }
                    }
                }

                // pass 3:  instantiate synsets (hooks up relations, set glosses, etc.)
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    StreamReader dataFile = new StreamReader(dataPath);
                    string       line;
                    while (dataFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // instantiate synset defined on current line, using the instantiated synsets for all references.
                            // the lookup key is built here as pos + ":" + offset, which has to equal the SynSet.ID used as key in pass 2.
                            _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset);
                        }
                    }
                }

                // organize synsets by pos and words...also set most common synset for word-pos pairs that have multiple synsets
                _posWordSynSets = new Dictionary <POS, Dictionary <string, Set <SynSet> > >();
                foreach (string indexPath in indexPaths)
                {
                    POS pos = GetFilePOS(indexPath);

                    _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, Set <SynSet> >));

                    // scan word index file, skipping header lines
                    StreamReader indexFile = new StreamReader(indexPath);
                    string       line;
                    while (indexFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // grab word and synset shells, along with the most common synset
                            string       word = line.Substring(0, firstSpace);
                            SynSet       mostCommonSynSet;
                            Set <SynSet> synsets = GetSynSetShells(line, pos, out mostCommonSynSet, null);

                            // set flag on most common synset if it's ambiguous
                            if (synsets.Count > 1)
                            {
                                _idSynset[mostCommonSynSet.ID].SetAsMostCommonSynsetFor(word);
                            }

                            // use reference to the synsets that we instantiated in our three-pass routine above
                            _posWordSynSets[pos].Add(word, new Set <SynSet>(synsets.Count));
                            foreach (SynSet synset in synsets)
                            {
                                _posWordSynSets[pos][word].Add(_idSynset[synset.ID]);
                            }
                        }
                    }
                }
            }
            else
            {
                // open binary search streams for index files
                _posIndexWordSearchStream = new Dictionary <POS, BinarySearchTextStream>();
                foreach (string indexPath in indexPaths)
                {
                    // create binary search stream for index file
                    BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate(delegate(object searchWord, string currentLine)
                    {
                        // if we landed on the header text, search further down
                        if (currentLine[0] == ' ')
                        {
                            return(1);
                        }

                        // get word on current line
                        string currentWord = currentLine.Substring(0, currentLine.IndexOf(' '));

                        // compare searched-for word to the current word
                        return(((string)searchWord).CompareTo(currentWord));
                    }));

                    // add search stream for current POS
                    _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream);
                }

                // open readers for synset data files...they are stored in the field and stay open after the constructor returns
                _posSynSetDataFile = new Dictionary <POS, StreamReader>();
                foreach (string dataPath in dataPaths)
                {
                    _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath));
                }
            }
            #endregion
        }
Exemple #14
0
 /// <summary>
 /// Convenience overload that forwards to the overload taking an additional third argument, passing null for it.
 /// Presumably the effect is that a missing key gets a newly created value of the given type - confirm against
 /// the forwarded-to overload.
 /// </summary>
 /// <typeparam name="K">Key type of the dictionary</typeparam>
 /// <typeparam name="V">Value type of the dictionary</typeparam>
 /// <param name="dictionary">Dictionary to check</param>
 /// <param name="key">Key that should be present afterwards</param>
 /// <param name="valueType">Type passed through to the forwarded-to overload</param>
 public static void EnsureContainsKey<K, V>(this Dictionary<K, V> dictionary, K key, Type valueType)
 {
     dictionary.EnsureContainsKey<K, V>(key, valueType, null);
 }
Exemple #15
0
        /// <summary>
        /// Populates the current synset from its data-file definition line: lexicographer file name, words, gloss,
        /// and semantic/lexical relations. Related synsets are taken from idSynset when it is supplied; when it is
        /// null, every related synset is created as a shell.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created as shells.</param>
        internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
        {
            // a synset may only be instantiated once
            if (_instantiated)
            {
                throw new Exception("Synset has already been instantiated");
            }

            // the LexicographerFileName enumeration follows the lexnames file of the wordnet spec but begins with
            // None, so the number found on the definition line is shifted up by one
            int lexFileNumber = int.Parse(GetField(definition, 1)) + 1;
            if (lexFileNumber <= 0)
            {
                throw new Exception("Invalid lexicographer file name number. Should be >= 1.");
            }

            _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexFileNumber;

            // field 3 is the (hexadecimal) word count; the word list begins right after that field
            int cursor;
            int wordCount = int.Parse(GetField(definition, 3, out cursor), NumberStyles.HexNumber);
            cursor = definition.IndexOf(' ', cursor) + 1;

            // read the words one by one
            _words = new List<string>(wordCount);
            int wordsRead = 0;
            while (wordsRead < wordCount)
            {
                int lastChar = definition.IndexOf(' ', cursor + 1) - 1;
                string word = definition.Substring(cursor, lastChar - cursor + 1);
                if (word.Contains(' '))
                {
                    throw new Exception("Unexpected space in word:  " + word);
                }

                _words.Add(word);

                // step over the lex_id field that follows each word
                cursor = definition.IndexOf(' ', lastChar + 2) + 1;
                ++wordsRead;
            }

            // the gloss is everything after the pipe
            int pipeIndex = definition.IndexOf('|');
            _gloss = definition.Substring(pipeIndex + 1).Trim();
            if (_gloss.Contains('|'))
            {
                throw new Exception("Unexpected pipe in gloss");
            }

            // the relation count sits after the three leading fields, the word count and a (word, lex_id) pair per word
            int relationCountField = (2 * _words.Count) + 4;
            int fieldStart;
            int relationCount = int.Parse(GetField(definition, relationCountField, out fieldStart));
            fieldStart = definition.IndexOf(' ', fieldStart) + 1;

            // read each relation
            _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>();
            _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>();
            for (int relationIndex = 0; relationIndex < relationCount; ++relationIndex)
            {
                string relationSymbol = null;
                int relatedOffset = -1;
                WordNetEngine.POS relatedPos = WordNetEngine.POS.None;
                int sourceWordIndex = -1;
                int targetWordIndex = -1;

                // a relation is made of four space-separated columns
                for (int column = 0; column <= 3; ++column)
                {
                    int nextSpace = definition.IndexOf(' ', fieldStart + 1);
                    string fieldValue = definition.Substring(fieldStart, nextSpace - fieldStart);

                    switch (column)
                    {
                        case 0:
                            // relation symbol
                            relationSymbol = fieldValue;
                            break;

                        case 1:
                            // offset of the related synset
                            relatedOffset = int.Parse(fieldValue);
                            break;

                        case 2:
                            // POS of the related synset
                            relatedPos = GetPOS(fieldValue);
                            break;

                        case 3:
                            // two hex digits for the source word followed by the target word (lexical relations)
                            sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber);
                            targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber);
                            break;

                        default:
                            throw new Exception();
                    }

                    fieldStart = nextSpace + 1;
                }

                // without a lookup the related synset is created as a shell; otherwise it is fetched directly
                SynSet relatedSynSet = idSynset == null
                    ? new SynSet(relatedPos, relatedOffset, _wordNetEngine)
                    : idSynset[relatedPos + ":" + relatedOffset];

                WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol);

                if (sourceWordIndex == 0 && targetWordIndex == 0)
                {
                    // neither word index is set: the relation is semantic (synset to synset)
                    _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>));
                    _relationSynSets[relation].Add(relatedSynSet);
                }
                else
                {
                    // lexical relation: record the target word index under relation -> synset -> source word index
                    _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>));
                    Dictionary<SynSet, Dictionary<int, Set<int>>> bySynSet = _lexicalRelations[relation];

                    bySynSet.EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>));
                    Dictionary<int, Set<int>> bySourceWord = bySynSet[relatedSynSet];

                    bySourceWord.EnsureContainsKey(sourceWordIndex, typeof(Set<int>));
                    Set<int> targetIndexes = bySourceWord[sourceWordIndex];

                    if (!targetIndexes.Contains(targetWordIndex))
                    {
                        targetIndexes.Add(targetWordIndex);
                    }
                }
            }

            // the wordnet engine is not needed once the synset has been built, so drop the reference
            _wordNetEngine = null;

            _instantiated = true;
        }
Exemple #16
0
        /// <summary>
        /// Collects the lexically related words of the current synset. Lexical relations hold between specific words
        /// of two synsets, as opposed to semantic relations (e.g., hypernym), which hold between whole synsets. Every
        /// lexical relation of this synset is visited, and related synsets are instantiated on demand.
        /// </summary>
        /// <returns>Mapping from relations to mappings from words in the current synset to related words in the related synsets</returns>
        public Dictionary<SynSetRelation, Dictionary<string, List<string>>> GetLexicallyRelatedWords() {
            var result = new Dictionary<SynSetRelation, Dictionary<string, List<string>>>();

            foreach (var relation in lexicalRelations.Keys) {
                result.EnsureContainsKey(relation, typeof(Dictionary<string, List<string>>));
                var wordsForRelation = result[relation];
                var synSetsForRelation = lexicalRelations[relation];

                foreach (var relatedSynSet in synSetsForRelation.Keys) {
                    // the related synset's words are read below, so it has to be instantiated first
                    if (!relatedSynSet.Instantiated) {
                        relatedSynSet.Instantiate(wordNet.Provider);
                    }

                    var targetsBySource = synSetsForRelation[relatedSynSet];
                    foreach (var sourceWordIndex in targetsBySource.Keys) {
                        // stored word indexes are one higher than the position in the Words list
                        var sourceWord = Words[sourceWordIndex - 1];

                        wordsForRelation.EnsureContainsKey(sourceWord, typeof(List<string>), false);
                        var relatedToSource = wordsForRelation[sourceWord];

                        foreach (var targetWordIndex in targetsBySource[sourceWordIndex]) {
                            relatedToSource.Add(relatedSynSet.Words[targetWordIndex - 1]);
                        }
                    }
                }
            }

            return result;
        }
Exemple #17
0
        /// <summary>
        /// Fills in the current synset (words, gloss, semantic and lexical relations) from its definition line. When
        /// the synset has already been instantiated its contents are left untouched. Related synsets are looked up in
        /// idSynset when it is non-null; otherwise they are created as shells.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created as shells.</param>
        internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset)
        {
            // an already-instantiated synset is not parsed again
            if (!_instantiated)
            {
                // field 3 is the (hexadecimal) word count; the word list begins right after that field
                int position;
                int wordTotal = int.Parse(GetField(definition, 3, out position), NumberStyles.HexNumber);
                position = definition.IndexOf(' ', position) + 1;

                _words = new List <string>(wordTotal);

                // read the words one by one
                for (int wordNum = 0; wordNum < wordTotal; ++wordNum)
                {
                    int spaceAfterWord = definition.IndexOf(' ', position + 1);
                    _words.Add(definition.Substring(position, spaceAfterWord - position));

                    // NOTE(review): this reads the two characters after the FIRST space of the line on every
                    // iteration, independent of the current word - it looks like it picks up the line's second
                    // field rather than this word's lex_id; confirm the intent before relying on lex_id
                    lex_id = Convert.ToInt32(definition.Substring(definition.IndexOf(' ') + 1, 2));

                    // step over the lex_id field that follows each word
                    position = definition.IndexOf(' ', spaceAfterWord + 1) + 1;
                }

                // the gloss is everything after the pipe
                int pipeIndex = definition.IndexOf('|');
                _gloss = definition.Substring(pipeIndex + 1).Trim();

                // the relation count sits after the three leading fields, the word count and a (word, lex_id) pair per word
                int relationCountField = (2 * _words.Count) + 4;
                int columnStart;
                int relationTotal = int.Parse(GetField(definition, relationCountField, out columnStart));
                columnStart = definition.IndexOf(' ', columnStart) + 1;

                // read each relation
                _relationSynSets  = new Dictionary <WordNetEngine.SynSetRelation, List <SynSet> >();
                _lexicalRelations = new Dictionary <WordNetEngine.SynSetRelation, Dictionary <SynSet, Dictionary <int, List <int> > > >();
                for (int relationIndex = 0; relationIndex < relationTotal; ++relationIndex)
                {
                    string            symbol          = null;
                    int               relatedOffset   = -1;
                    WordNetEngine.POS relatedPos      = WordNetEngine.POS.None;
                    int               sourceWordIndex = -1;
                    int               targetWordIndex = -1;

                    // a relation is made of four space-separated columns
                    for (int column = 0; column <= 3; ++column)
                    {
                        int    columnStop = definition.IndexOf(' ', columnStart + 1);
                        string text       = definition.Substring(columnStart, columnStop - columnStart);

                        switch (column)
                        {
                            case 0:
                                // relation symbol
                                symbol = text;
                                break;

                            case 1:
                                // offset of the related synset
                                relatedOffset = int.Parse(text);
                                break;

                            case 2:
                                // POS of the related synset
                                relatedPos = GetPOS(text);
                                break;

                            case 3:
                                // two hex digits for the source word followed by the target word (lexical relations)
                                sourceWordIndex = int.Parse(text.Substring(0, 2), NumberStyles.HexNumber);
                                targetWordIndex = int.Parse(text.Substring(2), NumberStyles.HexNumber);
                                break;

                            default:
                                throw new Exception();
                        }

                        columnStart = columnStop + 1;
                    }

                    // without a lookup the related synset is created as a shell; otherwise it is fetched directly
                    SynSet relatedSynSet = idSynset == null
                        ? new SynSet(relatedPos, relatedOffset, _wordNetEngine)
                        : idSynset[relatedPos + ":" + relatedOffset];

                    WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, symbol);

                    if (sourceWordIndex == 0 && targetWordIndex == 0)
                    {
                        // neither word index is set: the relation is semantic (synset to synset)
                        _relationSynSets.EnsureContainsKey(relation, typeof(List <SynSet>));
                        _relationSynSets[relation].Add(relatedSynSet);
                    }
                    else
                    {
                        // lexical relation: record the target word index under relation -> synset -> source word index
                        _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary <SynSet, Dictionary <int, List <int> > >));
                        Dictionary <SynSet, Dictionary <int, List <int> > > perSynSet = _lexicalRelations[relation];

                        perSynSet.EnsureContainsKey(relatedSynSet, typeof(Dictionary <int, List <int> >));
                        Dictionary <int, List <int> > perSourceWord = perSynSet[relatedSynSet];

                        perSourceWord.EnsureContainsKey(sourceWordIndex, typeof(List <int>));
                        List <int> targets = perSourceWord[sourceWordIndex];

                        // keep each target word index only once
                        if (!targets.Contains(targetWordIndex))
                        {
                            targets.Add(targetWordIndex);
                        }
                    }
                }

                _instantiated = true;
            }

            // the wordnet engine is not needed past this point, so drop the reference (also on repeated calls)
            _wordNetEngine = null;
        }
        /// <summary>
        /// Runs the prediction: builds a grid of points over the prediction area, appends the locations of the
        /// training incidents, keeps the points that intersect the prediction area, stores them, computes density
        /// estimates (overall and per incident type), stores the resulting point predictions and smooths the result.
        /// Progress messages are written to the console.
        /// </summary>
        /// <param name="prediction">Prediction to run</param>
        protected override void Run(Prediction prediction)
        {
            Area area = prediction.PredictionArea;
            double minX = area.BoundingBox.MinX;
            double maxX = area.BoundingBox.MaxX;
            double minY = area.BoundingBox.MinY;
            double maxY = area.BoundingBox.MaxY;

            // place points in the middle of the square boxes that cover the region - we get display errors from
            // pixel rounding if the points are exactly on the boundaries
            List<PostGIS.Point> points = new List<PostGIS.Point>();
            for (double x = minX + prediction.PredictionPointSpacing / 2d; x <= maxX; x += prediction.PredictionPointSpacing)
            {
                for (double y = minY + prediction.PredictionPointSpacing / 2d; y <= maxY; y += prediction.PredictionPointSpacing)
                {
                    points.Add(new PostGIS.Point(x, y, area.Shapefile.SRID));
                }
            }

            // the training incident locations are prediction points as well
            var trainingIncidents = Incident.Get(TrainingStart, TrainingEnd, area, IncidentTypes.ToArray());
            List<PostGIS.Point> trainingPoints = new List<PostGIS.Point>(trainingIncidents.Select(i => i.Location));
            points.AddRange(trainingPoints);

            // Intersects yields indexes into the list it was given; keep only the points at those indexes
            Console.Out.WriteLine("Filtering prediction points to prediction area");
            var keptIndexes = area.Intersects(points, prediction.PredictionPointSpacing / 2f);
            points = keptIndexes.Select(i => points[i]).ToList();

            NpgsqlConnection connection = DB.Connection.OpenConnection;

            try
            {
                Console.Out.WriteLine("Inserting points into prediction");
                Point.CreateTable(prediction, area.Shapefile.SRID);
                var pointRows = points.Select(p => new Tuple<PostGIS.Point, string, DateTime>(p, PointPrediction.NullLabel, DateTime.MinValue));
                List<int> pointIds = Point.Insert(connection, pointRows, prediction, area, false);

                // overall density: one value per inserted point, matched up by position
                Console.Out.WriteLine("Running overall KDE for " + IncidentTypes.Count + " incident type(s)");
                List<float> overallDensity = GetDensityEstimate(trainingPoints, _trainingSampleSize, false, 0, 0, points, _normalize);
                Dictionary<int, float> pointIdOverallDensity = new Dictionary<int, float>(pointIds.Count);
                for (int i = 0; i < pointIds.Count; ++i)
                {
                    pointIdOverallDensity.Add(pointIds[i], overallDensity[i]);
                }

                Dictionary<int, Dictionary<string, double>> pointIdIncidentDensity = new Dictionary<int, Dictionary<string, double>>(pointIdOverallDensity.Count);
                if (IncidentTypes.Count == 1)
                {
                    // with a single incident type the per-type density is the overall density
                    string incident = IncidentTypes.First();
                    foreach (KeyValuePair<int, float> overall in pointIdOverallDensity)
                    {
                        pointIdIncidentDensity.Add(overall.Key, new Dictionary<string, double> { { incident, overall.Value } });
                    }
                }
                else
                {
                    foreach (string incidentType in IncidentTypes)
                    {
                        Console.Out.WriteLine("Running KDE for incident \"" + incidentType + "\"");
                        List<PostGIS.Point> typePoints = new List<PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, area, incidentType).Select(i => i.Location));
                        List<float> typeDensity = GetDensityEstimate(typePoints, _trainingSampleSize, false, 0, 0, points, _normalize);

                        // an empty estimate contributes nothing for this incident type
                        if (typeDensity.Count == 0)
                        {
                            continue;
                        }

                        for (int i = 0; i < pointIds.Count; ++i)
                        {
                            int pointId = pointIds[i];
                            pointIdIncidentDensity.EnsureContainsKey(pointId, typeof(Dictionary<string, double>));
                            pointIdIncidentDensity[pointId].Add(incidentType, typeDensity[i]);
                        }
                    }
                }

                PointPrediction.CreateTable(prediction);
                PointPrediction.Insert(GetPointPredictionValues(pointIdOverallDensity, pointIdIncidentDensity), prediction, false);

                Smooth(prediction);
            }
            finally
            {
                // hand the connection back whether or not the run succeeded
                DB.Connection.Return(connection);
            }
        }
Exemple #19
0
        /// <summary>
        /// Initializes a new instance of the <see cref="WordNetMemoryProvider"/> class by reading the WordNet data
        /// and index files (adj, adv, noun, verb) found in the given directory into memory.
        /// </summary>
        /// <param name="dataPath">The data path.</param>
        /// <exception cref="System.ArgumentNullException">dataPath is null or empty</exception>
        /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception>
        /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception>
        public WordNetMemoryProvider(string dataPath)
        {
            if (string.IsNullOrEmpty(dataPath))
            {
                throw new ArgumentNullException("dataPath");
            }

            if (!new DirectoryInfo(dataPath).Exists)
            {
                throw new DirectoryNotFoundException("The data directory does not exist.");
            }

            FileInfo[] dataFiles =
            {
                new FileInfo(Path.Combine(dataPath, "data.adj")),
                new FileInfo(Path.Combine(dataPath, "data.adv")),
                new FileInfo(Path.Combine(dataPath, "data.noun")),
                new FileInfo(Path.Combine(dataPath, "data.verb"))
            };

            FileInfo[] indexFiles =
            {
                new FileInfo(Path.Combine(dataPath, "index.adj")),
                new FileInfo(Path.Combine(dataPath, "index.adv")),
                new FileInfo(Path.Combine(dataPath, "index.noun")),
                new FileInfo(Path.Combine(dataPath, "index.verb"))
            };

            // report the first required file that is missing (data files are checked before index files)
            FileInfo missingFile = dataFiles.Union(indexFiles).FirstOrDefault(candidate => !candidate.Exists);
            if (missingFile != null)
            {
                throw new FileNotFoundException("A required WordNet file does not exist: " + missingFile.Name);
            }

            // Pass 1: count the synsets so the lookup can be sized up front. Lines whose first space is not at the
            // very start are synset definition lines.
            int synsetCount = 0;
            foreach (FileInfo dataFile in dataFiles)
            {
                using (StreamReader reader = new StreamReader(dataFile.FullName))
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        if (line.IndexOf(' ') > 0)
                        {
                            ++synsetCount;
                        }
                    }
                }
            }

            // Pass 2: create a shell (pos and offset only) for every synset
            idSynset = new Dictionary <string, SynSet>(synsetCount);
            foreach (FileInfo dataFile in dataFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(dataFile.FullName);

                using (StreamReader reader = new StreamReader(dataFile.FullName))
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        int offsetEnd = line.IndexOf(' ');
                        if (offsetEnd > 0)
                        {
                            // the offset is the text before the first space
                            SynSet shell = new SynSet(pos, int.Parse(line.Substring(0, offsetEnd)), null);
                            idSynset.Add(shell.Id, shell);
                        }
                    }
                }
            }

            // Pass 3: instantiate the shells (relations, glosses, etc.); with every shell already in the lookup,
            // all references between synsets can be resolved
            foreach (FileInfo dataFile in dataFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(dataFile.FullName);

                using (StreamReader reader = new StreamReader(dataFile.FullName))
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        int offsetEnd = line.IndexOf(' ');
                        if (offsetEnd > 0)
                        {
                            string synsetId = pos + ":" + int.Parse(line.Substring(0, offsetEnd));
                            idSynset[synsetId].Instantiate(line, idSynset);
                        }
                    }
                }
            }

            // organize synsets by pos and word, and flag the most common synset of word-pos pairs that have
            // more than one synset
            posWordSynSets = new Dictionary <WordNetPos, Dictionary <string, List <SynSet> > >();

            foreach (FileInfo indexFile in indexFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(indexFile.FullName);

                posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, List <SynSet> >));

                using (StreamReader reader = new StreamReader(indexFile.FullName))
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        // header lines are skipped
                        int wordEnd = line.IndexOf(' ');
                        if (wordEnd <= 0)
                        {
                            continue;
                        }

                        // the word is the text before the first space; the rest of the line yields the shells
                        string word = line.Substring(0, wordEnd);
                        SynSet mostCommon;
                        var shells = WordNetFileProvider.GetSynSetShells(line, pos, out mostCommon, wordNet);

                        // only an ambiguous word needs a most-common flag
                        if (shells.Count > 1)
                        {
                            idSynset[mostCommon.Id].SetAsMostCommonSynsetFor(word);
                        }

                        // register the word first, then fill its list with the synsets instantiated above
                        // (not the freshly created shells)
                        List <SynSet> wordSynSets = new List <SynSet>(shells.Count);
                        posWordSynSets[pos].Add(word, wordSynSets);
                        foreach (var shell in shells)
                        {
                            wordSynSets.Add(idSynset[shell.Id]);
                        }
                    }
                }
            }
        }
Exemple #20
0
 /// <summary>
 /// Ensures that the current dictionary contains a given key. If it does not, a new key-value pair is added
 /// using the given key and the value resulting from calling the default constructor for the valueType type.
 /// This overload only forwards to the three-argument overload, passing a null object array as the third argument.
 /// </summary>
 /// <typeparam name="KeyType">Key type</typeparam>
 /// <typeparam name="ValueType">Value type</typeparam>
 /// <param name="dictionary">Dictionary</param>
 /// <param name="key">Key to ensure the existence of</param>
 /// <param name="valueType">Type of value</param>
 public static void EnsureContainsKey <KeyType, ValueType>(this Dictionary <KeyType, ValueType> dictionary, KeyType key, Type valueType)
 {
     // the (object[]) cast gives the null argument a static type; presumably this selects the overload that
     // takes constructor arguments - TODO confirm against that overload, which is not shown here
     dictionary.EnsureContainsKey <KeyType, ValueType>(key, valueType, (object[])null);
 }
        /// <summary>
        /// Counts incidents per time slice and grid cell. The slice is the incident time in ticks divided by the
        /// model's time slice length when the prediction's model is a TimeSliceDCM with a positive slice length,
        /// and 1 otherwise. The cell is found by dividing the incident's offset from the bounding box minimum by
        /// the prediction point spacing.
        /// </summary>
        /// <param name="incidents">Incidents to count</param>
        /// <param name="prediction">Prediction providing the model, the prediction area's bounding box and the point spacing</param>
        /// <returns>Mapping from slice to a mapping from "row-col" cell key to the number of incidents in that cell</returns>
        public static Dictionary<long, Dictionary<string, int>> GetSliceLocationTrueCount(IEnumerable<Incident> incidents, Prediction prediction)
        {
            Dictionary<long, Dictionary<string, int>> countsBySlice = new Dictionary<long, Dictionary<string, int>>();

            // only a time slice model has a slice length; -1 means "no slicing"
            DiscreteChoiceModel model = prediction.Model;
            long sliceTicks = model is TimeSliceDCM ? ((TimeSliceDCM)model).TimeSliceTicks : -1L;

            foreach (Incident incident in incidents)
            {
                // without a positive slice length every incident lands in slice 1
                long slice = sliceTicks > 0 ? incident.Time.Ticks / sliceTicks : 1;

                // grid cell of the incident, relative to the lower corner of the bounding box
                int row = (int)((incident.Location.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
                int col = (int)((incident.Location.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
                string cellKey = row + "-" + col;

                countsBySlice.EnsureContainsKey(slice, typeof(Dictionary<string, int>));
                Dictionary<string, int> cellCounts = countsBySlice[slice];

                cellCounts.EnsureContainsKey(cellKey, typeof(int));
                cellCounts[cellKey]++;
            }

            return countsBySlice;
        }
Exemple #22
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="frameNetDirectory">Path to FrameNet distribution directory</param>
        /// <param name="version">FrameNet version</param>
        public FrameNetEngine(string frameNetDirectory, Version version)
        {
            if (!System.IO.Directory.Exists(frameNetDirectory))
            {
                throw new DirectoryNotFoundException("Invalid FrameNet directory");
            }

            _frameNameFrame             = new Dictionary <string, Frame>();
            _frameElementIdFrameElement = new Dictionary <int, FrameElement>();
            _lexemeLexicalUnitIDs       = new Dictionary <string, Set <int> >();
            _lexicalUnitIdFrame         = new Dictionary <int, Frame>();
            _lexicalUnitLexicalUnitIDs  = new Dictionary <string, Set <int> >();
            _lexicalUnitIdLexicalUnit   = new Dictionary <int, LexicalUnit>();

            if (version == Version.FrameNet_1_3)
            {
                // init annotation engine
                _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "luXML"), version);

                #region get frames
                Set <int> uniqueFrameIDCheck = new Set <int>();
                XmlParser framesP            = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frames.xml")));
                while (framesP.SkipToElement("frame"))
                {
                    // create frame
                    string    frameXML        = framesP.OuterXML("frame");
                    XmlParser frameP          = new XmlParser(frameXML);
                    int       frameID         = int.Parse(frameP.AttributeValue("frame", "ID"));
                    string    frameName       = frameP.AttributeValue("frame", "name").ToLower().Trim(); // use lowercase for all frame names
                    string    frameDefinition = frameP.ElementText("definition");
                    Frame     frame           = new Frame(frameName, frameDefinition, frameID);

                    // add to frame index index
                    _frameNameFrame.Add(frame.Name, frame);
                    uniqueFrameIDCheck.Add(frame.ID);

                    // get frame elements
                    string    fesXML = frameP.OuterXML("fes");
                    XmlParser fesP   = new XmlParser(fesXML);
                    string    feXML;
                    while ((feXML = fesP.OuterXML("fe")) != null)
                    {
                        // get frame element
                        XmlParser    feParser = new XmlParser(feXML);
                        int          feID     = int.Parse(feParser.AttributeValue("fe", "ID"));
                        string       feName   = feParser.AttributeValue("fe", "name").Trim().ToLower();
                        string       feDef    = feParser.ElementText("definition");
                        FrameElement fe       = new FrameElement(feID, feName, feDef, frame);
                        frame.FrameElements.Add(fe);

                        // add to index
                        _frameElementIdFrameElement.Add(fe.ID, fe);
                    }

                    // get lexical units
                    string    lusXML    = frameP.OuterXML("lexunits");
                    XmlParser lusParser = new XmlParser(lusXML);
                    string    luXML;
                    while ((luXML = lusParser.OuterXML("lexunit")) != null)
                    {
                        XmlParser luParser = new XmlParser(luXML);
                        int       luID     = int.Parse(luParser.AttributeValue("lexunit", "ID"));
                        string    luName   = luParser.AttributeValue("lexunit", "name");
                        luName = luName.Substring(0, luName.IndexOf('.'));
                        string luPos = luParser.AttributeValue("lexunit", "pos");
                        string luDef = luParser.ElementText("definition");

                        // get lexemes for this lexunit...we may get duplicates...don't worry about them
                        Set <Lexeme> lexemes    = new Set <Lexeme>(false);
                        string       lexemesXML = luParser.OuterXML("lexemes");
                        XmlParser    lexemesP   = new XmlParser(lexemesXML);
                        string       lexemeXML;
                        while ((lexemeXML = lexemesP.OuterXML("lexeme")) != null)
                        {
                            XmlParser lexemeP     = new XmlParser(lexemeXML);
                            string    pos         = lexemeP.AttributeValue("lexeme", "pos");
                            bool      breakBefore = bool.Parse(lexemeP.AttributeValue("lexeme", "breakBefore"));
                            bool      head        = bool.Parse(lexemeP.AttributeValue("lexeme", "headword"));
                            string    value       = lexemeP.ElementText("lexeme");

                            lexemes.Add(new Lexeme(value, pos, breakBefore, head));
                        }

                        // create lexical unit and add to frame
                        LexicalUnit lexicalUnit = new LexicalUnit(luID, luName, luPos, luDef, lexemes);

                        frame.LexicalUnits.Add(lexicalUnit);

                        // add map from full lexeme string to lexical unit id
                        string lexemeString = lexicalUnit.ToString();
                        _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeString, typeof(Set <int>), false);
                        _lexemeLexicalUnitIDs[lexemeString].Add(luID);

                        // add map from lexical unit to frame
                        _lexicalUnitIdFrame.Add(lexicalUnit.ID, frame);

                        // add map from lexical unit to lexical unit id
                        _lexicalUnitLexicalUnitIDs.EnsureContainsKey(lexicalUnit.Name, typeof(Set <int>));
                        _lexicalUnitLexicalUnitIDs[lexicalUnit.Name].Add(lexicalUnit.ID);

                        // add map from lexical unit ID to lexical unit
                        _lexicalUnitIdLexicalUnit.Add(lexicalUnit.ID, lexicalUnit);
                    }
                }
                #endregion

                #region get frame relations
                framesP = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml")));
                string relationsXML;
                while ((relationsXML = framesP.OuterXML("frame-relation-type")) != null)
                {
                    // get relation type
                    XmlParser           relationsP = new XmlParser(relationsXML);
                    Frame.FrameRelation relation   = Frame.GetFrameRelation(relationsP.AttributeValue("frame-relation-type", "name"));

                    string relationXML;
                    while ((relationXML = relationsP.OuterXML("frame-relation")) != null)
                    {
                        XmlParser relationP      = new XmlParser(relationXML);
                        string    superFrameName = relationP.AttributeValue("frame-relation", "superFrameName").ToLower();
                        string    subFrameName   = relationP.AttributeValue("frame-relation", "subFrameName").ToLower();

                        Frame superFrame = _frameNameFrame[superFrameName];
                        Frame subFrame   = _frameNameFrame[subFrameName];

                        superFrame.GetSubFrames(relation).Add(subFrame);
                        subFrame.GetSuperFrames(relation).Add(superFrame);

                        // add FE relations
                        while (relationP.SkipToElement("fe-relation"))
                        {
                            int superFeID = int.Parse(relationP.AttributeValue("fe-relation", "supId"));
                            int subFeID   = int.Parse(relationP.AttributeValue("fe-relation", "subId"));

                            FrameElement superFE = superFrame.FrameElements.Get(superFeID);
                            FrameElement subFE   = subFrame.FrameElements.Get(subFeID);

                            superFE.AddSubFrameElement(subFE, relation);
                            subFE.AddSuperFrameElement(superFE, relation);
                        }
                    }
                }
                #endregion
            }
            else if (version == Version.FrameNet_1_5)
            {
                // init annotation engine
                _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "lu"), version);

                #region get frames
                Set <int> uniqueFrameIDCheck = new Set <int>();
                foreach (string framePath in System.IO.Directory.GetFiles(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "frame"), "*.xml"))
                {
                    // create frame
                    XmlParser frameP          = new XmlParser(System.IO.File.ReadAllText(framePath));
                    int       frameID         = int.Parse(frameP.AttributeValue("frame", "ID"));
                    string    frameName       = frameP.AttributeValue("frame", "name").ToLower().Trim(); // use lowercase for all frame names
                    string    frameDefinition = frameP.ElementText("definition");
                    Frame     frame           = new Frame(frameName, frameDefinition, frameID);

                    // add to frame index index
                    _frameNameFrame.Add(frame.Name, frame);
                    uniqueFrameIDCheck.Add(frame.ID);

                    // get frame elements
                    string feXML;
                    while ((feXML = frameP.OuterXML("FE")) != null)
                    {
                        // get frame element
                        XmlParser    feParser = new XmlParser(feXML);
                        int          feID     = int.Parse(feParser.AttributeValue("FE", "ID"));
                        string       feName   = feParser.AttributeValue("FE", "name").Trim().ToLower();
                        string       feDef    = feParser.ElementText("definition");
                        FrameElement fe       = new FrameElement(feID, feName, feDef, frame);
                        frame.FrameElements.Add(fe);

                        // add to index
                        _frameElementIdFrameElement.Add(fe.ID, fe);
                    }

                    // get lexical units
                    frameP.Reset();
                    string luXML;
                    while ((luXML = frameP.OuterXML("lexUnit")) != null)
                    {
                        XmlParser luParser = new XmlParser(luXML);

                        string luPos  = luParser.AttributeValue("lexUnit", "POS");
                        string luName = luParser.AttributeValue("lexUnit", "name");
                        luName = luName.Substring(0, luName.IndexOf('.'));
                        int    luID  = int.Parse(luParser.AttributeValue("lexUnit", "ID"));
                        string luDef = luParser.ElementText("definition");

                        // get lexemes for this lexunit...we may get duplicates...don't worry about them
                        Set <Lexeme> lexemes = new Set <Lexeme>(false);
                        string       lexemeXML;
                        while ((lexemeXML = luParser.OuterXML("lexeme")) != null)
                        {
                            XmlParser lexemeP     = new XmlParser(lexemeXML);
                            bool      head        = bool.Parse(lexemeP.AttributeValue("lexeme", "headword"));
                            bool      breakBefore = bool.Parse(lexemeP.AttributeValue("lexeme", "breakBefore"));
                            string    pos         = lexemeP.AttributeValue("lexeme", "POS");
                            string    value       = lexemeP.AttributeValue("lexeme", "name");

                            lexemes.Add(new Lexeme(value, pos, breakBefore, head));
                        }

                        // create lexical unit and add to frame
                        LexicalUnit lexicalUnit = new LexicalUnit(luID, luName, luPos, luDef, lexemes);

                        frame.LexicalUnits.Add(lexicalUnit);

                        // add map from full lexeme string to lexical unit id
                        string lexemeString = lexicalUnit.ToString();
                        _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeString, typeof(Set <int>), false);
                        _lexemeLexicalUnitIDs[lexemeString].Add(luID);

                        // add map from lexical unit to frame
                        _lexicalUnitIdFrame.Add(lexicalUnit.ID, frame);

                        // add map from lexical unit to lexical unit id
                        _lexicalUnitLexicalUnitIDs.EnsureContainsKey(lexicalUnit.Name, typeof(Set <int>));
                        _lexicalUnitLexicalUnitIDs[lexicalUnit.Name].Add(lexicalUnit.ID);

                        // add map from lexical unit ID to lexical unit
                        _lexicalUnitIdLexicalUnit.Add(lexicalUnit.ID, lexicalUnit);
                    }
                }
                #endregion

                #region get relations
                XmlParser allRelationsP = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml")));
                string    relationsXML;
                while ((relationsXML = allRelationsP.OuterXML("frameRelationType")) != null)
                {
                    // get relation type
                    XmlParser           relationsP = new XmlParser(relationsXML);
                    Frame.FrameRelation relation   = Frame.GetFrameRelation(relationsP.AttributeValue("frameRelationType", "name"));

                    // read each instance of the relation
                    string relationXML;
                    while ((relationXML = relationsP.OuterXML("frameRelation")) != null)
                    {
                        XmlParser relationP = new XmlParser(relationXML);

                        // get related frames
                        Frame subFrame   = _frameNameFrame[relationP.AttributeValue("frameRelation", "subFrameName").ToLower()];
                        Frame superFrame = _frameNameFrame[relationP.AttributeValue("frameRelation", "superFrameName").ToLower()];

                        subFrame.GetSuperFrames(relation).Add(superFrame);
                        superFrame.GetSubFrames(relation).Add(subFrame);

                        // add FE relations
                        while (relationP.SkipToElement("FERelation"))
                        {
                            FrameElement subFE   = subFrame.FrameElements.Get(int.Parse(relationP.AttributeValue("FERelation", "subID")));
                            FrameElement superFE = superFrame.FrameElements.Get(int.Parse(relationP.AttributeValue("FERelation", "supID")));

                            subFE.AddSuperFrameElement(superFE, relation);
                            superFE.AddSubFrameElement(subFE, relation);
                        }
                    }
                }
                #endregion
            }
            else
            {
                throw new Exception("Unrecognized FrameNet version:  " + version);
            }
        }
        /// <summary>
        /// Rebuilds one threat surface bitmap per slice found in <c>_sliceIncidentPointScores</c> and installs the
        /// result as <c>_sliceThreatSurface</c>. Work is split across <c>Configuration.ProcessorCount</c> threads in
        /// two phases: (1) bin point scores into threat-rectangle cells and find the overall score range, then
        /// (2) draw cells, overlays, true incidents and (optionally) prediction points onto each bitmap.
        /// </summary>
        /// <param name="bitmapDimensions">Width and height used for every slice bitmap and for meter-to-pixel conversion.</param>
        /// <param name="displayFirstSlice">If true, the time slice control is moved to its minimum value after the surfaces are rebuilt.</param>
        /// <param name="sliceSquareThreatType">Optional output. When non-null, receives for each slice the (square, score, incident)
        /// tuples of every threat cell that was drawn.</param>
        private void GetThreatSurfaces(Rectangle bitmapDimensions, bool displayFirstSlice, Dictionary<long, List<Tuple<RectangleF, double, string>>> sliceSquareThreatType = null)
        {
            if (_sliceIncidentPointScores == null)
                return;

            // incidents whose threat is drawn: anything not explicitly unchecked
            Set<string> selectedIncidents = new Set<string>(incidentTypeCheckBoxes.Controls.Cast<ColoredCheckBox>().Where(c => c.CheckState != CheckState.Unchecked).Select(c => c.Text).ToArray());

            // incidents whose true occurrences are drawn: fully checked boxes only. This does not depend on the slice, so it is
            // read once here instead of once per slice from inside the worker threads (which touched UI controls off-thread).
            Set<string> selectedTrueIncidentOverlays = new Set<string>(incidentTypeCheckBoxes.Controls.Cast<ColoredCheckBox>().Where(c => c.CheckState == CheckState.Checked).Select(c => c.Text).ToArray());

            float pixelsPerMeter;
            float threatRectanglePixelWidth;
            GetDrawingParameters(bitmapDimensions, out pixelsPerMeter, out threatRectanglePixelWidth);

            List<long> slices = _sliceIncidentPointScores.Keys.OrderBy(s => s).ToList();
            Dictionary<long, Dictionary<int, Dictionary<int, Tuple<double, string>>>> sliceRowColScoreIncident = new Dictionary<long, Dictionary<int, Dictionary<int, Tuple<double, string>>>>(slices.Count);
            Dictionary<long, Bitmap> newSliceThreatSurface = new Dictionary<long, Bitmap>(slices.Count);
            double overallMinScore = double.MaxValue;
            double overallMaxScore = double.MinValue;
            bool bitmapCreationFailed = false;

            // private gate for all state shared between worker threads (avoids locking on "this")
            object sharedStateLock = new object();

            List<Thread> threads = new List<Thread>(Configuration.ProcessorCount);
            for (int i = 0; i < Configuration.ProcessorCount; ++i)
            {
                Thread t = new Thread(new ParameterizedThreadStart(core =>
                    {
                        // each thread handles slices core, core + ProcessorCount, core + 2 * ProcessorCount, ...
                        for (int j = (int)core; j < slices.Count; j += Configuration.ProcessorCount)
                        {
                            long slice = slices[j];

                            #region create bitmap for current slice's threat surface
                            try
                            {
                                lock (sharedStateLock)
                                {
                                    newSliceThreatSurface.Add(slice, new Bitmap(bitmapDimensions.Width, bitmapDimensions.Height, PixelFormat.Format16bppRgb565));
                                }
                            }
                            catch (ArgumentException)
                            {
                                Console.Out.WriteLine("Maximum zoom exceeded. Reset zoom to refresh display.");

                                // remember the failure so the draw phase is skipped; it would otherwise look up a bitmap that was never created
                                lock (sharedStateLock)
                                {
                                    bitmapCreationFailed = true;
                                }

                                return;
                            }
                            #endregion

                            #region get incident scores for each row and column of current slice
                            Dictionary<int, Dictionary<int, Dictionary<string, List<double>>>> rowColIncidentScores = new Dictionary<int, Dictionary<int, Dictionary<string, List<double>>>>();
                            foreach (string incident in _sliceIncidentPointScores[slice].Keys)
                                if (selectedIncidents.Contains(incident))
                                    foreach (Tuple<PointF, double> pointScore in _sliceIncidentPointScores[slice][incident])
                                    {
                                        PointF drawingPoint = ConvertMetersPointToDrawingPoint(pointScore.Item1, _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions);

                                        int row, col;
                                        GetThreatRectangleRowColumn(drawingPoint, threatRectanglePixelWidth, out row, out col);

                                        rowColIncidentScores.EnsureContainsKey(row, typeof(Dictionary<int, Dictionary<string, List<double>>>));
                                        rowColIncidentScores[row].EnsureContainsKey(col, typeof(Dictionary<string, List<double>>));
                                        rowColIncidentScores[row][col].EnsureContainsKey(incident, typeof(List<double>));
                                        rowColIncidentScores[row][col][incident].Add(pointScore.Item2);
                                    }
                            #endregion

                            #region get score/incident pairs for each cell, tracking min and max scores
                            Dictionary<int, Dictionary<int, Tuple<double, string>>> rowColScoreIncident = new Dictionary<int, Dictionary<int, Tuple<double, string>>>();
                            double sliceMinScore = double.MaxValue;
                            double sliceMaxScore = double.MinValue;
                            foreach (int row in rowColIncidentScores.Keys)
                                foreach (int col in rowColIncidentScores[row].Keys)
                                {
                                    // a cell is represented by the incident with the highest average score among the points that fell into it
                                    Dictionary<string, List<double>> incidentScores = rowColIncidentScores[row][col];
                                    string mostLikelyIncident = null;
                                    double scoreForMostLikelyIncident = double.MinValue;
                                    foreach (string incident in incidentScores.Keys)
                                    {
                                        double score = incidentScores[incident].Average();
                                        if (score > scoreForMostLikelyIncident)
                                        {
                                            mostLikelyIncident = incident;
                                            scoreForMostLikelyIncident = score;
                                        }
                                    }

                                    if (scoreForMostLikelyIncident < sliceMinScore) sliceMinScore = scoreForMostLikelyIncident;
                                    if (scoreForMostLikelyIncident > sliceMaxScore) sliceMaxScore = scoreForMostLikelyIncident;

                                    rowColScoreIncident.EnsureContainsKey(row, typeof(Dictionary<int, Tuple<double, string>>));
                                    rowColScoreIncident[row].Add(col, new Tuple<double, string>(scoreForMostLikelyIncident, mostLikelyIncident));
                                }
                            #endregion

                            #region store information from thread
                            lock (sharedStateLock)
                            {
                                sliceRowColScoreIncident.Add(slice, rowColScoreIncident);

                                if (sliceMinScore < overallMinScore) overallMinScore = sliceMinScore;
                                if (sliceMaxScore > overallMaxScore) overallMaxScore = sliceMaxScore;
                            }
                            #endregion
                        }
                    }));

                t.Start(i);
                threads.Add(t);
            }

            foreach (Thread t in threads)
                t.Join();

            // if any bitmap could not be allocated, release the ones that were and keep the currently displayed surfaces
            if (bitmapCreationFailed)
            {
                foreach (Bitmap partialSurface in newSliceThreatSurface.Values)
                    partialSurface.Dispose();

                return;
            }

            #region draw threat surfaces
            double scoreRange = overallMaxScore - overallMinScore;
            if (scoreRange == 0)
                scoreRange = float.Epsilon; // avoid dividing by zero when all cells share one score

            threads.Clear();
            for (int i = 0; i < Configuration.ProcessorCount; ++i)
            {
                Thread t = new Thread(new ParameterizedThreadStart(core =>
                    {
                        // one pen/brush per thread, recolored as needed
                        using(Pen pen = new Pen(BackColor, 1))
                        using(SolidBrush brush = new SolidBrush(BackColor))
                        {
                            for (int j = (int)core; j < slices.Count; j += Configuration.ProcessorCount)
                            {
                                long slice = slices[j];

                                // squares drawn for this slice; collected locally and published once under the lock,
                                // because the caller's dictionary is shared by all drawing threads
                                List<Tuple<RectangleF, double, string>> drawnSquares = sliceSquareThreatType == null ? null : new List<Tuple<RectangleF, double, string>>();

                                // Graphics wraps a native GDI+ handle and must be disposed once drawing is finished
                                using (Graphics g = Graphics.FromImage(newSliceThreatSurface[slice]))
                                {
                                    g.Clear(BackColor);

                                    #region threat
                                    foreach (int row in sliceRowColScoreIncident[slice].Keys)
                                        foreach (int col in sliceRowColScoreIncident[slice][row].Keys)
                                        {
                                            // blend the incident color with the background in proportion to the cell's score scaled to [0, 1]
                                            Tuple<double, string> scoreIncident = sliceRowColScoreIncident[slice][row][col];
                                            double scaledScore = (scoreIncident.Item1 - overallMinScore) / scoreRange;
                                            double percentTransparent = 1 - scaledScore;
                                            Color color = _incidentColor[scoreIncident.Item2];

                                            byte red = (byte)(scaledScore * color.R + percentTransparent * BackColor.R);
                                            byte green = (byte)(scaledScore * color.G + percentTransparent * BackColor.G);
                                            byte blue = (byte)(scaledScore * color.B + percentTransparent * BackColor.B);
                                            brush.Color = Color.FromArgb(red, green, blue);

                                            RectangleF threatSquare = new RectangleF(col * threatRectanglePixelWidth, row * threatRectanglePixelWidth, threatRectanglePixelWidth, threatRectanglePixelWidth);
                                            g.FillRectangle(brush, threatSquare);

                                            if (drawnSquares != null)
                                                drawnSquares.Add(new Tuple<RectangleF, double, string>(threatSquare, scoreIncident.Item1, scoreIncident.Item2));
                                        }
                                    #endregion

                                    #region overlays
                                    foreach (Overlay overlay in Overlays)
                                        if (overlay.Displayed)
                                        {
                                            pen.Color = overlay.Color;
                                            brush.Color = overlay.Color;
                                            foreach (List<PointF> points in overlay.Points)
                                                if (points.Count == 1)
                                                {
                                                    // single point:  draw as a filled circle
                                                    PointF drawingPoint = ConvertMetersPointToDrawingPoint(points[0], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions);
                                                    RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter);
                                                    g.FillEllipse(brush, circle);
                                                    g.DrawEllipse(pen, circle);
                                                }
                                                else
                                                    // multiple points:  connect consecutive points with line segments
                                                    for (int p = 1; p < points.Count; ++p)
                                                        g.DrawLine(pen, ConvertMetersPointToDrawingPoint(points[p - 1], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions), ConvertMetersPointToDrawingPoint(points[p], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions));
                                        }
                                    #endregion

                                    #region true incidents
                                    DateTime sliceStart = DisplayedPrediction.PredictionStartTime;
                                    DateTime sliceEnd = DisplayedPrediction.PredictionEndTime;
                                    if (slice != -1)
                                    {
                                        // a slice other than -1 is a time slice index:  restrict the incident window to that slice
                                        if (!(DisplayedPrediction.Model is TimeSliceDCM))
                                            throw new Exception("Expected TimeSliceDCM since slice != -1");

                                        long sliceTicks = (DisplayedPrediction.Model as TimeSliceDCM).TimeSliceTicks;
                                        sliceStart = new DateTime(slice * sliceTicks);
                                        sliceEnd = sliceStart + new TimeSpan(sliceTicks);
                                    }

                                    foreach (string trueIncidentOverlay in selectedTrueIncidentOverlays)
                                    {
                                        brush.Color = _incidentColor[trueIncidentOverlay];
                                        pen.Color = Color.Black;
                                        foreach (Incident incident in Incident.Get(sliceStart, sliceEnd, DisplayedPrediction.PredictionArea, trueIncidentOverlay))
                                        {
                                            PointF drawingPoint = ConvertMetersPointToDrawingPoint(new PointF((float)incident.Location.X, (float)incident.Location.Y), _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions);
                                            RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter);
                                            g.FillEllipse(brush, circle);
                                            g.DrawEllipse(pen, circle);
                                        }
                                    }
                                    #endregion

                                    #region prediction points
                                    if (_displayPredictionPoints)
                                    {
                                        brush.Color = _predictionPointColor;
                                        pen.Color = Color.Black;
                                        foreach (Point p in DisplayedPrediction.Points)
                                        {
                                            PointF drawingPoint = ConvertMetersPointToDrawingPoint(new PointF((float)p.Location.X, (float)p.Location.Y), _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions);
                                            RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter);
                                            g.FillEllipse(brush, circle);
                                            g.DrawEllipse(pen, circle);
                                        }
                                    }
                                    #endregion
                                }

                                // publish this slice's squares; the key is only created when at least one square was drawn
                                if (drawnSquares != null && drawnSquares.Count > 0)
                                    lock (sharedStateLock)
                                    {
                                        sliceSquareThreatType.EnsureContainsKey(slice, typeof(List<Tuple<RectangleF, double, string>>));
                                        sliceSquareThreatType[slice].AddRange(drawnSquares);
                                    }
                            }
                        }
                    }));

                t.Start(i);
                threads.Add(t);
            }

            foreach (Thread t in threads)
                t.Join();
            #endregion

            // release the previously displayed surfaces before swapping in the new ones
            if (_sliceThreatSurface != null)
            {
                foreach (Bitmap threatSurface in _sliceThreatSurface.Values)
                    threatSurface.Dispose();

                _sliceThreatSurface.Clear();
            }

            _sliceThreatSurface = newSliceThreatSurface;

            // detach the handler while the slider range/value is updated so these changes don't fire timeSlice_ValueChanged
            timeSlice.ValueChanged -= new EventHandler(timeSlice_ValueChanged);
            timeSlice.Minimum = (int)_sliceThreatSurface.Keys.Min();
            timeSlice.Maximum = (int)_sliceThreatSurface.Keys.Max();

            if (displayFirstSlice)
                timeSlice.Value = timeSlice.Minimum;

            timeSlice.ValueChanged += new EventHandler(timeSlice_ValueChanged);

            _zoomedImageWidth = CurrentThreatSurface.Width;

            Invalidate();
        }
        /// <summary>
        /// Displays a prediction. Resets panning state, groups the prediction's point scores by slice and incident type,
        /// assigns a color to each incident type, rebuilds the incident/overlay check boxes, computes the bounding region
        /// of all prediction points and overlay points, and generates the threat surfaces.
        /// </summary>
        /// <param name="prediction">Prediction whose points and point predictions are displayed.</param>
        /// <param name="overlays">Overlays passed through to the base implementation.</param>
        public override void Display(Prediction prediction, IEnumerable<Overlay> overlays)
        {
            base.Display(prediction, overlays);

            _dragging = false;
            _draggingStart = System.Drawing.Point.Empty;
            _panOffset = new Size(0, 0);
            _panIncrement = 50;

            DiscreteChoiceModel model = prediction.Model;

            // index points by ID so each point prediction can be mapped back to its location
            Dictionary<int, Point> idPoint = new Dictionary<int, Point>();
            foreach (Point p in prediction.Points)
                idPoint.Add(p.Id, p);

            _incidentColor = new Dictionary<string, Color>();
            _sliceIncidentPointScores = new Dictionary<long, Dictionary<string, List<Tuple<PointF, double>>>>();
            float minPointX = float.MaxValue;
            float minPointY = float.MaxValue;
            float maxPointX = float.MinValue;
            float maxPointY = float.MinValue;
            foreach (PointPrediction pointPrediction in prediction.PointPredictions)
            {
                // slice -1 is used when the model is not a time slice model; otherwise the slice is the time slice index
                long slice = -1;
                if (model is TimeSliceDCM)
                    slice = (long)(pointPrediction.Time.Ticks / (model as TimeSliceDCM).TimeSliceTicks);

                _sliceIncidentPointScores.EnsureContainsKey(slice, typeof(Dictionary<string, List<Tuple<PointF, double>>>));

                Point point = idPoint[pointPrediction.PointId];

                foreach (string incident in pointPrediction.IncidentScore.Keys)
                {
                    // assign a palette color the first time an incident type is seen
                    Color color;
                    if (!_incidentColor.TryGetValue(incident, out color))
                    {
                        color = ColorPalette.GetColor();
                        _incidentColor.Add(incident, color);
                    }

                    double score = pointPrediction.IncidentScore[incident];

                    _sliceIncidentPointScores[slice].EnsureContainsKey(incident, typeof(List<Tuple<PointF, double>>));
                    _sliceIncidentPointScores[slice][incident].Add(new Tuple<PointF, double>(new PointF((float)point.Location.X, (float)point.Location.Y), score));
                }

                // grow the bounding region to include this point
                float x = (float)point.Location.X;
                float y = (float)point.Location.Y;
                if (x < minPointX) minPointX = x;
                if (x > maxPointX) maxPointX = x;
                if (y < minPointY) minPointY = y;
                if (y > maxPointY) maxPointY = y;
            }

            if (_sliceIncidentPointScores.Count == 0)
            {
                Console.Out.WriteLine("No prediction points were generated for this prediction. There is nothing to display or evaluate.");
                Clear();
                return;
            }

            Invoke(new Action(delegate()
                {
                    // one check box per incident type; only the first starts out checked
                    incidentTypeCheckBoxes.Controls.Clear();
                    bool first = true;
                    foreach (string incidentType in _incidentColor.Keys)
                    {
                        ColoredCheckBox cb = new ColoredCheckBox(true, first ? CheckState.Checked : CheckState.Unchecked, incidentType, _incidentColor[incidentType]);
                        cb.CheckBoxCheckStateChanged += new EventHandler(IncidentCheckBox_CheckStateChanged);
                        cb.LabelClicked += new EventHandler(IncidentCheckBox_LabelClicked);
                        incidentTypeCheckBoxes.Controls.Add(cb);
                        first = false;
                    }

                    overlayCheckBoxes.Controls.Clear();
                    foreach (Overlay overlay in Overlays)
                    {
                        ColoredCheckBox cb = new ColoredCheckBox(false, overlay.Displayed ? CheckState.Checked : CheckState.Unchecked, overlay.Name, overlay.Color);
                        cb.CheckBoxCheckedChanged += new EventHandler(OverlayCheckBox_CheckedChanged);
                        cb.LabelClicked += new EventHandler(OverlayCheckBox_LabelClicked);
                        overlayCheckBoxes.Controls.Add(cb);

                        // grow the bounding region to include every overlay point. This is done in a single pass so that an
                        // overlay without any points simply leaves the bounds unchanged (Min()/Max() throw on empty sequences).
                        foreach (List<PointF> overlayPoints in overlay.Points)
                            foreach (PointF overlayPoint in overlayPoints)
                            {
                                if (overlayPoint.X < minPointX) minPointX = overlayPoint.X;
                                if (overlayPoint.X > maxPointX) maxPointX = overlayPoint.X;
                                if (overlayPoint.Y < minPointY) minPointY = overlayPoint.Y;
                                if (overlayPoint.Y > maxPointY) maxPointY = overlayPoint.Y;
                            }
                    }

                    ColoredCheckBox displayPredictionPointsCheckbox = new ColoredCheckBox(false, CheckState.Unchecked, "prediction points", _predictionPointColor);
                    displayPredictionPointsCheckbox.CheckBoxCheckedChanged += new EventHandler(DisplayPredictionPoints_CheckedChanged);
                    displayPredictionPointsCheckbox.LabelClicked += new EventHandler(DisplayPredictionPoints_LabelClicked);
                    overlayCheckBoxes.Controls.Add(displayPredictionPointsCheckbox);
                    _displayPredictionPoints = displayPredictionPointsCheckbox.Checked;

                    _regionBottomLeftInMeters = new PointF(minPointX, minPointY);
                    _regionSizeInMeters = new SizeF(maxPointX - minPointX, maxPointY - minPointY);

                    bool generateThreatSurfaces = threatResolution.Value != prediction.PredictionPointSpacing; // changing the threat resolution will generate new threat surfaces, so only do it here if we won't be changing the current resolution value
                    threatResolution.Value = threatResolution.Minimum = prediction.PredictionPointSpacing;
                    if (!generateThreatSurfaces)
                        GetThreatSurfaces(ClientRectangle, true);

                    GetSliceTimeText();
                }));
        }
Exemple #25
0
        /// <summary>
        /// Constructor. Checks that the WordNet directory and its eight data/index files exist, re-sorts the
        /// index files in place the first time the directory is used (tracked with a flag file), and then
        /// initializes the engine either fully in memory or with on-disk search streams.
        /// </summary>
        /// <param name="wordNetDirectory">Path to WordNet directory (the one with the data and index files in it)</param>
        /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space
        /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient
        /// binary search algorithm.</param>
        /// <exception cref="DirectoryNotFoundException">The WordNet directory does not exist.</exception>
        /// <exception cref="FileNotFoundException">One of the data.* or index.* files is missing.</exception>
        public WordNetEngine(string wordNetDirectory, bool inMemory)
        {
            _wordNetDirectory = wordNetDirectory;
            _inMemory = inMemory;

            // these two are only populated in the on-disk (inMemory == false) branch at the end of the constructor
            _posIndexWordSearchStream = null;
            _posSynSetDataFile = null;

            if (!System.IO.Directory.Exists(_wordNetDirectory))
                throw new DirectoryNotFoundException("Отсутствует WordNet директория:  " + _wordNetDirectory);

            // get data and index paths
            string[] dataPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "data.adj"),
                Path.Combine(_wordNetDirectory, "data.adv"),
                Path.Combine(_wordNetDirectory, "data.noun"),
                Path.Combine(_wordNetDirectory, "data.verb")
            };

            string[] indexPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "index.adj"),
                Path.Combine(_wordNetDirectory, "index.adv"),
                Path.Combine(_wordNetDirectory, "index.noun"),
                Path.Combine(_wordNetDirectory, "index.verb")
            };

            // make sure all files exist
            foreach (string path in dataPaths.Union(indexPaths))
                if (!System.IO.File.Exists(path))
                    throw new FileNotFoundException("Failed to find WordNet file:  " + path);

            #region index file sorting
            // the presence of this flag file means the index files were already re-sorted by an earlier run
            string sortFlagPath = Path.Combine(_wordNetDirectory, ".sorted_for_dot_net");
            if (!System.IO.File.Exists(sortFlagPath))
            {
                /* make sure the index files are sorted according to the current sort order. the index files in the
                 * wordnet distribution are sorted in the order needed for (presumably) the java api, which uses
                 * a different sort order than the .net runtime. thus, unless we resort the lines in the index
                 * files, we won't be able to do a proper binary search over the data. */
                foreach (string indexPath in indexPaths)
                {
                    // create temporary file for sorted lines
                    string tempPath = Path.GetTempFileName();
                    StreamWriter tempFile = new StreamWriter(tempPath);

                    // get number of words (lines) in file...only used to presize the dictionary below
                    // NOTE(review): the readers in this region are never closed explicitly; presumably
                    // TryReadLine closes the reader once it hits end of file -- confirm, because the index file
                    // is deleted further down and an open handle could get in the way of that
                    int numWords = 0;
                    StreamReader indexFile = new StreamReader(indexPath);
                    string line;
                    while (indexFile.TryReadLine(out line))
                        if (!line.StartsWith(" "))
                            ++numWords;

                    // get lines in file, sorted by first column (i.e., the word)...this is a second read of the same file
                    Dictionary<string, string> wordLine = new Dictionary<string, string>(numWords);
                    indexFile = new StreamReader(indexPath);
                    while (indexFile.TryReadLine(out line))
                        // write header lines to temp file immediately
                        if (line.StartsWith(" "))
                            tempFile.WriteLine(line);
                        else
                        {
                            // trim useless blank spaces from line and map line to first column. Add throws if the
                            // same word shows up twice, and Substring throws if the trimmed line contains no space.
                            line = line.Trim();
                            wordLine.Add(line.Substring(0, line.IndexOf(' ')), line);
                        }

                    // get sorted words (Sort() with no arguments uses the default string comparer)
                    List<string> sortedWords = new List<string>(wordLine.Count);
                    sortedWords.AddRange(wordLine.Keys);
                    sortedWords.Sort();

                    // write lines sorted by word
                    foreach (string word in sortedWords)
                        tempFile.WriteLine(wordLine[word]);

                    tempFile.Close();

                    // replace original index file with properly sorted one
                    System.IO.File.Delete(indexPath);
                    System.IO.File.Move(tempPath, indexPath);
                }

                // create flag file, indicating that we've sorted the data
                StreamWriter sortFlagFile = new StreamWriter(sortFlagPath);
                sortFlagFile.WriteLine("This file serves no purpose other than to indicate that the WordNet distribution data in the current directory has been sorted for use by the .NET API.");
                sortFlagFile.Close();
            }
            #endregion

            #region engine init
            if (inMemory)
            {
                // pass 1:  get total number of synsets (only used to presize _idSynset)
                int totalSynsets = 0;
                foreach (string dataPath in dataPaths)
                {
                    // scan synset data file for synset definition lines. the "> 0" test skips lines that start
                    // with a space as well as lines that contain no space at all.
                    StreamReader dataFile = new StreamReader(dataPath);
                    string line;
                    while (dataFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                            ++totalSynsets;
                    }
                }

                // pass 2:  create synset shells (pos and offset only)
                _idSynset = new Dictionary<string, SynSet>(totalSynsets);
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    StreamReader dataFile = new StreamReader(dataPath);
                    string line;
                    while (dataFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // get offset (the text before the first space) and create synset shell
                            int offset = int.Parse(line.Substring(0, firstSpace));
                            SynSet synset = new SynSet(pos, offset, null);

                            _idSynset.Add(synset.ID, synset);
                        }
                    }
                }

                // pass 3:  instantiate synsets (hooks up relations, set glosses, etc.)
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    StreamReader dataFile = new StreamReader(dataPath);
                    string line;
                    while (dataFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                            // instantiate synset defined on current line, using the instantiated synsets for all references.
                            // the lookup key is built here as "pos:offset" -- presumably the same format SynSet.ID
                            // produced in pass 2; confirm against SynSet.
                            _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset);
                    }
                }

                // organize synsets by pos and words...also set most common synset for word-pos pairs that have multiple synsets
                _posWordSynSets = new Dictionary<POS, Dictionary<string, Set<SynSet>>>();
                foreach (string indexPath in indexPaths)
                {
                    POS pos = GetFilePOS(indexPath);

                    _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary<string, Set<SynSet>>));

                    // scan word index file, skipping header lines
                    StreamReader indexFile = new StreamReader(indexPath);
                    string line;
                    while (indexFile.TryReadLine(out line))
                    {
                        int firstSpace = line.IndexOf(' ');
                        if (firstSpace > 0)
                        {
                            // grab word and synset shells, along with the most common synset
                            string word = line.Substring(0, firstSpace);
                            SynSet mostCommonSynSet;
                            Set<SynSet> synsets = GetSynSetShells(line, pos, out mostCommonSynSet, null);

                            // set flag on most common synset if it's ambiguous
                            if (synsets.Count > 1)
                                _idSynset[mostCommonSynSet.ID].SetAsMostCommonSynsetFor(word);

                            // use reference to the synsets that we instantiated in our three-pass routine above,
                            // not the shells returned by GetSynSetShells
                            _posWordSynSets[pos].Add(word, new Set<SynSet>(synsets.Count));
                            foreach (SynSet synset in synsets)
                                _posWordSynSets[pos][word].Add(_idSynset[synset.ID]);
                        }
                    }
                }
            }
            else
            {
                // open binary search streams for index files
                _posIndexWordSearchStream = new Dictionary<POS, BinarySearchTextStream>();
                foreach (string indexPath in indexPaths)
                {
                    // create binary search stream for index file. the delegate compares the searched-for word
                    // against the line the search is currently positioned on.
                    BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate(delegate(object searchWord, string currentLine)
                        {
                            // if we landed on the header text, search further down
                            // NOTE(review): indexing [0] assumes the stream never hands over an empty line -- confirm
                            if (currentLine[0] == ' ')
                                return 1;

                            // get word on current line
                            string currentWord = currentLine.Substring(0, currentLine.IndexOf(' '));

                            // compare searched-for word to the current word
                            return ((string)searchWord).CompareTo(currentWord);
                        }));

                    // add search stream for current POS
                    _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream);
                }

                // open readers for synset data files...these are stored in a field and stay open after the constructor returns
                _posSynSetDataFile = new Dictionary<POS, StreamReader>();
                foreach (string dataPath in dataPaths)
                    _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath));
            }
            #endregion
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="WordNetMemoryProvider"/> class by reading the WordNet
        /// data and index files in <paramref name="dataPath"/> and building the in-memory synset tables.
        /// </summary>
        /// <param name="dataPath">Directory that contains the data.* and index.* files.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="dataPath"/> is null or empty.</exception>
        /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception>
        /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception>
        public WordNetMemoryProvider(string dataPath) {
            if (string.IsNullOrEmpty(dataPath))
                throw new ArgumentNullException("dataPath");

            if (!new DirectoryInfo(dataPath).Exists)
                throw new DirectoryNotFoundException("The data directory does not exist.");

            FileInfo[] synsetFiles = {
                new FileInfo(Path.Combine(dataPath, "data.adj")),
                new FileInfo(Path.Combine(dataPath, "data.adv")),
                new FileInfo(Path.Combine(dataPath, "data.noun")),
                new FileInfo(Path.Combine(dataPath, "data.verb"))
            };

            FileInfo[] wordIndexFiles = {
                new FileInfo(Path.Combine(dataPath, "index.adj")),
                new FileInfo(Path.Combine(dataPath, "index.adv")),
                new FileInfo(Path.Combine(dataPath, "index.noun")),
                new FileInfo(Path.Combine(dataPath, "index.verb"))
            };

            // report the first required file (data files first, then index files) that is not on disk
            FileInfo missing = synsetFiles.Union(wordIndexFiles).FirstOrDefault(candidate => !candidate.Exists);
            if (missing != null)
                throw new FileNotFoundException("A required WordNet file does not exist: " + missing.Name);

            // Pass 1: count the synset definition lines so the id table can be presized.
            // A definition line has its first space somewhere after column 0.
            int synsetTotal = 0;
            foreach (FileInfo synsetFile in synsetFiles) {
                using (StreamReader reader = new StreamReader(synsetFile.FullName)) {
                    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine()) {
                        if (row.IndexOf(' ') > 0)
                            synsetTotal++;
                    }
                }
            }

            // Pass 2: register an empty shell (pos and offset only) for every synset
            idSynset = new Dictionary<string, SynSet>(synsetTotal);
            foreach (FileInfo synsetFile in synsetFiles) {
                var pos = WordNetFileProvider.GetFilePos(synsetFile.FullName);

                using (StreamReader reader = new StreamReader(synsetFile.FullName)) {
                    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine()) {
                        int spaceAt = row.IndexOf(' ');
                        if (spaceAt > 0) {
                            // the offset is the text in front of the first space
                            SynSet shell = new SynSet(pos, int.Parse(row.Substring(0, spaceAt)), null);
                            idSynset.Add(shell.Id, shell);
                        }
                    }
                }
            }

            // Pass 3: fill in each shell from its definition line, resolving references through idSynset
            foreach (FileInfo synsetFile in synsetFiles) {
                var pos = WordNetFileProvider.GetFilePos(synsetFile.FullName);

                using (StreamReader reader = new StreamReader(synsetFile.FullName)) {
                    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine()) {
                        int spaceAt = row.IndexOf(' ');
                        if (spaceAt <= 0)
                            continue;

                        string synsetKey = pos + ":" + int.Parse(row.Substring(0, spaceAt));
                        idSynset[synsetKey].Instantiate(row, idSynset);
                    }
                }
            }

            // Group the synsets by part of speech and word, and flag the most common synset
            // for every word-pos pair that has more than one synset.
            posWordSynSets = new Dictionary<WordNetPos, Dictionary<string, List<SynSet>>>();

            foreach (FileInfo wordIndexFile in wordIndexFiles) {
                var pos = WordNetFileProvider.GetFilePos(wordIndexFile.FullName);

                posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary<string, List<SynSet>>));

                // header lines fail the "first space after column 0" test and are skipped
                using (StreamReader reader = new StreamReader(wordIndexFile.FullName)) {
                    for (string row = reader.ReadLine(); row != null; row = reader.ReadLine()) {
                        int spaceAt = row.IndexOf(' ');
                        if (spaceAt > 0) {
                            string word = row.Substring(0, spaceAt);
                            SynSet mostCommon;
                            var shells = WordNetFileProvider.GetSynSetShells(row, pos, out mostCommon, wordNet);

                            // only ambiguous words get a most-common marker
                            if (shells.Count > 1)
                                idSynset[mostCommon.Id].SetAsMostCommonSynsetFor(word);

                            // store the synsets instantiated in the three passes above, not the shells
                            Dictionary<string, List<SynSet>> wordTable = posWordSynSets[pos];
                            List<SynSet> resolved = new List<SynSet>(shells.Count);
                            wordTable.Add(word, resolved);
                            foreach (var shell in shells)
                                resolved.Add(idSynset[shell.Id]);
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Constructor. Reads the PropBank-VerbNet and FrameNet-VerbNet linking files and builds a role lookup
        /// table in each direction for both of them.
        /// </summary>
        /// <param name="dataDirectory">Path to the SemLink data directory</param>
        public SemLinkEngine(string dataDirectory)
        {
            _dataDirectory = dataDirectory;

            #region propbank-verbnet
            // the linking file has to be there
            if (!File.Exists(PropBankVerbNetLinkingPath))
                throw new FileNotFoundException("Failed to find PropBank-VerbNet mapping file:  " + PropBankVerbNetLinkingPath);

            // walk every predicate in the linking file
            _propBankRoleVerbNetRoles = new Dictionary<string, Set<string>>();
            XmlParser linkingParser = new XmlParser(File.ReadAllText(PropBankVerbNetLinkingPath));
            for (string predicateText = linkingParser.OuterXML("predicate"); predicateText != null; predicateText = linkingParser.OuterXML("predicate"))
            {
                XmlParser predicateParser = new XmlParser(predicateText);

                string lemma = predicateParser.AttributeValue("predicate", "lemma").Trim();
                if (lemma.Length == 0)
                    throw new Exception("Blank PropBank verb");

                // walk every argument mapping of the predicate
                for (string argMapText = predicateParser.OuterXML("argmap"); argMapText != null; argMapText = predicateParser.OuterXML("argmap"))
                {
                    XmlParser argMapParser = new XmlParser(argMapText);

                    // the role set number is whatever follows the first period of the pb-roleset attribute
                    string roleSetText = argMapParser.AttributeValue("argmap", "pb-roleset");
                    int roleSetNumber = int.Parse(roleSetText.Substring(roleSetText.IndexOf('.') + 1));
                    if (roleSetNumber < 1)
                        throw new Exception("Invalid PropBank role set:  " + roleSetNumber);

                    // verbnet class, written with periods instead of dashes
                    string verbNetClass = argMapParser.AttributeValue("argmap", "vn-class").Trim().Replace("-", ".");
                    if (verbNetClass.Length == 0)
                        throw new Exception("Blank VerbNet class");

                    // walk every role of the argument mapping
                    for (string roleText = argMapParser.OuterXML("role"); roleText != null; roleText = argMapParser.OuterXML("role"))
                    {
                        XmlParser roleParser = new XmlParser(roleText);

                        // arguments marked "M" or "A" are not mapped
                        string argText = roleParser.AttributeValue("role", "pb-arg");
                        if (argText != "M" && argText != "A")
                        {
                            // fully-specified propbank role:  verb.roleset.argument
                            int argNumber = int.Parse(argText);
                            string propBankKey = lemma + "." + roleSetNumber + "." + argNumber;

                            // fully-specified verbnet role:  class.theta
                            string theta = roleParser.AttributeValue("role", "vn-theta").Trim();
                            if (theta.Length == 0)
                                throw new Exception("Blank VerbNet role");

                            string verbNetKey = verbNetClass + "." + theta;

                            _propBankRoleVerbNetRoles.EnsureContainsKey(propBankKey, typeof(Set<string>));
                            _propBankRoleVerbNetRoles[propBankKey].Add(verbNetKey);
                        }
                    }
                }
            }

            // invert the table:  verbnet role -> propbank roles
            _verbNetRolePropBankRoles = new Dictionary<string, Set<string>>();
            foreach (KeyValuePair<string, Set<string>> propBankLink in _propBankRoleVerbNetRoles)
                foreach (string verbNetKey in propBankLink.Value)
                {
                    _verbNetRolePropBankRoles.EnsureContainsKey(verbNetKey, typeof(Set<string>));
                    _verbNetRolePropBankRoles[verbNetKey].Add(propBankLink.Key);
                }
            #endregion

            #region verbnet-framenet
            // the linking file has to be there
            if (!File.Exists(FrameNetVerbNetLinkingPath))
                throw new FileNotFoundException("Failed to find FrameNet-VerbNet mapping file:  " + FrameNetVerbNetLinkingPath);

            // walk every verbnet class in the linking file
            _verbNetRoleFrameElements = new Dictionary<string, Set<string>>();
            XmlParser frameLinkParser = new XmlParser(File.ReadAllText(FrameNetVerbNetLinkingPath));
            for (string classText = frameLinkParser.OuterXML("vncls"); classText != null; classText = frameLinkParser.OuterXML("vncls"))
            {
                XmlParser classParser = new XmlParser(classText);

                // verbnet class (periods instead of dashes) and lower-cased framenet frame
                string verbNetClass = classParser.AttributeValue("vncls", "class").Trim().Replace("-", ".");
                string frameName = classParser.AttributeValue("vncls", "fnframe").Trim().ToLower();

                // walk every role mapping of the class
                for (string roleText = classParser.OuterXML("role"); roleText != null; roleText = classParser.OuterXML("role"))
                {
                    XmlParser roleParser = new XmlParser(roleText);

                    // frame element is frame.fnrole (lower-cased), verbnet role is class.vnrole
                    string frameElementKey = frameName + "." + roleParser.AttributeValue("role", "fnrole").Trim().ToLower();
                    string verbNetKey = verbNetClass + "." + roleParser.AttributeValue("role", "vnrole").Trim();

                    _verbNetRoleFrameElements.EnsureContainsKey(verbNetKey, typeof(Set<string>), false);
                    _verbNetRoleFrameElements[verbNetKey].Add(frameElementKey);
                }
            }

            // invert the table:  frame element -> verbnet roles
            _frameElementVerbNetRoles = new Dictionary<string, Set<string>>();
            foreach (KeyValuePair<string, Set<string>> verbNetLink in _verbNetRoleFrameElements)
                foreach (string frameElementKey in verbNetLink.Value)
                {
                    _frameElementVerbNetRoles.EnsureContainsKey(frameElementKey, typeof(Set<string>));
                    _frameElementVerbNetRoles[frameElementKey].Add(verbNetLink.Key);
                }
            #endregion
        }
        /// <summary>
        /// Loads the propositions file. When all four index files already exist they are read back and the
        /// propositions file itself is not parsed; otherwise the propositions file is parsed and the four
        /// index files are (re)written.
        /// </summary>
        /// <param name="propsPath">Path to prop.txt file</param>
        /// <exception cref="Exception">The file at <paramref name="propsPath"/> does not exist.</exception>
        private void LoadProps(string propsPath)
        {
            if (!File.Exists(propsPath))
            {
                throw new Exception("Invalid PropBank propositions file:  \"" + propsPath + "\"");
            }

            // reuse existing index files if present
            if (File.Exists(VerbInfoPath) &&
                File.Exists(VerbInfoFilePositionPath) &&
                File.Exists(MrgSentenceInfoPath) &&
                File.Exists(MrgSentenceInfoFilePositionsPath))
            {
                // load verb info positions. every file below is wrapped in "using" so that a malformed line
                // (which makes Parse/Substring/Add throw) cannot leave the file handle open.
                _verbInfoFilePosition = new Dictionary<string, long>();
                using (StreamReader positionsFile = new StreamReader(VerbInfoFilePositionPath))
                {
                    string line;
                    while ((line = positionsFile.ReadLine()) != null)
                    {
                        // format:  position verb
                        int spaceLoc = line.IndexOf(' ');
                        _verbInfoFilePosition.Add(line.Substring(spaceLoc + 1), long.Parse(line.Substring(0, spaceLoc)));
                    }
                }

                // load mrg-sentence verb info positions
                _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
                using (StreamReader positionsFile = new StreamReader(MrgSentenceInfoFilePositionsPath))
                {
                    string line;
                    while ((line = positionsFile.ReadLine()) != null)
                    {
                        // each line lists a MRG file then a series of pipe-delimited sentence-position pairs...get MRG file first
                        int pipeLoc = line.IndexOf('|');
                        string mrgFile = line.Substring(0, pipeLoc);
                        Dictionary<int, long> sentPosition = new Dictionary<int, long>();
                        _mrgSentInfoFilePosition.Add(mrgFile, sentPosition);

                        // get sentence-position pairs...every line ends with a pipe, so stop once we're on the last character
                        while (pipeLoc != line.Length - 1)
                        {
                            // find next pipe and space
                            int nextPipe = line.IndexOf('|', pipeLoc + 1);
                            int spaceLoc = line.IndexOf(' ', pipeLoc + 1);

                            // get sentence and position
                            int sent = int.Parse(line.Substring(pipeLoc + 1, spaceLoc - pipeLoc - 1));
                            long position = long.Parse(line.Substring(spaceLoc + 1, nextPipe - spaceLoc - 1));

                            // add to index
                            sentPosition.Add(sent, position);

                            pipeLoc = nextPipe;
                        }
                    }
                }

                return;
            }

            // maps each verb to a list of VerbInfo objects, each of which stores an annotation instance
            Dictionary<string, List<VerbInfo>> verbInfo = new Dictionary<string, List<VerbInfo>>();

            // maps each mrg file and sentence number to a list of VerbInfo objects for that sentence
            Dictionary<string, Dictionary<int, List<VerbInfo>>> mrgSentInfo = new Dictionary<string, Dictionary<int, List<VerbInfo>>>();

            // read propositions file, one VerbInfo per line
            using (StreamReader propFile = new StreamReader(propsPath))
            {
                string propLine;
                while ((propLine = propFile.ReadLine()) != null)
                {
                    VerbInfo vi = new VerbInfo(propLine);

                    // add to mapping from verbs to their information
                    verbInfo.EnsureContainsKey(vi.Verb, typeof(List<VerbInfo>));
                    verbInfo[vi.Verb].Add(vi);

                    // add to mapping from file-sentence pairs to their information
                    mrgSentInfo.EnsureContainsKey(vi.File, typeof(Dictionary<int, List<VerbInfo>>));
                    mrgSentInfo[vi.File].EnsureContainsKey(vi.SentenceNumber, typeof(List<VerbInfo>));
                    mrgSentInfo[vi.File][vi.SentenceNumber].Add(vi);
                }
            }

            // write verb index to disk and record file positions of verb info lists
            _verbInfoFilePosition = new Dictionary<string, long>();
            using (FileStream saveStream = new FileStream(VerbInfoPath, FileMode.Create))
            {
                foreach (KeyValuePair<string, List<VerbInfo>> verbEntry in verbInfo)
                {
                    // save position of VerbInfo list before it is written
                    _verbInfoFilePosition.Add(verbEntry.Key, saveStream.Position);

                    WriteVerbInfoList(verbEntry.Value, saveStream);
                }
            }

            // save file positions (format:  position verb)
            using (StreamWriter verbInfoPositionFile = new StreamWriter(VerbInfoFilePositionPath))
            {
                foreach (string verb in _verbInfoFilePosition.Keys)
                {
                    verbInfoPositionFile.WriteLine(_verbInfoFilePosition[verb] + " " + verb);
                }
            }

            // save mrg-sentence info
            _mrgSentInfoFilePosition = new Dictionary<string, Dictionary<int, long>>();
            using (FileStream saveStream = new FileStream(MrgSentenceInfoPath, FileMode.Create))
            {
                foreach (KeyValuePair<string, Dictionary<int, List<VerbInfo>>> fileEntry in mrgSentInfo)
                {
                    // add each sentence, recording position
                    Dictionary<int, long> sentInfoPosition = new Dictionary<int, long>();
                    foreach (KeyValuePair<int, List<VerbInfo>> sentEntry in fileEntry.Value)
                    {
                        // add index of sentence to file position
                        sentInfoPosition.Add(sentEntry.Key, saveStream.Position);

                        // write VerbInfo list for sentence
                        WriteVerbInfoList(sentEntry.Value, saveStream);
                    }

                    _mrgSentInfoFilePosition.Add(fileEntry.Key, sentInfoPosition);
                }
            }

            // save file positions for MRG file index (format:  file|sentence position|sentence position|...|)
            using (StreamWriter mrgSentInfoPositionsFile = new StreamWriter(MrgSentenceInfoFilePositionsPath))
            {
                foreach (string mrgFile in _mrgSentInfoFilePosition.Keys)
                {
                    mrgSentInfoPositionsFile.Write(mrgFile);
                    foreach (int sent in _mrgSentInfoFilePosition[mrgFile].Keys)
                    {
                        mrgSentInfoPositionsFile.Write("|" + sent + " " + _mrgSentInfoFilePosition[mrgFile][sent]);
                    }

                    // the trailing pipe is what terminates the pair loop when the file is read back
                    mrgSentInfoPositionsFile.WriteLine("|");
                }
            }
        }
        /// <summary>
        /// Counts how many training instances belong to each class and derives a weight for every class
        /// whose unmapped label is not the null label.
        /// </summary>
        /// <param name="trainingInstancesReader">Reader over the training instances, one per line; each line starts
        /// with an integer class number that runs up to the first space (or to the end of the line)</param>
        /// <returns>Map from class number to (instances in all other classes) / (instances in this class)</returns>
        private Dictionary<int, float> GetPerClassWeights(StreamReader trainingInstancesReader)
        {
            // tally the instances of each class
            Dictionary<int, int> instancesPerClass = new Dictionary<int, int>();
            string instance;
            while (trainingInstancesReader.TryReadLine(out instance))
            {
                // the class number ends at the first space, or spans the whole line when there is none
                int spaceAt = instance.IndexOf(' ');
                int labelLength = spaceAt < 0 ? instance.Length : spaceAt;
                int label = int.Parse(instance.Substring(0, labelLength));

                instancesPerClass.EnsureContainsKey(label, typeof(int));
                instancesPerClass[label]++;
            }

            // weight = instances outside the class divided by instances inside it
            Dictionary<int, float> weights = new Dictionary<int, float>();
            int instanceTotal = instancesPerClass.Values.Sum();
            foreach (KeyValuePair<int, int> tally in instancesPerClass)
            {
                // classes that unmap to the null label get no weight
                if (_libLinear.GetUnmappedLabel(tally.Key.ToString()) != PointPrediction.NullLabel)
                {
                    weights.Add(tally.Key, (instanceTotal - tally.Value) / (float)tally.Value);
                }
            }

            return weights;
        }
        /// <summary>
        /// Constructor. Reads the whole NomLex dictionary into memory and indexes its parenthesized entries
        /// by noun, also collecting the set of entry names.
        /// </summary>
        /// <param name="path">Path to the NomLex dictionary</param>
        /// <exception cref="FileNotFoundException">No file exists at <paramref name="path"/>.</exception>
        public NomLexEngine(string path)
        {
            if (!File.Exists(path))
                throw new FileNotFoundException("Invalid NomLex file:  \"" + path + "\"");

            string text = File.ReadAllText(path);

            // first pass:  count the entries so the noun table can be presized
            int entryCount = 0;
            for (int open = 0; open >= 0 && open < text.Length; )
            {
                // every entry has to begin on an open paren
                if (text[open] != '(')
                    throw new Exception("Invalid entry");

                // jump from the end of this entry to the open paren of the next one (-1 when there is none)
                int close = IndexOfBalancingParen(text, open);
                open = text.IndexOf('(', close + 1);

                entryCount++;
            }

            // second pass:  extract and index the entries
            _nounEntries = new Dictionary<string, List<NomLexEntry>>(entryCount);
            _classes = new Set<string>(false);
            for (int open = 0; open >= 0 && open < text.Length; )
            {
                // every entry has to begin on an open paren
                if (text[open] != '(')
                    throw new Exception("Invalid entry");

                // the entry text runs from the open paren through its balancing close paren
                int close = IndexOfBalancingParen(text, open);
                NomLexEntry parsed = ExtractEntry(text.Substring(open, close - open + 1));

                // file the entry under the noun given by its "orth" feature
                string noun = parsed.Features["orth"].ToString();
                _nounEntries.EnsureContainsKey(noun, typeof(List<NomLexEntry>));
                _nounEntries[noun].Add(parsed);

                // remember the entry's name in the class index
                _classes.Add(parsed.Name);

                // move on to the next entry
                open = text.IndexOf('(', close + 1);
            }
        }
        /// <summary>
        /// Groups the total threat of each point prediction by time slice and by grid location.
        /// </summary>
        /// <param name="prediction">Prediction whose points and point predictions are grouped</param>
        /// <returns>Threat values indexed first by time slice and then by a "row-col" location key</returns>
        public static Dictionary<long, Dictionary<string, List<double>>> GetSliceLocationThreats(Prediction prediction)
        {
            // only time-slice models define a slice length; any non-positive value means everything goes into slice 1
            DiscreteChoiceModel model = prediction.Model;
            long ticksPerSlice = model is TimeSliceDCM ? ((TimeSliceDCM)model).TimeSliceTicks : -1;

            // index the prediction's points by id so each point prediction can find its location
            Dictionary<int, Point> pointsById = new Dictionary<int, Point>();
            foreach (Point predictionPoint in prediction.Points)
            {
                pointsById.Add(predictionPoint.Id, predictionPoint);
            }

            Dictionary<long, Dictionary<string, List<double>>> threatsBySlice = new Dictionary<long, Dictionary<string, List<double>>>();
            foreach (PointPrediction pointPrediction in prediction.PointPredictions)
            {
                long slice = ticksPerSlice > 0 ? pointPrediction.Time.Ticks / ticksPerSlice : 1;

                // convert the point's coordinates into a grid cell relative to the lower-left corner of the bounding box
                PostGIS.Point coordinates = pointsById[pointPrediction.PointId].Location;
                int row = (int)((coordinates.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
                int col = (int)((coordinates.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
                string cell = row + "-" + col;

                threatsBySlice.EnsureContainsKey(slice, typeof(Dictionary<string, List<double>>));
                Dictionary<string, List<double>> threatsByCell = threatsBySlice[slice];
                threatsByCell.EnsureContainsKey(cell, typeof(List<double>));
                threatsByCell[cell].Add(pointPrediction.TotalThreat);
            }

            return threatsBySlice;
        }
        /// <summary>
        /// Builds token spans for a set of argument nodes. Null-element nodes are ignored, the remaining nodes are
        /// grouped by sentence, and one span is produced for each run of contiguous nodes within a sentence.
        /// </summary>
        /// <param name="argNodes">Argument nodes, all of which must come from the same MRG file</param>
        /// <returns>Contiguous spans, indexed by sentence number</returns>
        private Dictionary<int, List<Span>> GetSpans(List<TreeBankNode> argNodes)
        {
            // every node must come from the same source document as the first node
            foreach (TreeBankNode candidate in argNodes)
            {
                if (candidate.MrgFile != argNodes[0].MrgFile)
                {
                    throw new Exception("MRG file mismatch");
                }
            }

            // work on a copy of the list with the null elements dropped
            List<TreeBankNode> usableNodes = new List<TreeBankNode>(argNodes);
            usableNodes.RemoveAll(n => n.IsNullElement);
            if (usableNodes.Count == 0)
            {
                throw new Exception("Invalid node list");
            }

            // bucket the remaining nodes by the sentence they belong to
            Dictionary<int, List<TreeBankNode>> nodesBySentence = new Dictionary<int, List<TreeBankNode>>();
            foreach (TreeBankNode usable in usableNodes)
            {
                nodesBySentence.EnsureContainsKey(usable.SentenceNumber, typeof(List<TreeBankNode>));
                nodesBySentence[usable.SentenceNumber].Add(usable);
            }

            // each contiguous run of nodes becomes a span from the run's first token to its last token
            Dictionary<int, List<Span>> spansBySentence = new Dictionary<int, List<Span>>();
            foreach (KeyValuePair<int, List<TreeBankNode>> sentence in nodesBySentence)
            {
                List<Span> sentenceSpans = new List<Span>();
                foreach (List<TreeBankNode> run in TreeBankNode.GetContiguousNodes(sentence.Value))
                {
                    int firstToken = run[0].FirstToken.TokenNumber;
                    int lastToken = run[run.Count - 1].LastToken.TokenNumber;
                    sentenceSpans.Add(new Span(firstToken, lastToken));
                }

                spansBySentence.Add(sentence.Key, sentenceSpans);
            }

            return spansBySentence;
        }
Exemple #33
0
        /// <summary>
        /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from
        /// idSynset; otherwise, related synsets are created as shells.
        /// </summary>
        /// <remarks>
        /// Parses a single synset definition line: the lexicographer file number, the word list, the gloss (the text
        /// after the '|' character) and the relation list. On success the Words, Gloss and LexicographerFileName
        /// members as well as the semantic and lexical relation lookups are populated and Instantiated is set to true.
        /// </remarks>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param>
        /// <exception cref="Exception">Thrown when the synset is already instantiated, the lexicographer file number is
        /// invalid, a parsed word contains a space, or the gloss contains a pipe character.</exception>
        internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset)
        {
            // don't re-instantiate
            if (Instantiated)
            {
                throw new Exception("Synset has already been instantiated");
            }

            /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
             * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */
            var lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1;

            if (lexicographerFileNumber <= 0)
            {
                throw new Exception("Invalid lexicographer file name number. Should be >= 1.");
            }

            LexicographerFileName = (LexicographerFileName)lexicographerFileNumber;

            // get number of words in the synset (a hexadecimal field) and the start character of the word list
            // NOTE(review): wordStart is presumably set by GetField to the start index of field 3 - confirm against GetField
            int wordStart;
            var numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber);

            // skip past the word-count field to the first character after the following space
            wordStart = definition.IndexOf(' ', wordStart) + 1;

            // get words in synset
            Words = new List <string>(numWords);
            for (var i = 0; i < numWords; ++i)
            {
                // the word runs from wordStart up to (but not including) the next space
                var wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
                var wordLen = wordEnd - wordStart + 1;
                var word    = definition.Substring(wordStart, wordLen);
                if (word.Contains(' '))
                {
                    throw new Exception("Unexpected space in word:  " + word);
                }

                Words.Add(word);

                // skip lex_id field (wordEnd + 2 is the first character after the space that ends the word)
                wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
            }

            // get gloss: everything after the first pipe character, trimmed
            Gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
            if (Gloss.Contains('|'))
            {
                throw new Exception("Unexpected pipe in gloss");
            }

            // get number and start of relations...each word contributes two fields (the word and its lex_id), so the
            // relation count is the field that follows them; unlike the word count it is parsed as a decimal number
            var relationCountField = 3 + (Words.Count * 2) + 1;
            int relationFieldStart;
            var numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart));

            // skip past the relation-count field to the first relation field
            relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

            // grab each related synset
            relationSynSets  = new Dictionary <SynSetRelation, List <SynSet> >();
            lexicalRelations = new Dictionary <SynSetRelation, Dictionary <SynSet, Dictionary <int, List <int> > > >();
            for (var relationNum = 0; relationNum < numRelations; ++relationNum)
            {
                string relationSymbol      = null;
                var    relatedSynSetOffset = -1;
                var    relatedSynSetPOS    = WordNetPos.None;
                var    sourceWordIndex     = -1;
                var    targetWordIndex     = -1;

                // each relation has four columns
                for (var relationField = 0; relationField <= 3; ++relationField)
                {
                    // the field runs from relationFieldStart up to (but not including) the next space
                    var fieldEnd   = definition.IndexOf(' ', relationFieldStart + 1) - 1;
                    var fieldLen   = fieldEnd - relationFieldStart + 1;
                    var fieldValue = definition.Substring(relationFieldStart, fieldLen);

                    // relation symbol
                    if (relationField == 0)
                    {
                        relationSymbol = fieldValue;
                    }
                    // related synset offset
                    else if (relationField == 1)
                    {
                        relatedSynSetOffset = int.Parse(fieldValue);
                    }
                    // related synset POS
                    else if (relationField == 2)
                    {
                        relatedSynSetPOS = GetPos(fieldValue);
                    }
                    // source/target word for lexical relation: first two hex digits are the source, the rest the target
                    else if (relationField == 3)
                    {
                        sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber);
                        targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber);
                    }
                    // not reachable while the loop bounds stay at 0..3
                    else
                    {
                        throw new Exception();
                    }

                    // advance to the first character of the next field
                    relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
                }

                // get related synset...create shell if we don't have a lookup
                var relatedSynSet = idSynset != null
                    ? idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]
                    : new SynSet(relatedSynSetPOS, relatedSynSetOffset, wordNet);

                // get relation
                var relation = WordNet.GetSynSetRelation(Pos, relationSymbol);

                // add semantic relation if we have neither a source nor a target word index (both parsed as zero)
                if (sourceWordIndex == 0 && targetWordIndex == 0)
                {
                    relationSynSets.EnsureContainsKey(relation, typeof(List <SynSet>));
                    relationSynSets[relation].Add(relatedSynSet);
                }
                // add lexical relation, indexed by relation, related synset and source word index
                else
                {
                    lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary <SynSet, Dictionary <int, List <int> > >));
                    lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary <int, List <int> >));
                    lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(List <int>));

                    // record each target word index at most once
                    if (!lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex))
                    {
                        lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex);
                    }
                }
            }

            Instantiated = true;
        }
        /// <summary>
        /// Loads every VerbNet class file (*.xml) in a directory, links the classes into a tree rooted at class "0",
        /// and builds the verb-to-classes index.
        /// </summary>
        /// <param name="verbNetDirectory">Path to the VerbNet directory</param>
        public VerbNetEngine(string verbNetDirectory)
        {
            if (!Directory.Exists(verbNetDirectory))
            {
                throw new Exception("Invalid VerbNet directory");
            }

            // read every class definition found in the directory
            _idVerbClass = new Dictionary<string, VerbClass>();
            string[] classFiles = Directory.GetFiles(verbNetDirectory, "*.xml");
            foreach (string classFile in classFiles)
            {
                string classXml = File.ReadAllText(classFile);
                ExtractClass(classXml, false);
            }

            // the root class is synthetic and is registered like any other class
            _rootVerbClass = new VerbClass("0");
            _idVerbClass.Add(_rootVerbClass.ID, _rootVerbClass);

            // repeatedly attach parentless classes to their parents, inventing empty "connector" classes for parent
            // ids that are not defined; stop after the first round in which no connector had to be invented
            bool connectorsAdded;
            do
            {
                Dictionary<string, VerbClass> connectors = new Dictionary<string, VerbClass>();

                foreach (KeyValuePair<string, VerbClass> known in _idVerbClass)
                {
                    string id = known.Key;
                    VerbClass orphan = known.Value;

                    // only non-root classes that still lack a parent need attention
                    bool needsParent = id != _rootVerbClass.ID && orphan.Parent == null;
                    if (!needsParent)
                    {
                        continue;
                    }

                    // the parent's id is the class id with its final dot-separated component removed
                    string parentID = id.Substring(0, id.LastIndexOf('.'));

                    // prefer a known class, then a connector created earlier in this round, then a brand new connector
                    VerbClass parent;
                    if (!_idVerbClass.TryGetValue(parentID, out parent) && !connectors.TryGetValue(parentID, out parent))
                    {
                        parent = new VerbClass(parentID);
                        connectors.Add(parent.ID, parent);
                    }

                    parent.AddChild(orphan);
                }

                // connectors are registered after the scan so the dictionary is not modified while it is enumerated
                foreach (KeyValuePair<string, VerbClass> connector in connectors)
                {
                    _idVerbClass.Add(connector.Key, connector.Value);
                }

                connectorsAdded = connectors.Count != 0;
            }
            while (connectorsAdded);

            // index the classes reachable from the root by the verbs they contain
            _verbVerbClasses = new Dictionary<string, Set<VerbClass>>();
            foreach (VerbClass reachable in _rootVerbClass.GetChildren(true))
            {
                foreach (string verb in reachable.GetVerbs(false))
                {
                    _verbVerbClasses.EnsureContainsKey(verb, typeof(Set<VerbClass>));
                    _verbVerbClasses[verb].Add(reachable);
                }
            }

            // sanity check: everything except the root must have ended up with a parent (i.e., we have a rooted tree)
            foreach (VerbClass registered in _idVerbClass.Values)
            {
                if (registered.ID != "0" && registered.Parent == null)
                {
                    throw new Exception("Invalid VerbNet tree structure!");
                }
            }
        }
Exemple #35
0
        /// <summary>
        /// Runs the prediction: places prediction points on a grid over the prediction area (plus one point per
        /// training incident), stores the points, estimates incident density at each point overall and per incident
        /// type, stores the resulting point predictions, and smooths the prediction.
        /// </summary>
        /// <param name="prediction">Prediction to run</param>
        protected override void Run(Prediction prediction)
        {
            Area predictionArea = prediction.PredictionArea;
            double areaMinX = predictionArea.BoundingBox.MinX;
            double areaMaxX = predictionArea.BoundingBox.MaxX;
            double areaMinY = predictionArea.BoundingBox.MinY;
            double areaMaxY = predictionArea.BoundingBox.MaxY;

            // place points in the middle of the square boxes that cover the region - we get display errors from pixel rounding if the points are exactly on the boundaries
            List<PostGIS.Point> candidatePoints = new List<PostGIS.Point>();
            double x = areaMinX + prediction.PredictionPointSpacing / 2d;
            while (x <= areaMaxX)
            {
                double y = areaMinY + prediction.PredictionPointSpacing / 2d;
                while (y <= areaMaxY)
                {
                    candidatePoints.Add(new PostGIS.Point(x, y, predictionArea.Shapefile.SRID));
                    y += prediction.PredictionPointSpacing;
                }

                x += prediction.PredictionPointSpacing;
            }

            // the locations of all training incidents are used as prediction points as well
            List<PostGIS.Point> allIncidentPoints = new List<PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, IncidentTypes.ToArray()).Select(incident => incident.Location));
            candidatePoints.AddRange(allIncidentPoints);

            Console.Out.WriteLine("Filtering prediction points to prediction area");
            List<PostGIS.Point> predictionPoints = predictionArea.Intersects(candidatePoints, prediction.PredictionPointSpacing / 2f).Select(index => candidatePoints[index]).ToList();

            NpgsqlConnection connection = DB.Connection.OpenConnection;

            try
            {
                Console.Out.WriteLine("Inserting points into prediction");
                Point.CreateTable(prediction, predictionArea.Shapefile.SRID);
                List<int> predictionPointIds = Point.Insert(connection, predictionPoints.Select(point => new Tuple<PostGIS.Point, string, DateTime>(point, PointPrediction.NullLabel, DateTime.MinValue)), prediction, predictionArea, false);

                // density over all incident types; the i-th density value belongs to the i-th inserted point id
                Console.Out.WriteLine("Running overall KDE for " + IncidentTypes.Count + " incident type(s)");
                List<float> overallDensity = GetDensityEstimate(allIncidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize);
                Dictionary<int, float> pointIdOverallDensity = new Dictionary<int, float>(predictionPointIds.Count);
                for (int i = 0; i < predictionPointIds.Count; ++i)
                {
                    pointIdOverallDensity.Add(predictionPointIds[i], overallDensity[i]);
                }

                Dictionary<int, Dictionary<string, double>> pointIdIncidentDensity = new Dictionary<int, Dictionary<string, double>>(pointIdOverallDensity.Count);
                if (IncidentTypes.Count != 1)
                {
                    // several incident types: run a separate KDE for each one
                    foreach (string incidentType in IncidentTypes)
                    {
                        Console.Out.WriteLine("Running KDE for incident \"" + incidentType + "\"");
                        List<PostGIS.Point> typeIncidentPoints = new List<PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, incidentType).Select(incident => incident.Location));
                        List<float> typeDensity = GetDensityEstimate(typeIncidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize);

                        // an empty estimate contributes nothing for this incident type
                        if (typeDensity.Count > 0)
                        {
                            for (int i = 0; i < predictionPointIds.Count; ++i)
                            {
                                int predictionPointId = predictionPointIds[i];
                                pointIdIncidentDensity.EnsureContainsKey(predictionPointId, typeof(Dictionary<string, double>));
                                pointIdIncidentDensity[predictionPointId].Add(incidentType, typeDensity[i]);
                            }
                        }
                    }
                }
                else
                {
                    // a single incident type: its density is the overall density, so no further KDE is needed
                    string onlyIncidentType = IncidentTypes.First();
                    foreach (KeyValuePair<int, float> idAndDensity in pointIdOverallDensity)
                    {
                        pointIdIncidentDensity.Add(idAndDensity.Key, new Dictionary<string, double> { { onlyIncidentType, idAndDensity.Value } });
                    }
                }

                PointPrediction.CreateTable(prediction);
                PointPrediction.Insert(GetPointPredictionValues(pointIdOverallDensity, pointIdIncidentDensity), prediction, false);

                Smooth(prediction);
            }
            finally
            {
                // always hand the connection back, even when the prediction fails
                DB.Connection.Return(connection);
            }
        }
Exemple #36
0
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="wordNetDirectory">Path to WordNet directory (the one with the data and index files in it)</param>
        /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space
        /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient
        /// binary search algorithm.</param>
        /// <exception cref="DirectoryNotFoundException">Thrown when <paramref name="wordNetDirectory"/> does not exist</exception>
        /// <exception cref="FileNotFoundException">Thrown when one of the data.* or index.* files is missing</exception>
        public WordNetEngine(string wordNetDirectory, bool inMemory)
        {
            _wordNetDirectory         = wordNetDirectory;
            _inMemory                 = inMemory;
            _posIndexWordSearchStream = null;
            _posSynSetDataFile        = null;

            if (!System.IO.Directory.Exists(_wordNetDirectory))
            {
                throw new DirectoryNotFoundException("Error 502");
            }

            // get data and index paths
            string[] dataPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "data.adj"),
                Path.Combine(_wordNetDirectory, "data.adv"),
                Path.Combine(_wordNetDirectory, "data.noun"),
                Path.Combine(_wordNetDirectory, "data.verb")
            };

            string[] indexPaths = new string[]
            {
                Path.Combine(_wordNetDirectory, "index.adj"),
                Path.Combine(_wordNetDirectory, "index.adv"),
                Path.Combine(_wordNetDirectory, "index.noun"),
                Path.Combine(_wordNetDirectory, "index.verb")
            };

            // make sure all files exist
            foreach (string path in dataPaths.Union(indexPaths))
            {
                if (!System.IO.File.Exists(path))
                {
                    throw new FileNotFoundException("Error 502");
                }
            }

            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
            // *                                                               *
            // *   UPDATE [HASSAN:11/03/2017]: The lemmatizer requires the     *
            // *   exception dictionary for each POS to be loaded as a stream  *
            // *                                                               *
            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

            // the exception-file readers are stored in LemmaExcptionsFile and deliberately stay open;
            // paths are built with Path.Combine (like the data/index paths) rather than a hard-coded separator
            LemmaExcptionsFile = new Dictionary <string, StreamReader>(4);
            LemmaExcptionsFile.Add("noun", new StreamReader(Path.Combine(wordNetDirectory, "noun.exc")));
            LemmaExcptionsFile.Add("verb", new StreamReader(Path.Combine(wordNetDirectory, "verb.exc")));

            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
            // *                                                               *
            // *   UPDATE [HASSAN:11/07/2017]: The lemmatizer requires the     *
            // *   exception dictionary for noun only in the context of the    *
            // *   SemCluster tool. In order to implement the lemmatizer for   *
            // *   all 4 POS tags you will need the following:                 *
            // *    1) Uncomment the following lines.                          *
            // *    2) Uncomment the lines in suffixMap variable.              *
            // *    3) Uncomment the GetSynsets Switch section                 *
            // *    4) Add Exception files for each POS in the data folder     *
            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

            //LemmaExcptionsFile.Add("adjective", new StreamReader(Path.Combine(wordNetDirectory, "adj.exc")));
            //LemmaExcptionsFile.Add("adverb", new StreamReader(Path.Combine(wordNetDirectory, "adv.exc")));

            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
            // *                                                               *
            // *   UPDATE [HASSAN:28/01/2016]: The #region index file sorting  *
            // *   has been removed here, since it is required to run only for *
            // *   the first program execution                                 *
            // *                                                               *
            // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

            #region engine init
            if (inMemory)
            {
                // NOTE: every reader opened below is wrapped in a using block so that its file handle is released
                // even when a line fails to parse or a duplicate key is encountered part-way through a file

                // pass 1:  get total number of synsets
                int totalSynsets = 0;
                foreach (string dataPath in dataPaths)
                {
                    // scan synset data file for lines that don't start with a space...these are synset definition lines
                    using (StreamReader dataFile = new StreamReader(dataPath))
                    {
                        string line;
                        while (dataFile.TryReadLine(out line))
                        {
                            int firstSpace = line.IndexOf(' ');
                            if (firstSpace > 0)
                            {
                                ++totalSynsets;
                            }
                        }
                    }
                }

                // pass 2:  create synset shells (pos and offset only)
                _idSynset = new Dictionary <string, SynSet>(totalSynsets);
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    using (StreamReader dataFile = new StreamReader(dataPath))
                    {
                        string line;
                        while (dataFile.TryReadLine(out line))
                        {
                            int firstSpace = line.IndexOf(' ');
                            if (firstSpace > 0)
                            {
                                // get offset and create synset shell
                                int    offset = int.Parse(line.Substring(0, firstSpace));
                                SynSet synset = new SynSet(pos, offset, null);

                                _idSynset.Add(synset.ID, synset);
                            }
                        }
                    }
                }

                // pass 3:  instantiate synsets (hooks up relations, set glosses, etc.)
                foreach (string dataPath in dataPaths)
                {
                    POS pos = GetFilePOS(dataPath);

                    // scan synset data file
                    using (StreamReader dataFile = new StreamReader(dataPath))
                    {
                        string line;
                        while (dataFile.TryReadLine(out line))
                        {
                            int firstSpace = line.IndexOf(' ');
                            if (firstSpace > 0)
                            {
                                // instantiate synset defined on current line, using the instantiated synsets for all references
                                _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset);
                            }
                        }
                    }
                }

                // organize synsets by pos and words
                _posWordSynSets = new Dictionary <POS, Dictionary <string, List <SynSet> > >();
                foreach (string indexPath in indexPaths)
                {
                    POS pos = GetFilePOS(indexPath);

                    _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, List <SynSet> >));
                    Dictionary <string, List <SynSet> > wordSynSets = _posWordSynSets[pos];

                    // scan word index file, skipping header lines
                    using (StreamReader indexFile = new StreamReader(indexPath))
                    {
                        string line;
                        while (indexFile.TryReadLine(out line))
                        {
                            int firstSpace = line.IndexOf(' ');
                            if (firstSpace > 0)
                            {
                                // grab word and synset shells
                                string        word    = line.Substring(0, firstSpace);
                                List <SynSet> synsets = GetSynSetShells(line, pos, null);

                                // use reference to the synsets that we instantiated in our three-pass routine above
                                List <SynSet> instantiatedSynsets = new List <SynSet>(synsets.Count);
                                wordSynSets.Add(word, instantiatedSynsets);
                                foreach (SynSet synset in synsets)
                                {
                                    instantiatedSynsets.Add(_idSynset[synset.ID]);
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                // open binary search streams for index files...these stay open for the lifetime of the engine
                _posIndexWordSearchStream = new Dictionary <POS, BinarySearchTextStream>();
                foreach (string indexPath in indexPaths)
                {
                    // create binary search stream for index file
                    BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate(
                        delegate(string searchWord, string currentLine)
                        {
                            // if we landed on the header text, search further down
                            if (currentLine[0] == ' ')
                            {
                                return 1;
                            }

                            // get word on current line
                            string currentWord = currentLine.Substring(0, currentLine.IndexOf(' '));

                            // compare searched-for word to the current word
                            return searchWord.CompareTo(currentWord);
                        }));

                    // add search stream for current POS
                    _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream);
                }

                // open readers for synset data files...these stay open for the lifetime of the engine
                _posSynSetDataFile = new Dictionary <POS, StreamReader>();
                foreach (string dataPath in dataPaths)
                {
                    _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath));
                }
            }
            #endregion
        }