private Dictionary<int, float> GetPerClassWeights(StreamReader trainingInstancesReader)
{
    Dictionary<int, int> classCount = new Dictionary<int, int>();
    string line;
    while (trainingInstancesReader.TryReadLine(out line))
    {
        int firstSpace = line.IndexOf(' ');
        if (firstSpace == -1)
        {
            firstSpace = line.Length;
        }

        int classNum = int.Parse(line.Substring(0, firstSpace));
        classCount.EnsureContainsKey(classNum, typeof(int));
        classCount[classNum]++;
    }

    Dictionary<int, float> classWeight = new Dictionary<int, float>();
    int total = classCount.Values.Sum();
    foreach (int classNum in classCount.Keys)
    {
        if (_libLinear.GetUnmappedLabel(classNum.ToString()) != PointPrediction.NullLabel)
        {
            classWeight.Add(classNum, (total - classCount[classNum]) / (float)classCount[classNum]);
        }
    }

    return classWeight;
}
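The method above weights each class by (total - count) / count, so under-represented classes receive proportionally larger weights. The following standalone snippet is not part of the library; the class counts are invented purely to illustrate the same arithmetic:

using System;
using System.Collections.Generic;
using System.Linq;

class ClassWeightDemo
{
    static void Main()
    {
        // hypothetical class counts: class 0 is common, class 1 is rare
        Dictionary<int, int> classCount = new Dictionary<int, int> { { 0, 90 }, { 1, 10 } };
        int total = classCount.Values.Sum();  // 100

        foreach (KeyValuePair<int, int> classNumCount in classCount)
        {
            // same formula as GetPerClassWeights: (total - count) / count
            float weight = (total - classNumCount.Value) / (float)classNumCount.Value;
            Console.WriteLine(classNumCount.Key + ": " + weight);  // 0: ~0.11, 1: 9
        }
    }
}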
public static Dictionary<long, Dictionary<string, int>> GetSliceLocationTrueCount(IEnumerable<Incident> incidents, Prediction prediction)
{
    Dictionary<long, Dictionary<string, int>> sliceLocationTrueCount = new Dictionary<long, Dictionary<string, int>>();

    DiscreteChoiceModel model = prediction.Model;
    long sliceTicks = -1;
    if (model is TimeSliceDCM)
    {
        sliceTicks = (model as TimeSliceDCM).TimeSliceTicks;
    }

    foreach (Incident incident in incidents)
    {
        long slice = 1;
        if (sliceTicks > 0)
        {
            slice = incident.Time.Ticks / sliceTicks;
        }

        int row = (int)((incident.Location.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
        int col = (int)((incident.Location.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
        string location = row + "-" + col;

        sliceLocationTrueCount.EnsureContainsKey(slice, typeof(Dictionary<string, int>));
        sliceLocationTrueCount[slice].EnsureContainsKey(location, typeof(int));
        sliceLocationTrueCount[slice][location]++;
    }

    return sliceLocationTrueCount;
}
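The row/column bucketing above assigns each incident to a prediction-grid cell keyed by "row-col". A minimal standalone illustration of that arithmetic follows; the bounding-box minimum and point spacing are made-up values:

using System;

class GridCellDemo
{
    static void Main()
    {
        // hypothetical prediction-area values
        double minX = 1000, minY = 2000, spacing = 200;

        // hypothetical incident location
        double x = 1530, y = 2410;

        // same bucketing as GetSliceLocationTrueCount
        int row = (int)((y - minY) / spacing);  // 2
        int col = (int)((x - minX) / spacing);  // 2
        Console.WriteLine(row + "-" + col);     // "2-2"
    }
}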
/// <summary>
/// Gets lexically related words for the current synset. Many of the relations in WordNet are lexical instead of semantic. Whereas
/// the latter indicate relations between entire synsets (e.g., hypernym), the former indicate relations between specific
/// words in synsets. This method retrieves all lexical relations and the words related thereby.
/// </summary>
/// <returns>Mapping from relations to mappings from words in the current synset to related words in the related synsets</returns>
public Dictionary<SynSetRelation, Dictionary<string, List<string>>> GetLexicallyRelatedWords()
{
    var relatedWords = new Dictionary<SynSetRelation, Dictionary<string, List<string>>>();
    foreach (var relation in lexicalRelations.Keys)
    {
        relatedWords.EnsureContainsKey(relation, typeof(Dictionary<string, List<string>>));

        foreach (var relatedSynSet in lexicalRelations[relation].Keys)
        {
            // make sure related synset is initialized
            if (!relatedSynSet.Instantiated)
            {
                relatedSynSet.Instantiate(wordNet.Provider);
            }

            foreach (var sourceWordIndex in lexicalRelations[relation][relatedSynSet].Keys)
            {
                var sourceWord = Words[sourceWordIndex - 1];
                relatedWords[relation].EnsureContainsKey(sourceWord, typeof(List<string>), false);

                foreach (var targetWordIndex in lexicalRelations[relation][relatedSynSet][sourceWordIndex])
                {
                    var targetWord = relatedSynSet.Words[targetWordIndex - 1];
                    relatedWords[relation][sourceWord].Add(targetWord);
                }
            }
        }
    }

    return relatedWords;
}
/// <summary>
/// Gets lexically related words for the current synset. Many of the relations in WordNet are lexical instead of semantic. Whereas
/// the latter indicate relations between entire synsets (e.g., hypernym), the former indicate relations between specific
/// words in synsets. This method retrieves all lexical relations and the words related thereby.
/// </summary>
/// <returns>Mapping from relations to mappings from words in the current synset to related words in the related synsets</returns>
public Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> GetLexicallyRelatedWords()
{
    Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>> relatedWords = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<string, Set<string>>>();
    foreach (WordNetEngine.SynSetRelation relation in _lexicalRelations.Keys)
    {
        relatedWords.EnsureContainsKey(relation, typeof(Dictionary<string, Set<string>>));

        foreach (SynSet relatedSynSet in _lexicalRelations[relation].Keys)
        {
            // make sure related synset is initialized
            if (!relatedSynSet.Instantiated)
            {
                relatedSynSet.Instantiate();
            }

            foreach (int sourceWordIndex in _lexicalRelations[relation][relatedSynSet].Keys)
            {
                string sourceWord = _words[sourceWordIndex - 1];
                relatedWords[relation].EnsureContainsKey(sourceWord, typeof(Set<string>), false);

                foreach (int targetWordIndex in _lexicalRelations[relation][relatedSynSet][sourceWordIndex])
                {
                    string targetWord = relatedSynSet.Words[targetWordIndex - 1];
                    relatedWords[relation][sourceWord].Add(targetWord);
                }
            }
        }
    }

    return relatedWords;
}
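A usage sketch for either of the two variants above, assuming an already-instantiated SynSet obtained elsewhere from the engine (how the synset is retrieved is outside this snippet):

using System;

static class LexicalRelationDump
{
    // prints every lexical relation of the given synset as "relation: sourceWord -> targetWord"
    public static void Print(SynSet synset)
    {
        var lexicallyRelated = synset.GetLexicallyRelatedWords();
        foreach (var relation in lexicallyRelated.Keys)
            foreach (var sourceWord in lexicallyRelated[relation].Keys)
                foreach (var targetWord in lexicallyRelated[relation][sourceWord])
                    Console.WriteLine(relation + ": " + sourceWord + " -> " + targetWord);
    }
}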
/// <summary>
/// Adds a frame element to this set
/// </summary>
/// <param name="frameElement">Frame element to add</param>
public void Add(FrameElement frameElement)
{
    _frameElements.Add(frameElement);
    _idFrameElement.Add(frameElement.ID, frameElement);

    string lowerName = frameElement.Name.ToLower();
    _nameFrameElements.EnsureContainsKey(lowerName, typeof(Set<FrameElement>));
    _nameFrameElements[lowerName].Add(frameElement);
}
/// <summary>
/// Looks up all information for a given verb, organized by sense.
/// Key: Sense of verb (role set ID)
/// Value: List of VerbInfo objects for senses of verb
/// </summary>
/// <param name="verb">Verb to look up information for</param>
/// <returns>Verb information, organized by sense</returns>
public Dictionary<int, List<VerbInfo>> GetVerbInfoBySense(string verb)
{
    // get all verb info
    Dictionary<int, List<VerbInfo>> info = new Dictionary<int, List<VerbInfo>>();
    foreach (VerbInfo vi in GetVerbInfo(verb))
    {
        info.EnsureContainsKey(vi.RoleSetId, typeof(List<VerbInfo>));
        info[vi.RoleSetId].Add(vi);
    }

    return info;
}
/// <summary> /// Saves the PropBank-to-VerbNet mapping to file /// </summary> /// <param name="path">Path to file</param> public void SavePropBankVerbNetLinking(string path) { // gather propbank-verbnet linking - organized by verb, role set, verbnet class, then tuples of pb-vn argument links Dictionary <string, Dictionary <int, Dictionary <string, List <Tuple <int, string> > > > > pbVnLinking = new Dictionary <string, Dictionary <int, Dictionary <string, List <Tuple <int, string> > > > >(); foreach (string propBankRole in PropBankRoles) { string[] parts = propBankRole.Split('.'); string verb = parts[0]; int roleSet = int.Parse(parts[1]); int arg = int.Parse(parts[2]); pbVnLinking.EnsureContainsKey(verb, typeof(Dictionary <int, Dictionary <string, List <Tuple <int, string> > > >)); pbVnLinking[verb].EnsureContainsKey(roleSet, typeof(Dictionary <string, List <Tuple <int, string> > >)); foreach (string verbNetRole in GetVerbNetRolesForPropBank(propBankRole)) { string verbNetClass = verbNetRole.Substring(0, verbNetRole.LastIndexOf('.')); string themeRole = verbNetRole.Substring(verbNetRole.LastIndexOf('.') + 1); pbVnLinking[verb][roleSet].EnsureContainsKey(verbNetClass, typeof(List <Tuple <int, string> >)); pbVnLinking[verb][roleSet][verbNetClass].Add(new Tuple <int, string>(arg, themeRole)); } } // write linking file StreamWriter file = new StreamWriter(path); file.WriteLine("<pbvn-typemap>"); foreach (string predicate in pbVnLinking.Keys) { file.WriteLine(" <predicate lemma=\"" + predicate + "\">"); foreach (int roleSet in pbVnLinking[predicate].Keys) { foreach (string vnClass in pbVnLinking[predicate][roleSet].Keys) { file.WriteLine(" <argmap pb-roleset=\"" + predicate + "." + roleSet + "\" vn-class=\"" + vnClass + "\">"); foreach (Tuple <int, string> map in pbVnLinking[predicate][roleSet][vnClass]) { file.WriteLine(" <role pb-arg=\"" + map.Item1 + "\" vn-theta=\"" + map.Item2 + "\" />"); } file.WriteLine(" </argmap>"); } } file.WriteLine(" </predicate>"); } file.WriteLine("</pbvn-typemap>"); file.Close(); }
/// <summary>
/// Adds a mapping from FrameNet to VerbNet
/// </summary>
/// <param name="frameElement">Frame element</param>
/// <param name="verbNetRole">VerbNet role to add to frame element</param>
public void AddVerbNetRoleForFrameNet(string frameElement, string verbNetRole)
{
    // map framenet to verbnet
    _frameElementVerbNetRoles.EnsureContainsKey(frameElement, typeof(Set<string>));
    if (!_frameElementVerbNetRoles[frameElement].Contains(verbNetRole))
    {
        _frameElementVerbNetRoles[frameElement].Add(verbNetRole);
    }

    // map verbnet to framenet
    _verbNetRoleFrameElements.EnsureContainsKey(verbNetRole, typeof(Set<string>));
    if (!_verbNetRoleFrameElements[verbNetRole].Contains(frameElement))
    {
        _verbNetRoleFrameElements[verbNetRole].Add(frameElement);
    }
}
/// <summary>
/// Adds a mapping from PropBank to VerbNet
/// </summary>
/// <param name="propBankRole">PropBank role</param>
/// <param name="verbNetRole">VerbNet role to add to PropBank role</param>
public void AddVerbNetRoleForPropBank(string propBankRole, string verbNetRole)
{
    // map propbank to verbnet
    _propBankRoleVerbNetRoles.EnsureContainsKey(propBankRole, typeof(Set<string>));
    if (!_propBankRoleVerbNetRoles[propBankRole].Contains(verbNetRole))
    {
        _propBankRoleVerbNetRoles[propBankRole].Add(verbNetRole);
    }

    // map verbnet to propbank
    _verbNetRolePropBankRoles.EnsureContainsKey(verbNetRole, typeof(Set<string>));
    if (!_verbNetRolePropBankRoles[verbNetRole].Contains(propBankRole))
    {
        _verbNetRolePropBankRoles[verbNetRole].Add(propBankRole);
    }
}
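Both Add methods above keep a forward and an inverse lookup in sync. Below is a self-contained sketch of the same pattern, with HashSet<string> standing in for the library's Set<string> and made-up role identifiers (both are assumptions made only for this illustration):

using System;
using System.Collections.Generic;

class BidirectionalMapDemo
{
    static readonly Dictionary<string, HashSet<string>> _forward = new Dictionary<string, HashSet<string>>();
    static readonly Dictionary<string, HashSet<string>> _inverse = new Dictionary<string, HashSet<string>>();

    static void AddMapping(string left, string right)
    {
        // ensure-then-add on the forward map
        if (!_forward.ContainsKey(left))
            _forward.Add(left, new HashSet<string>());
        _forward[left].Add(right);

        // ensure-then-add on the inverse map
        if (!_inverse.ContainsKey(right))
            _inverse.Add(right, new HashSet<string>());
        _inverse[right].Add(left);
    }

    static void Main()
    {
        // hypothetical PropBank argument and VerbNet thematic-role identifiers
        AddMapping("cause.01.0", "cause-27.1.Agent");
        Console.WriteLine(_forward["cause.01.0"].Count);        // 1
        Console.WriteLine(_inverse["cause-27.1.Agent"].Count);  // 1
    }
}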
/// <summary> /// Saves the FrameNet-to-VerbNet mapping to file /// </summary> /// <param name="path">Path to file</param> public void SaveFrameNetVerbNetLinking(string path) { // gather framenet-verbnet linking - organized by frame, verbnet class, then tuples of fn-vn links Dictionary <string, Dictionary <string, List <Tuple <string, string> > > > fnVnLinking = new Dictionary <string, Dictionary <string, List <Tuple <string, string> > > >(); foreach (string frameElement in FrameElements) { string[] parts = frameElement.Split('.'); string frame = InitialCharactersToUpper(parts[0], 1); string fe = InitialCharactersToUpper(parts[1], 1); fnVnLinking.EnsureContainsKey(frame, typeof(Dictionary <string, List <Tuple <string, string> > >)); // gather roles for frame element foreach (string verbNetRole in GetVerbNetRolesForFrameNet(frameElement)) { string verbNetClass = verbNetRole.Substring(0, verbNetRole.LastIndexOf('.')); string themeRole = verbNetRole.Substring(verbNetRole.LastIndexOf('.') + 1); fnVnLinking[frame].EnsureContainsKey(verbNetClass, typeof(List <Tuple <string, string> >)); fnVnLinking[frame][verbNetClass].Add(new Tuple <string, string>(fe, themeRole)); } } // write linking file...sort everything to make version control more informative StreamWriter file = new StreamWriter(path); file.WriteLine("<verbnetRoles-framenetFEs_RoleMappingData>"); foreach (string frame in new SortedSet <string>(fnVnLinking.Keys)) { foreach (string vnClass in new SortedSet <string>(fnVnLinking[frame].Keys)) { file.WriteLine(" <vncls class='" + vnClass + "' fnframe='" + frame + "'>" + Environment.NewLine + " <roles>"); foreach (Tuple <string, string> map in new SortedSet <Tuple <string, string> >(fnVnLinking[frame][vnClass])) { file.WriteLine(" <role fnrole='" + map.Item1 + "' vnrole='" + map.Item2 + "'/>"); } file.WriteLine(" </roles>" + Environment.NewLine + " </vncls>"); } } file.WriteLine("</verbnetRoles-framenetFEs_RoleMappingData>"); file.Close(); }
public static Dictionary<long, Dictionary<string, List<double>>> GetSliceLocationThreats(Prediction prediction)
{
    Dictionary<long, Dictionary<string, List<double>>> sliceLocationThreats = new Dictionary<long, Dictionary<string, List<double>>>();

    DiscreteChoiceModel model = prediction.Model;
    long sliceTicks = -1;
    if (model is TimeSliceDCM)
    {
        sliceTicks = (model as TimeSliceDCM).TimeSliceTicks;
    }

    Dictionary<int, Point> idPoint = new Dictionary<int, Point>();
    foreach (Point point in prediction.Points)
    {
        idPoint.Add(point.Id, point);
    }

    foreach (PointPrediction pointPrediction in prediction.PointPredictions)
    {
        long slice = 1;
        if (sliceTicks > 0)
        {
            slice = pointPrediction.Time.Ticks / sliceTicks;
        }

        PostGIS.Point point = idPoint[pointPrediction.PointId].Location;
        int row = (int)((point.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing);
        int col = (int)((point.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing);
        string location = row + "-" + col;

        sliceLocationThreats.EnsureContainsKey(slice, typeof(Dictionary<string, List<double>>));
        sliceLocationThreats[slice].EnsureContainsKey(location, typeof(List<double>));
        sliceLocationThreats[slice][location].Add(pointPrediction.TotalThreat);
    }

    return sliceLocationThreats;
}
/// <summary> /// Constructor /// </summary> /// <param name="wordNetDirectory">Path to WorNet directory (the one with the data and index files in it)</param> /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient /// binary search algorithm.</param> public WordNetEngine(string wordNetDirectory, bool inMemory) { _wordNetDirectory = wordNetDirectory; _inMemory = inMemory; _posIndexWordSearchStream = null; _posSynSetDataFile = null; if (!System.IO.Directory.Exists(_wordNetDirectory)) { throw new DirectoryNotFoundException("Non-existent WordNet directory: " + _wordNetDirectory); } // get data and index paths string[] dataPaths = new string[] { Path.Combine(_wordNetDirectory, "data.adj"), Path.Combine(_wordNetDirectory, "data.adv"), Path.Combine(_wordNetDirectory, "data.noun"), Path.Combine(_wordNetDirectory, "data.verb") }; string[] indexPaths = new string[] { Path.Combine(_wordNetDirectory, "index.adj"), Path.Combine(_wordNetDirectory, "index.adv"), Path.Combine(_wordNetDirectory, "index.noun"), Path.Combine(_wordNetDirectory, "index.verb") }; // make sure all files exist foreach (string path in dataPaths.Union(indexPaths)) { if (!System.IO.File.Exists(path)) { throw new FileNotFoundException("Failed to find WordNet file: " + path); } } #region index file sorting string sortFlagPath = Path.Combine(_wordNetDirectory, ".sorted_for_dot_net"); if (!System.IO.File.Exists(sortFlagPath)) { /* make sure the index files are sorted according to the current sort order. the index files in the * wordnet distribution are sorted in the order needed for (presumably) the java api, which uses * a different sort order than the .net runtime. thus, unless we resort the lines in the index * files, we won't be able to do a proper binary search over the data. 
*/ foreach (string indexPath in indexPaths) { // create temporary file for sorted lines string tempPath = Path.GetTempFileName(); StreamWriter tempFile = new StreamWriter(tempPath); // get number of words (lines) in file int numWords = 0; StreamReader indexFile = new StreamReader(indexPath); string line; while (indexFile.TryReadLine(out line)) { if (!line.StartsWith(" ")) { ++numWords; } } // get lines in file, sorted by first column (i.e., the word) Dictionary <string, string> wordLine = new Dictionary <string, string>(numWords); indexFile = new StreamReader(indexPath); while (indexFile.TryReadLine(out line)) { // write header lines to temp file immediately if (line.StartsWith(" ")) { tempFile.WriteLine(line); } else { // trim useless blank spaces from line and map line to first column line = line.Trim(); wordLine.Add(line.Substring(0, line.IndexOf(' ')), line); } } // get sorted words List <string> sortedWords = new List <string>(wordLine.Count); sortedWords.AddRange(wordLine.Keys); sortedWords.Sort(); // write lines sorted by word foreach (string word in sortedWords) { tempFile.WriteLine(wordLine[word]); } tempFile.Close(); // replace original index file with properly sorted one System.IO.File.Delete(indexPath); System.IO.File.Move(tempPath, indexPath); } // create flag file, indicating that we've sorted the data StreamWriter sortFlagFile = new StreamWriter(sortFlagPath); sortFlagFile.WriteLine("This file serves no purpose other than to indicate that the WordNet distribution data in the current directory has been sorted for use by the .NET API."); sortFlagFile.Close(); } #endregion #region engine init if (inMemory) { // pass 1: get total number of synsets int totalSynsets = 0; foreach (string dataPath in dataPaths) { // scan synset data file for lines that don't start with a space...these are synset definition lines StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { ++totalSynsets; } } } // pass 2: create synset shells (pos and offset only) _idSynset = new Dictionary <string, SynSet>(totalSynsets); foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // get offset and create synset shell int offset = int.Parse(line.Substring(0, firstSpace)); SynSet synset = new SynSet(pos, offset, null); _idSynset.Add(synset.ID, synset); } } } // pass 3: instantiate synsets (hooks up relations, set glosses, etc.) 
foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // instantiate synset defined on current line, using the instantiated synsets for all references _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset); } } } // organize synsets by pos and words...also set most common synset for word-pos pairs that have multiple synsets _posWordSynSets = new Dictionary <POS, Dictionary <string, Set <SynSet> > >(); foreach (string indexPath in indexPaths) { POS pos = GetFilePOS(indexPath); _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, Set <SynSet> >)); // scan word index file, skipping header lines StreamReader indexFile = new StreamReader(indexPath); string line; while (indexFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // grab word and synset shells, along with the most common synset string word = line.Substring(0, firstSpace); SynSet mostCommonSynSet; Set <SynSet> synsets = GetSynSetShells(line, pos, out mostCommonSynSet, null); // set flag on most common synset if it's ambiguous if (synsets.Count > 1) { _idSynset[mostCommonSynSet.ID].SetAsMostCommonSynsetFor(word); } // use reference to the synsets that we instantiated in our three-pass routine above _posWordSynSets[pos].Add(word, new Set <SynSet>(synsets.Count)); foreach (SynSet synset in synsets) { _posWordSynSets[pos][word].Add(_idSynset[synset.ID]); } } } } } else { // open binary search streams for index files _posIndexWordSearchStream = new Dictionary <POS, BinarySearchTextStream>(); foreach (string indexPath in indexPaths) { // create binary search stream for index file BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate(delegate(object searchWord, string currentLine) { // if we landed on the header text, search further down if (currentLine[0] == ' ') { return(1); } // get word on current line string currentWord = currentLine.Substring(0, currentLine.IndexOf(' ')); // compare searched-for word to the current word return(((string)searchWord).CompareTo(currentWord)); })); // add search stream for current POS _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream); } // open readers for synset data files _posSynSetDataFile = new Dictionary <POS, StreamReader>(); foreach (string dataPath in dataPaths) { _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath)); } } #endregion }
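A usage sketch for the constructor above; the directory path is hypothetical and must point at a WordNet distribution containing the data.* and index.* files:

using System;

class WordNetEngineDemo
{
    static void Main()
    {
        // true  -> load all synsets into memory (fast lookups, large footprint)
        // false -> binary-search the index files on disk (small footprint, slower lookups)
        WordNetEngine engine = new WordNetEngine(@"C:\wordnet\dict", true);
        Console.WriteLine("WordNet engine ready");
    }
}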
public static void EnsureContainsKey<K, V>(this Dictionary<K, V> dictionary, K key, Type valueType)
{
    dictionary.EnsureContainsKey(key, valueType, null);
}
/// <summary> /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from /// idSynset; otherwise, related synsets are created as shells. /// </summary> /// <param name="definition">Definition line of synset from data file</param> /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param> internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset) { // don't re-instantiate if (_instantiated) throw new Exception("Synset has already been instantiated"); /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */ int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1; if (lexicographerFileNumber <= 0) throw new Exception("Invalid lexicographer file name number. Should be >= 1."); _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber; // get number of words in the synset and the start character of the word list int wordStart; int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber); wordStart = definition.IndexOf(' ', wordStart) + 1; // get words in synset _words = new List<string>(numWords); for (int i = 0; i < numWords; ++i) { int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1; int wordLen = wordEnd - wordStart + 1; string word = definition.Substring(wordStart, wordLen); if (word.Contains(' ')) throw new Exception("Unexpected space in word: " + word); _words.Add(word); // skip lex_id field wordStart = definition.IndexOf(' ', wordEnd + 2) + 1; } // get gloss _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim(); if (_gloss.Contains('|')) throw new Exception("Unexpected pipe in gloss"); // get number and start of relations int relationCountField = 3 + (_words.Count * 2) + 1; int relationFieldStart; int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart)); relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1; // grab each related synset _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>(); _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>(); for (int relationNum = 0; relationNum < numRelations; ++relationNum) { string relationSymbol = null; int relatedSynSetOffset = -1; WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None; int sourceWordIndex = -1; int targetWordIndex = -1; // each relation has four columns for (int relationField = 0; relationField <= 3; ++relationField) { int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1; int fieldLen = fieldEnd - relationFieldStart + 1; string fieldValue = definition.Substring(relationFieldStart, fieldLen); // relation symbol if (relationField == 0) relationSymbol = fieldValue; // related synset offset else if (relationField == 1) relatedSynSetOffset = int.Parse(fieldValue); // related synset POS else if (relationField == 2) relatedSynSetPOS = GetPOS(fieldValue); // source/target word for lexical relation else if (relationField == 3) { sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber); targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber); } else throw new Exception(); relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1; } // get 
related synset...create shell if we don't have a lookup SynSet relatedSynSet; if (idSynset == null) relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine); // look up related synset directly else relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]; // get relation WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol); // add semantic relation if we have neither a source nor a target word index if (sourceWordIndex == 0 && targetWordIndex == 0) { _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>)); _relationSynSets[relation].Add(relatedSynSet); } // add lexical relation else { _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>)); _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>)); _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set<int>)); if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex)) _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex); } } // release the wordnet engine if we have one...don't need it anymore if (_wordNetEngine != null) _wordNetEngine = null; _instantiated = true; }
/// <summary> /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from /// idSynset; otherwise, related synsets are created as shells. /// </summary> /// <param name="definition">Definition line of synset from data file</param> /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param> internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset) { // don't re-instantiate if (!_instantiated) { // get number of words in the synset and the start character of the word list int wordStart; int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber); wordStart = definition.IndexOf(' ', wordStart) + 1; _words = new List <string>(numWords); // get words in synset for (int i = 0; i < numWords; ++i) { int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1; int wordLen = wordEnd - wordStart + 1; string word = definition.Substring(wordStart, wordLen); _words.Add(word); // get lex_id lex_id = Convert.ToInt32(definition.Substring(definition.IndexOf(' ') + 1, 2)); // skip lex_id field wordStart = definition.IndexOf(' ', wordEnd + 2) + 1; } // get gloss _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim(); // get number and start of relations int relationCountField = 3 + (_words.Count * 2) + 1; int relationFieldStart; int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart)); relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1; // grab each related synset _relationSynSets = new Dictionary <WordNetEngine.SynSetRelation, List <SynSet> >(); _lexicalRelations = new Dictionary <WordNetEngine.SynSetRelation, Dictionary <SynSet, Dictionary <int, List <int> > > >(); for (int relationNum = 0; relationNum < numRelations; ++relationNum) { string relationSymbol = null; int relatedSynSetOffset = -1; WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None; int sourceWordIndex = -1; int targetWordIndex = -1; // each relation has four columns for (int relationField = 0; relationField <= 3; ++relationField) { int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1; int fieldLen = fieldEnd - relationFieldStart + 1; string fieldValue = definition.Substring(relationFieldStart, fieldLen); // relation symbol if (relationField == 0) { relationSymbol = fieldValue; } // related synset offset else if (relationField == 1) { relatedSynSetOffset = int.Parse(fieldValue); } // related synset POS else if (relationField == 2) { relatedSynSetPOS = GetPOS(fieldValue); } // source/target word for lexical relation else if (relationField == 3) { sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber); targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber); } else { throw new Exception(); } relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1; } // get related synset...create shell if we don't have a lookup SynSet relatedSynSet; if (idSynset == null) { relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine); } // look up related synset directly else { relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]; } // get relation WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol); // add semantic relation if we have neither a source nor a target word index if (sourceWordIndex == 0 && targetWordIndex == 0) { _relationSynSets.EnsureContainsKey(relation, 
typeof(List <SynSet>)); _relationSynSets[relation].Add(relatedSynSet); } // add lexical relation else { _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary <SynSet, Dictionary <int, List <int> > >)); _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary <int, List <int> >)); _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(List <int>)); if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex)) { _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex); } } } _instantiated = true; } // release the wordnet engine if we have one...don't need it anymore if (_wordNetEngine != null) { _wordNetEngine = null; } }
protected override void Run(Prediction prediction) { List<PostGIS.Point> predictionPoints = new List<PostGIS.Point>(); Area predictionArea = prediction.PredictionArea; double areaMinX = predictionArea.BoundingBox.MinX; double areaMaxX = predictionArea.BoundingBox.MaxX; double areaMinY = predictionArea.BoundingBox.MinY; double areaMaxY = predictionArea.BoundingBox.MaxY; for (double x = areaMinX + prediction.PredictionPointSpacing / 2d; x <= areaMaxX; x += prediction.PredictionPointSpacing) // place points in the middle of the square boxes that cover the region - we get display errors from pixel rounding if the points are exactly on the boundaries for (double y = areaMinY + prediction.PredictionPointSpacing / 2d; y <= areaMaxY; y += prediction.PredictionPointSpacing) predictionPoints.Add(new PostGIS.Point(x, y, predictionArea.Shapefile.SRID)); List<PostGIS.Point> incidentPoints = new List<PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, IncidentTypes.ToArray()).Select(i => i.Location)); predictionPoints.AddRange(incidentPoints); Console.Out.WriteLine("Filtering prediction points to prediction area"); predictionPoints = predictionArea.Intersects(predictionPoints, prediction.PredictionPointSpacing / 2f).Select(i => predictionPoints[i]).ToList(); NpgsqlConnection connection = DB.Connection.OpenConnection; try { Console.Out.WriteLine("Inserting points into prediction"); Point.CreateTable(prediction, predictionArea.Shapefile.SRID); List<int> predictionPointIds = Point.Insert(connection, predictionPoints.Select(p => new Tuple<PostGIS.Point, string, DateTime>(p, PointPrediction.NullLabel, DateTime.MinValue)), prediction, predictionArea, false); Console.Out.WriteLine("Running overall KDE for " + IncidentTypes.Count + " incident type(s)"); List<float> density = GetDensityEstimate(incidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize); Dictionary<int, float> pointIdOverallDensity = new Dictionary<int, float>(predictionPointIds.Count); int pointNum = 0; foreach (int predictionPointId in predictionPointIds) pointIdOverallDensity.Add(predictionPointId, density[pointNum++]); Dictionary<int, Dictionary<string, double>> pointIdIncidentDensity = new Dictionary<int, Dictionary<string, double>>(pointIdOverallDensity.Count); if (IncidentTypes.Count == 1) { string incident = IncidentTypes.First(); foreach (int pointId in pointIdOverallDensity.Keys) { Dictionary<string, double> incidentDensity = new Dictionary<string, double>(); incidentDensity.Add(incident, pointIdOverallDensity[pointId]); pointIdIncidentDensity.Add(pointId, incidentDensity); } } else foreach (string incidentType in IncidentTypes) { Console.Out.WriteLine("Running KDE for incident \"" + incidentType + "\""); incidentPoints = new List<PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, incidentType).Select(i => i.Location)); density = GetDensityEstimate(incidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize); if (density.Count > 0) { pointNum = 0; foreach (int predictionPointId in predictionPointIds) { pointIdIncidentDensity.EnsureContainsKey(predictionPointId, typeof(Dictionary<string, double>)); pointIdIncidentDensity[predictionPointId].Add(incidentType, density[pointNum++]); } } } PointPrediction.CreateTable(prediction); PointPrediction.Insert(GetPointPredictionValues(pointIdOverallDensity, pointIdIncidentDensity), prediction, false); Smooth(prediction); } finally { DB.Connection.Return(connection); } }
/// <summary> /// Initializes a new instance of the <see cref="WordNetMemoryProvider"/> class. /// </summary> /// <param name="dataPath">The data path.</param> /// <exception cref="System.ArgumentNullException">dataPath</exception> /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception> /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception> public WordNetMemoryProvider(string dataPath) { if (string.IsNullOrEmpty(dataPath)) { throw new ArgumentNullException("dataPath"); } var dir = new DirectoryInfo(dataPath); if (!dir.Exists) { throw new DirectoryNotFoundException("The data directory does not exist."); } var dataPaths = new [] { new FileInfo(Path.Combine(dataPath, "data.adj")), new FileInfo(Path.Combine(dataPath, "data.adv")), new FileInfo(Path.Combine(dataPath, "data.noun")), new FileInfo(Path.Combine(dataPath, "data.verb")) }; var indexPaths = new [] { new FileInfo(Path.Combine(dataPath, "index.adj")), new FileInfo(Path.Combine(dataPath, "index.adv")), new FileInfo(Path.Combine(dataPath, "index.noun")), new FileInfo(Path.Combine(dataPath, "index.verb")) }; foreach (var file in dataPaths.Union(indexPaths).Where(file => !file.Exists)) { throw new FileNotFoundException("A required WordNet file does not exist: " + file.Name); } // Pass 1: Get total number of synsets var totalSynsets = 0; foreach (var dataInfo in dataPaths) { // scan synset data file for lines that don't start with a space... // these are synset definition lines using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace > 0) { ++totalSynsets; } } } } // Pass 2: Create synset shells (pos and offset only) idSynset = new Dictionary <string, SynSet>(totalSynsets); foreach (var dataInfo in dataPaths) { var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName); // scan synset data file using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace <= 0) { continue; } // get offset and create synset shell var offset = int.Parse(line.Substring(0, firstSpace)); var synset = new SynSet(pos, offset, null); idSynset.Add(synset.Id, synset); } } } // Pass 3: Instantiate synsets (hooks up relations, set glosses, etc.) foreach (var dataInfo in dataPaths) { var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName); // scan synset data file using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // instantiate synset defined on current line, using the instantiated synsets for all references idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, idSynset); } } } } // organize synsets by pos and words... 
// also set most common synset for word-pos pairs that have multiple synsets posWordSynSets = new Dictionary <WordNetPos, Dictionary <string, List <SynSet> > >(); foreach (var indexInfo in indexPaths) { var pos = WordNetFileProvider.GetFilePos(indexInfo.FullName); posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, List <SynSet> >)); // scan word index file, skipping header lines using (var indexFile = new StreamReader(indexInfo.FullName)) { string line; while ((line = indexFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace <= 0) { continue; } // grab word and synset shells, along with the most common synset var word = line.Substring(0, firstSpace); SynSet mostCommonSynSet; var synsets = WordNetFileProvider.GetSynSetShells(line, pos, out mostCommonSynSet, wordNet); // set flag on most common synset if it's ambiguous if (synsets.Count > 1) { idSynset[mostCommonSynSet.Id].SetAsMostCommonSynsetFor(word); } // use reference to the synsets that we instantiated in our three-pass routine above posWordSynSets[pos].Add(word, new List <SynSet>(synsets.Count)); foreach (var synset in synsets) { posWordSynSets[pos][word].Add(idSynset[synset.Id]); } } } } }
/// <summary>
/// Ensures that the current dictionary contains a given key. If it does not, a new key-value pair is added
/// using the given key and the value resulting from calling the default constructor for the valueType type.
/// </summary>
/// <typeparam name="KeyType">Key type</typeparam>
/// <typeparam name="ValueType">Value type</typeparam>
/// <param name="dictionary">Dictionary</param>
/// <param name="key">Key to ensure the existence of</param>
/// <param name="valueType">Type of value</param>
public static void EnsureContainsKey<KeyType, ValueType>(this Dictionary<KeyType, ValueType> dictionary, KeyType key, Type valueType)
{
    dictionary.EnsureContainsKey<KeyType, ValueType>(key, valueType, (object[])null);
}
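The two-argument overloads above delegate to a three-argument version that also accepts constructor arguments for the value type. The sketch below shows what that core overload might look like using Activator.CreateInstance; this is an assumption about the LAIR implementation rather than its actual source, and the trailing boolean seen at some call sites is presumably forwarded as a constructor argument (e.g., to Set's duplicate-handling constructor):

using System;
using System.Collections.Generic;

static class DictionaryExtensions
{
    // Sketch only: add a value constructed from valueType (with optional constructor arguments)
    // the first time the key is seen; do nothing if the key already exists.
    public static void EnsureContainsKey<KeyType, ValueType>(this Dictionary<KeyType, ValueType> dictionary, KeyType key, Type valueType, params object[] constructorArgs)
    {
        if (!dictionary.ContainsKey(key))
            dictionary.Add(key, (ValueType)Activator.CreateInstance(valueType, constructorArgs));
    }
}

class EnsureContainsKeyDemo
{
    static void Main()
    {
        var wordCounts = new Dictionary<string, int>();
        wordCounts.EnsureContainsKey("the", typeof(int));  // adds "the" -> 0
        wordCounts["the"]++;
        Console.WriteLine(wordCounts["the"]);  // 1
    }
}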
/// <summary> /// Constructor /// </summary> /// <param name="frameNetDirectory">Path to FrameNet distribution directory</param> /// <param name="version">FrameNet version</param> public FrameNetEngine(string frameNetDirectory, Version version) { if (!System.IO.Directory.Exists(frameNetDirectory)) { throw new DirectoryNotFoundException("Invalid FrameNet directory"); } _frameNameFrame = new Dictionary <string, Frame>(); _frameElementIdFrameElement = new Dictionary <int, FrameElement>(); _lexemeLexicalUnitIDs = new Dictionary <string, Set <int> >(); _lexicalUnitIdFrame = new Dictionary <int, Frame>(); _lexicalUnitLexicalUnitIDs = new Dictionary <string, Set <int> >(); _lexicalUnitIdLexicalUnit = new Dictionary <int, LexicalUnit>(); if (version == Version.FrameNet_1_3) { // init annotation engine _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "luXML"), version); #region get frames Set <int> uniqueFrameIDCheck = new Set <int>(); XmlParser framesP = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frames.xml"))); while (framesP.SkipToElement("frame")) { // create frame string frameXML = framesP.OuterXML("frame"); XmlParser frameP = new XmlParser(frameXML); int frameID = int.Parse(frameP.AttributeValue("frame", "ID")); string frameName = frameP.AttributeValue("frame", "name").ToLower().Trim(); // use lowercase for all frame names string frameDefinition = frameP.ElementText("definition"); Frame frame = new Frame(frameName, frameDefinition, frameID); // add to frame index index _frameNameFrame.Add(frame.Name, frame); uniqueFrameIDCheck.Add(frame.ID); // get frame elements string fesXML = frameP.OuterXML("fes"); XmlParser fesP = new XmlParser(fesXML); string feXML; while ((feXML = fesP.OuterXML("fe")) != null) { // get frame element XmlParser feParser = new XmlParser(feXML); int feID = int.Parse(feParser.AttributeValue("fe", "ID")); string feName = feParser.AttributeValue("fe", "name").Trim().ToLower(); string feDef = feParser.ElementText("definition"); FrameElement fe = new FrameElement(feID, feName, feDef, frame); frame.FrameElements.Add(fe); // add to index _frameElementIdFrameElement.Add(fe.ID, fe); } // get lexical units string lusXML = frameP.OuterXML("lexunits"); XmlParser lusParser = new XmlParser(lusXML); string luXML; while ((luXML = lusParser.OuterXML("lexunit")) != null) { XmlParser luParser = new XmlParser(luXML); int luID = int.Parse(luParser.AttributeValue("lexunit", "ID")); string luName = luParser.AttributeValue("lexunit", "name"); luName = luName.Substring(0, luName.IndexOf('.')); string luPos = luParser.AttributeValue("lexunit", "pos"); string luDef = luParser.ElementText("definition"); // get lexemes for this lexunit...we may get duplicates...don't worry about them Set <Lexeme> lexemes = new Set <Lexeme>(false); string lexemesXML = luParser.OuterXML("lexemes"); XmlParser lexemesP = new XmlParser(lexemesXML); string lexemeXML; while ((lexemeXML = lexemesP.OuterXML("lexeme")) != null) { XmlParser lexemeP = new XmlParser(lexemeXML); string pos = lexemeP.AttributeValue("lexeme", "pos"); bool breakBefore = bool.Parse(lexemeP.AttributeValue("lexeme", "breakBefore")); bool head = bool.Parse(lexemeP.AttributeValue("lexeme", "headword")); string value = lexemeP.ElementText("lexeme"); lexemes.Add(new Lexeme(value, pos, breakBefore, head)); } // create lexical unit and add to frame LexicalUnit lexicalUnit = new LexicalUnit(luID, luName, luPos, luDef, lexemes); 
frame.LexicalUnits.Add(lexicalUnit); // add map from full lexeme string to lexical unit id string lexemeString = lexicalUnit.ToString(); _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeString, typeof(Set <int>), false); _lexemeLexicalUnitIDs[lexemeString].Add(luID); // add map from lexical unit to frame _lexicalUnitIdFrame.Add(lexicalUnit.ID, frame); // add map from lexical unit to lexical unit id _lexicalUnitLexicalUnitIDs.EnsureContainsKey(lexicalUnit.Name, typeof(Set <int>)); _lexicalUnitLexicalUnitIDs[lexicalUnit.Name].Add(lexicalUnit.ID); // add map from lexical unit ID to lexical unit _lexicalUnitIdLexicalUnit.Add(lexicalUnit.ID, lexicalUnit); } } #endregion #region get frame relations framesP = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml"))); string relationsXML; while ((relationsXML = framesP.OuterXML("frame-relation-type")) != null) { // get relation type XmlParser relationsP = new XmlParser(relationsXML); Frame.FrameRelation relation = Frame.GetFrameRelation(relationsP.AttributeValue("frame-relation-type", "name")); string relationXML; while ((relationXML = relationsP.OuterXML("frame-relation")) != null) { XmlParser relationP = new XmlParser(relationXML); string superFrameName = relationP.AttributeValue("frame-relation", "superFrameName").ToLower(); string subFrameName = relationP.AttributeValue("frame-relation", "subFrameName").ToLower(); Frame superFrame = _frameNameFrame[superFrameName]; Frame subFrame = _frameNameFrame[subFrameName]; superFrame.GetSubFrames(relation).Add(subFrame); subFrame.GetSuperFrames(relation).Add(superFrame); // add FE relations while (relationP.SkipToElement("fe-relation")) { int superFeID = int.Parse(relationP.AttributeValue("fe-relation", "supId")); int subFeID = int.Parse(relationP.AttributeValue("fe-relation", "subId")); FrameElement superFE = superFrame.FrameElements.Get(superFeID); FrameElement subFE = subFrame.FrameElements.Get(subFeID); superFE.AddSubFrameElement(subFE, relation); subFE.AddSuperFrameElement(superFE, relation); } } } #endregion } else if (version == Version.FrameNet_1_5) { // init annotation engine _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "lu"), version); #region get frames Set <int> uniqueFrameIDCheck = new Set <int>(); foreach (string framePath in System.IO.Directory.GetFiles(LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "frame"), "*.xml")) { // create frame XmlParser frameP = new XmlParser(System.IO.File.ReadAllText(framePath)); int frameID = int.Parse(frameP.AttributeValue("frame", "ID")); string frameName = frameP.AttributeValue("frame", "name").ToLower().Trim(); // use lowercase for all frame names string frameDefinition = frameP.ElementText("definition"); Frame frame = new Frame(frameName, frameDefinition, frameID); // add to frame index index _frameNameFrame.Add(frame.Name, frame); uniqueFrameIDCheck.Add(frame.ID); // get frame elements string feXML; while ((feXML = frameP.OuterXML("FE")) != null) { // get frame element XmlParser feParser = new XmlParser(feXML); int feID = int.Parse(feParser.AttributeValue("FE", "ID")); string feName = feParser.AttributeValue("FE", "name").Trim().ToLower(); string feDef = feParser.ElementText("definition"); FrameElement fe = new FrameElement(feID, feName, feDef, frame); frame.FrameElements.Add(fe); // add to index _frameElementIdFrameElement.Add(fe.ID, fe); } // get lexical units frameP.Reset(); string luXML; while ((luXML 
= frameP.OuterXML("lexUnit")) != null) { XmlParser luParser = new XmlParser(luXML); string luPos = luParser.AttributeValue("lexUnit", "POS"); string luName = luParser.AttributeValue("lexUnit", "name"); luName = luName.Substring(0, luName.IndexOf('.')); int luID = int.Parse(luParser.AttributeValue("lexUnit", "ID")); string luDef = luParser.ElementText("definition"); // get lexemes for this lexunit...we may get duplicates...don't worry about them Set <Lexeme> lexemes = new Set <Lexeme>(false); string lexemeXML; while ((lexemeXML = luParser.OuterXML("lexeme")) != null) { XmlParser lexemeP = new XmlParser(lexemeXML); bool head = bool.Parse(lexemeP.AttributeValue("lexeme", "headword")); bool breakBefore = bool.Parse(lexemeP.AttributeValue("lexeme", "breakBefore")); string pos = lexemeP.AttributeValue("lexeme", "POS"); string value = lexemeP.AttributeValue("lexeme", "name"); lexemes.Add(new Lexeme(value, pos, breakBefore, head)); } // create lexical unit and add to frame LexicalUnit lexicalUnit = new LexicalUnit(luID, luName, luPos, luDef, lexemes); frame.LexicalUnits.Add(lexicalUnit); // add map from full lexeme string to lexical unit id string lexemeString = lexicalUnit.ToString(); _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeString, typeof(Set <int>), false); _lexemeLexicalUnitIDs[lexemeString].Add(luID); // add map from lexical unit to frame _lexicalUnitIdFrame.Add(lexicalUnit.ID, frame); // add map from lexical unit to lexical unit id _lexicalUnitLexicalUnitIDs.EnsureContainsKey(lexicalUnit.Name, typeof(Set <int>)); _lexicalUnitLexicalUnitIDs[lexicalUnit.Name].Add(lexicalUnit.ID); // add map from lexical unit ID to lexical unit _lexicalUnitIdLexicalUnit.Add(lexicalUnit.ID, lexicalUnit); } } #endregion #region get relations XmlParser allRelationsP = new XmlParser(System.IO.File.ReadAllText(LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml"))); string relationsXML; while ((relationsXML = allRelationsP.OuterXML("frameRelationType")) != null) { // get relation type XmlParser relationsP = new XmlParser(relationsXML); Frame.FrameRelation relation = Frame.GetFrameRelation(relationsP.AttributeValue("frameRelationType", "name")); // read each instance of the relation string relationXML; while ((relationXML = relationsP.OuterXML("frameRelation")) != null) { XmlParser relationP = new XmlParser(relationXML); // get related frames Frame subFrame = _frameNameFrame[relationP.AttributeValue("frameRelation", "subFrameName").ToLower()]; Frame superFrame = _frameNameFrame[relationP.AttributeValue("frameRelation", "superFrameName").ToLower()]; subFrame.GetSuperFrames(relation).Add(superFrame); superFrame.GetSubFrames(relation).Add(subFrame); // add FE relations while (relationP.SkipToElement("FERelation")) { FrameElement subFE = subFrame.FrameElements.Get(int.Parse(relationP.AttributeValue("FERelation", "subID"))); FrameElement superFE = superFrame.FrameElements.Get(int.Parse(relationP.AttributeValue("FERelation", "supID"))); subFE.AddSuperFrameElement(superFE, relation); superFE.AddSubFrameElement(subFE, relation); } } } #endregion } else { throw new Exception("Unrecognized FrameNet version: " + version); } }
private void GetThreatSurfaces(Rectangle bitmapDimensions, bool displayFirstSlice, Dictionary<long, List<Tuple<RectangleF, double, string>>> sliceSquareThreatType = null) { if (_sliceIncidentPointScores == null) return; Set<string> selectedIncidents = new Set<string>(incidentTypeCheckBoxes.Controls.Cast<ColoredCheckBox>().Where(c => c.CheckState != CheckState.Unchecked).Select(c => c.Text).ToArray()); float pixelsPerMeter; float threatRectanglePixelWidth; GetDrawingParameters(bitmapDimensions, out pixelsPerMeter, out threatRectanglePixelWidth); List<long> slices = _sliceIncidentPointScores.Keys.OrderBy(s => s).ToList(); Dictionary<long, Dictionary<int, Dictionary<int, Tuple<double, string>>>> sliceRowColScoreIncident = new Dictionary<long, Dictionary<int, Dictionary<int, Tuple<double, string>>>>(slices.Count); Dictionary<long, Bitmap> newSliceThreatSurface = new Dictionary<long, Bitmap>(slices.Count); double overallMinScore = double.MaxValue; double overallMaxScore = double.MinValue; List<Thread> threads = new List<Thread>(Configuration.ProcessorCount); for (int i = 0; i < Configuration.ProcessorCount; ++i) { Thread t = new Thread(new ParameterizedThreadStart(core => { for (int j = (int)core; j < slices.Count; j += Configuration.ProcessorCount) { long slice = slices[j]; #region create bitmap for current slice's threat surface try { lock (newSliceThreatSurface) { newSliceThreatSurface.Add(slice, new Bitmap(bitmapDimensions.Width, bitmapDimensions.Height, PixelFormat.Format16bppRgb565)); } } catch (ArgumentException) { Console.Out.WriteLine("Maximum zoom exceeded. Reset zoom to refresh display."); return; } #endregion #region get incident scores for each row and column of current slice Dictionary<int, Dictionary<int, Dictionary<string, List<double>>>> rowColIncidentScores = new Dictionary<int, Dictionary<int, Dictionary<string, List<double>>>>(); foreach (string incident in _sliceIncidentPointScores[slice].Keys) if (selectedIncidents.Contains(incident)) foreach (Tuple<PointF, double> pointScore in _sliceIncidentPointScores[slice][incident]) { PointF drawingPoint = ConvertMetersPointToDrawingPoint(pointScore.Item1, _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions); int row, col; GetThreatRectangleRowColumn(drawingPoint, threatRectanglePixelWidth, out row, out col); rowColIncidentScores.EnsureContainsKey(row, typeof(Dictionary<int, Dictionary<string, List<double>>>)); rowColIncidentScores[row].EnsureContainsKey(col, typeof(Dictionary<string, List<double>>)); rowColIncidentScores[row][col].EnsureContainsKey(incident, typeof(List<double>)); rowColIncidentScores[row][col][incident].Add(pointScore.Item2); } #endregion #region get score/incident pairs for each cell, tracking min and max scores Dictionary<int, Dictionary<int, Tuple<double, string>>> rowColScoreIncident = new Dictionary<int, Dictionary<int, Tuple<double, string>>>(); double sliceMinScore = double.MaxValue; double sliceMaxScore = double.MinValue; foreach (int row in rowColIncidentScores.Keys) foreach (int col in rowColIncidentScores[row].Keys) { Dictionary<string, List<double>> incidentScores = rowColIncidentScores[row][col]; string mostLikelyIncident = null; double scoreForMostLikelyIncident = double.MinValue; foreach (string incident in incidentScores.Keys) { double score = incidentScores[incident].Average(); if (score > scoreForMostLikelyIncident) { mostLikelyIncident = incident; scoreForMostLikelyIncident = score; } } if (scoreForMostLikelyIncident < sliceMinScore) sliceMinScore = scoreForMostLikelyIncident; if 
(scoreForMostLikelyIncident > sliceMaxScore) sliceMaxScore = scoreForMostLikelyIncident; rowColScoreIncident.EnsureContainsKey(row, typeof(Dictionary<int, Tuple<double, string>>)); rowColScoreIncident[row].Add(col, new Tuple<double, string>(scoreForMostLikelyIncident, mostLikelyIncident)); } #endregion #region store information from thread lock (sliceRowColScoreIncident) { sliceRowColScoreIncident.Add(slice, rowColScoreIncident); } lock (this) { if (sliceMinScore < overallMinScore) overallMinScore = sliceMinScore; } lock (this) { if (sliceMaxScore > overallMaxScore) overallMaxScore = sliceMaxScore; } #endregion } })); t.Start(i); threads.Add(t); } foreach (Thread t in threads) t.Join(); #region draw threat surfaces double scoreRange = overallMaxScore - overallMinScore; if (scoreRange == 0) scoreRange = float.Epsilon; threads.Clear(); for (int i = 0; i < Configuration.ProcessorCount; ++i) { Thread t = new Thread(new ParameterizedThreadStart(core => { using(Pen pen = new Pen(BackColor, 1)) using(SolidBrush brush = new SolidBrush(BackColor)) { for (int j = (int)core; j < slices.Count; j += Configuration.ProcessorCount) { long slice = slices[j]; Graphics g = Graphics.FromImage(newSliceThreatSurface[slice]); g.Clear(BackColor); #region threat foreach (int row in sliceRowColScoreIncident[slice].Keys) foreach (int col in sliceRowColScoreIncident[slice][row].Keys) { Tuple<double, string> scoreIncident = sliceRowColScoreIncident[slice][row][col]; double scaledScore = (scoreIncident.Item1 - overallMinScore) / scoreRange; double percentTransparent = 1 - scaledScore; Color color = _incidentColor[scoreIncident.Item2]; byte red = (byte)(scaledScore * color.R + percentTransparent * BackColor.R); byte green = (byte)(scaledScore * color.G + percentTransparent * BackColor.G); byte blue = (byte)(scaledScore * color.B + percentTransparent * BackColor.B); brush.Color = Color.FromArgb(red, green, blue); RectangleF threatSquare = new RectangleF(col * threatRectanglePixelWidth, row * threatRectanglePixelWidth, threatRectanglePixelWidth, threatRectanglePixelWidth); g.FillRectangle(brush, threatSquare); if (sliceSquareThreatType != null) { sliceSquareThreatType.EnsureContainsKey(slice, typeof(List<Tuple<RectangleF, double, string>>)); sliceSquareThreatType[slice].Add(new Tuple<RectangleF, double, string>(threatSquare, scoreIncident.Item1, scoreIncident.Item2)); } } #endregion #region overlays foreach (Overlay overlay in Overlays) if (overlay.Displayed) { pen.Color = overlay.Color; brush.Color = overlay.Color; foreach (List<PointF> points in overlay.Points) if (points.Count == 1) { PointF drawingPoint = ConvertMetersPointToDrawingPoint(points[0], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions); RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter); g.FillEllipse(brush, circle); g.DrawEllipse(pen, circle); } else for (int p = 1; p < points.Count; ++p) g.DrawLine(pen, ConvertMetersPointToDrawingPoint(points[p - 1], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions), ConvertMetersPointToDrawingPoint(points[p], _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions)); } #endregion #region true incidents Set<string> selectedTrueIncidentOverlays = new Set<string>(incidentTypeCheckBoxes.Controls.Cast<ColoredCheckBox>().Where(c => c.CheckState == CheckState.Checked).Select(c => c.Text).ToArray()); DateTime sliceStart = DisplayedPrediction.PredictionStartTime; DateTime sliceEnd = DisplayedPrediction.PredictionEndTime; if (slice != -1) { if (!(DisplayedPrediction.Model is 
TimeSliceDCM)) throw new Exception("Expected TimeSliceDCM since slice != -1"); long sliceTicks = (DisplayedPrediction.Model as TimeSliceDCM).TimeSliceTicks; sliceStart = new DateTime(slice * sliceTicks); sliceEnd = sliceStart + new TimeSpan(sliceTicks); } foreach (string trueIncidentOverlay in selectedTrueIncidentOverlays) { brush.Color = _incidentColor[trueIncidentOverlay]; pen.Color = Color.Black; foreach (Incident incident in Incident.Get(sliceStart, sliceEnd, DisplayedPrediction.PredictionArea, trueIncidentOverlay)) { PointF drawingPoint = ConvertMetersPointToDrawingPoint(new PointF((float)incident.Location.X, (float)incident.Location.Y), _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions); RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter); g.FillEllipse(brush, circle); g.DrawEllipse(pen, circle); } } #endregion #region prediction points if (_displayPredictionPoints) { brush.Color = _predictionPointColor; pen.Color = Color.Black; foreach (Point p in DisplayedPrediction.Points) { PointF drawingPoint = ConvertMetersPointToDrawingPoint(new PointF((float)p.Location.X, (float)p.Location.Y), _regionBottomLeftInMeters, pixelsPerMeter, bitmapDimensions); RectangleF circle = GetCircleBoundingBox(drawingPoint, _pointDrawingDiameter); g.FillEllipse(brush, circle); g.DrawEllipse(pen, circle); } } #endregion } } })); t.Start(i); threads.Add(t); } foreach (Thread t in threads) t.Join(); #endregion if (_sliceThreatSurface != null) { foreach (Bitmap threatSurface in _sliceThreatSurface.Values) threatSurface.Dispose(); _sliceThreatSurface.Clear(); } _sliceThreatSurface = newSliceThreatSurface; timeSlice.ValueChanged -= new EventHandler(timeSlice_ValueChanged); timeSlice.Minimum = (int)_sliceThreatSurface.Keys.Min(); timeSlice.Maximum = (int)_sliceThreatSurface.Keys.Max(); if (displayFirstSlice) timeSlice.Value = timeSlice.Minimum; timeSlice.ValueChanged += new EventHandler(timeSlice_ValueChanged); _zoomedImageWidth = CurrentThreatSurface.Width; Invalidate(); }
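Every snippet on this page leans on an EnsureContainsKey extension for Dictionary<TKey, TValue> that is not itself shown. The sketch below is a hypothetical minimal version covering the common two-argument calls, in which the typeof(...) argument names the concrete value type to default-construct; the overloads seen elsewhere that take additional arguments are not reproduced because their semantics are not visible here.

using System;
using System.Collections.Generic;

public static class DictionaryExtensions
{
    // If the key is absent, add it with a freshly constructed value of the given type,
    // so that dictionary[key] can then be indexed without a KeyNotFoundException.
    public static void EnsureContainsKey<TKey, TValue>(this Dictionary<TKey, TValue> dictionary, TKey key, Type valueType)
    {
        if (!dictionary.ContainsKey(key))
            dictionary.Add(key, (TValue)Activator.CreateInstance(valueType));
    }
}

With a helper like this, rowColIncidentScores.EnsureContainsKey(row, typeof(Dictionary<int, Dictionary<string, List<double>>>)) in GetThreatSurfaces simply creates the nested dictionary the first time a row is touched.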
public override void Display(Prediction prediction, IEnumerable<Overlay> overlays) { base.Display(prediction, overlays); _dragging = false; _draggingStart = System.Drawing.Point.Empty; _panOffset = new Size(0, 0); _panIncrement = 50; DiscreteChoiceModel model = prediction.Model; Dictionary<int, Point> idPoint = new Dictionary<int, Point>(); foreach (Point p in prediction.Points) idPoint.Add(p.Id, p); _incidentColor = new Dictionary<string, Color>(); _sliceIncidentPointScores = new Dictionary<long, Dictionary<string, List<Tuple<PointF, double>>>>(); float minPointX = float.MaxValue; float minPointY = float.MaxValue; float maxPointX = float.MinValue; float maxPointY = float.MinValue; foreach (PointPrediction pointPrediction in prediction.PointPredictions) { long slice = -1; if (model is TimeSliceDCM) slice = (long)(pointPrediction.Time.Ticks / (model as TimeSliceDCM).TimeSliceTicks); _sliceIncidentPointScores.EnsureContainsKey(slice, typeof(Dictionary<string, List<Tuple<PointF, double>>>)); Point point = idPoint[pointPrediction.PointId]; foreach (string incident in pointPrediction.IncidentScore.Keys) { Color color; if (!_incidentColor.TryGetValue(incident, out color)) { color = ColorPalette.GetColor(); _incidentColor.Add(incident, color); } double score = pointPrediction.IncidentScore[incident]; _sliceIncidentPointScores[slice].EnsureContainsKey(incident, typeof(List<Tuple<PointF, double>>)); _sliceIncidentPointScores[slice][incident].Add(new Tuple<PointF, double>(new PointF((float)point.Location.X, (float)point.Location.Y), score)); } float x = (float)point.Location.X; float y = (float)point.Location.Y; if (x < minPointX) minPointX = x; if (x > maxPointX) maxPointX = x; if (y < minPointY) minPointY = y; if (y > maxPointY) maxPointY = y; } if (_sliceIncidentPointScores.Count == 0) { Console.Out.WriteLine("No prediction points were generated for this prediction. There is nothing to display or evaluate."); Clear(); return; } Invoke(new Action(delegate() { incidentTypeCheckBoxes.Controls.Clear(); bool first = true; foreach (string incidentType in _incidentColor.Keys) { ColoredCheckBox cb = new ColoredCheckBox(true, first ? CheckState.Checked : CheckState.Unchecked, incidentType, _incidentColor[incidentType]); cb.CheckBoxCheckStateChanged += new EventHandler(IncidentCheckBox_CheckStateChanged); cb.LabelClicked += new EventHandler(IncidentCheckBox_LabelClicked); incidentTypeCheckBoxes.Controls.Add(cb); first = false; } overlayCheckBoxes.Controls.Clear(); foreach (Overlay overlay in Overlays) { ColoredCheckBox cb = new ColoredCheckBox(false, overlay.Displayed ? 
CheckState.Checked : CheckState.Unchecked, overlay.Name, overlay.Color); cb.CheckBoxCheckedChanged += new EventHandler(OverlayCheckBox_CheckedChanged); cb.LabelClicked += new EventHandler(OverlayCheckBox_LabelClicked); overlayCheckBoxes.Controls.Add(cb); IEnumerable<float> xs = overlay.Points.SelectMany(points => points).Select(point => point.X); IEnumerable<float> ys = overlay.Points.SelectMany(points => points).Select(point => point.Y); float minX = xs.Min(); float maxX = xs.Max(); float minY = ys.Min(); float maxY = ys.Max(); if (minX < minPointX) minPointX = minX; if (maxX > maxPointX) maxPointX = maxX; if (minY < minPointY) minPointY = minY; if (maxY > maxPointY) maxPointY = maxY; } ColoredCheckBox displayPredictionPointsCheckbox = new ColoredCheckBox(false, CheckState.Unchecked, "prediction points", _predictionPointColor); displayPredictionPointsCheckbox.CheckBoxCheckedChanged += new EventHandler(DisplayPredictionPoints_CheckedChanged); displayPredictionPointsCheckbox.LabelClicked += new EventHandler(DisplayPredictionPoints_LabelClicked); overlayCheckBoxes.Controls.Add(displayPredictionPointsCheckbox); _displayPredictionPoints = displayPredictionPointsCheckbox.Checked; _regionBottomLeftInMeters = new PointF(minPointX, minPointY); _regionSizeInMeters = new SizeF(maxPointX - minPointX, maxPointY - minPointY); bool generateThreatSurfaces = threatResolution.Value != prediction.PredictionPointSpacing; // changing the threat resolution will generate new threat surfaces, so only do it here if we won't be changing the current resolution value threatResolution.Value = threatResolution.Minimum = prediction.PredictionPointSpacing; if (!generateThreatSurfaces) GetThreatSurfaces(ClientRectangle, true); GetSliceTimeText(); })); }
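The slice key used throughout Display and GetThreatSurfaces is integer division of the prediction time's ticks by the model's slice length, and the slice's start and end times can be reconstructed from the key alone. A self-contained illustration; the four-hour slice length and the date are made-up example values, not taken from the code:

using System;

class TimeSliceExample
{
    static void Main()
    {
        TimeSpan sliceLength = TimeSpan.FromHours(4);                    // example slice length
        DateTime predictionTime = new DateTime(2012, 3, 15, 10, 30, 0);

        long sliceTicks = sliceLength.Ticks;
        long slice = predictionTime.Ticks / sliceTicks;                  // dictionary key

        DateTime sliceStart = new DateTime(slice * sliceTicks);          // 2012-03-15 08:00
        DateTime sliceEnd = sliceStart + sliceLength;                    // 2012-03-15 12:00

        Console.WriteLine(slice + ": " + sliceStart + " - " + sliceEnd);
    }
}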
/// <summary> /// Constructor /// </summary> /// <param name="wordNetDirectory">Path to WordNet directory (the one with the data and index files in it)</param> /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient /// binary search algorithm.</param> public WordNetEngine(string wordNetDirectory, bool inMemory) { _wordNetDirectory = wordNetDirectory; _inMemory = inMemory; _posIndexWordSearchStream = null; _posSynSetDataFile = null; if (!System.IO.Directory.Exists(_wordNetDirectory)) throw new DirectoryNotFoundException("Missing WordNet directory: " + _wordNetDirectory); // get data and index paths string[] dataPaths = new string[] { Path.Combine(_wordNetDirectory, "data.adj"), Path.Combine(_wordNetDirectory, "data.adv"), Path.Combine(_wordNetDirectory, "data.noun"), Path.Combine(_wordNetDirectory, "data.verb") }; string[] indexPaths = new string[] { Path.Combine(_wordNetDirectory, "index.adj"), Path.Combine(_wordNetDirectory, "index.adv"), Path.Combine(_wordNetDirectory, "index.noun"), Path.Combine(_wordNetDirectory, "index.verb") }; // make sure all files exist foreach (string path in dataPaths.Union(indexPaths)) if (!System.IO.File.Exists(path)) throw new FileNotFoundException("Failed to find WordNet file: " + path); #region index file sorting string sortFlagPath = Path.Combine(_wordNetDirectory, ".sorted_for_dot_net"); if (!System.IO.File.Exists(sortFlagPath)) { /* make sure the index files are sorted according to the current sort order. the index files in the * wordnet distribution are sorted in the order needed for (presumably) the java api, which uses * a different sort order than the .net runtime. thus, unless we resort the lines in the index * files, we won't be able to do a proper binary search over the data. 
*/ foreach (string indexPath in indexPaths) { // create temporary file for sorted lines string tempPath = Path.GetTempFileName(); StreamWriter tempFile = new StreamWriter(tempPath); // get number of words (lines) in file int numWords = 0; StreamReader indexFile = new StreamReader(indexPath); string line; while (indexFile.TryReadLine(out line)) if (!line.StartsWith(" ")) ++numWords; // get lines in file, sorted by first column (i.e., the word) Dictionary<string, string> wordLine = new Dictionary<string, string>(numWords); indexFile = new StreamReader(indexPath); while (indexFile.TryReadLine(out line)) // write header lines to temp file immediately if (line.StartsWith(" ")) tempFile.WriteLine(line); else { // trim useless blank spaces from line and map line to first column line = line.Trim(); wordLine.Add(line.Substring(0, line.IndexOf(' ')), line); } // get sorted words List<string> sortedWords = new List<string>(wordLine.Count); sortedWords.AddRange(wordLine.Keys); sortedWords.Sort(); // write lines sorted by word foreach (string word in sortedWords) tempFile.WriteLine(wordLine[word]); tempFile.Close(); // replace original index file with properly sorted one System.IO.File.Delete(indexPath); System.IO.File.Move(tempPath, indexPath); } // create flag file, indicating that we've sorted the data StreamWriter sortFlagFile = new StreamWriter(sortFlagPath); sortFlagFile.WriteLine("This file serves no purpose other than to indicate that the WordNet distribution data in the current directory has been sorted for use by the .NET API."); sortFlagFile.Close(); } #endregion #region engine init if (inMemory) { // pass 1: get total number of synsets int totalSynsets = 0; foreach (string dataPath in dataPaths) { // scan synset data file for lines that don't start with a space...these are synset definition lines StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) ++totalSynsets; } } // pass 2: create synset shells (pos and offset only) _idSynset = new Dictionary<string, SynSet>(totalSynsets); foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // get offset and create synset shell int offset = int.Parse(line.Substring(0, firstSpace)); SynSet synset = new SynSet(pos, offset, null); _idSynset.Add(synset.ID, synset); } } } // pass 3: instantiate synsets (hooks up relations, set glosses, etc.) 
foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) // instantiate synset defined on current line, using the instantiated synsets for all references _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset); } } // organize synsets by pos and words...also set most common synset for word-pos pairs that have multiple synsets _posWordSynSets = new Dictionary<POS, Dictionary<string, Set<SynSet>>>(); foreach (string indexPath in indexPaths) { POS pos = GetFilePOS(indexPath); _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary<string, Set<SynSet>>)); // scan word index file, skipping header lines StreamReader indexFile = new StreamReader(indexPath); string line; while (indexFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // grab word and synset shells, along with the most common synset string word = line.Substring(0, firstSpace); SynSet mostCommonSynSet; Set<SynSet> synsets = GetSynSetShells(line, pos, out mostCommonSynSet, null); // set flag on most common synset if it's ambiguous if (synsets.Count > 1) _idSynset[mostCommonSynSet.ID].SetAsMostCommonSynsetFor(word); // use reference to the synsets that we instantiated in our three-pass routine above _posWordSynSets[pos].Add(word, new Set<SynSet>(synsets.Count)); foreach (SynSet synset in synsets) _posWordSynSets[pos][word].Add(_idSynset[synset.ID]); } } } } else { // open binary search streams for index files _posIndexWordSearchStream = new Dictionary<POS, BinarySearchTextStream>(); foreach (string indexPath in indexPaths) { // create binary search stream for index file BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate(delegate(object searchWord, string currentLine) { // if we landed on the header text, search further down if (currentLine[0] == ' ') return 1; // get word on current line string currentWord = currentLine.Substring(0, currentLine.IndexOf(' ')); // compare searched-for word to the current word return ((string)searchWord).CompareTo(currentWord); })); // add search stream for current POS _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream); } // open readers for synset data files _posSynSetDataFile = new Dictionary<POS, StreamReader>(); foreach (string dataPath in dataPaths) _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath)); } #endregion }
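The read loops above go through a TryReadLine extension on StreamReader, which is not a BCL method and is not shown on this page; presumably it is a thin wrapper over ReadLine. A hypothetical equivalent:

using System.IO;

public static class StreamReaderExtensions
{
    // Returns true and the next line, or false (with a null line) at end of stream.
    public static bool TryReadLine(this StreamReader reader, out string line)
    {
        line = reader.ReadLine();
        return line != null;
    }
}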
/// <summary> /// Initializes a new instance of the <see cref="WordNetMemoryProvider"/> class. /// </summary> /// <param name="dataPath">The data path.</param> /// <exception cref="System.ArgumentNullException">dataPath</exception> /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception> /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception> public WordNetMemoryProvider(string dataPath) { if (string.IsNullOrEmpty(dataPath)) throw new ArgumentNullException("dataPath"); var dir = new DirectoryInfo(dataPath); if (!dir.Exists) throw new DirectoryNotFoundException("The data directory does not exist."); var dataPaths = new [] { new FileInfo(Path.Combine(dataPath, "data.adj")), new FileInfo(Path.Combine(dataPath, "data.adv")), new FileInfo(Path.Combine(dataPath, "data.noun")), new FileInfo(Path.Combine(dataPath, "data.verb")) }; var indexPaths = new [] { new FileInfo(Path.Combine(dataPath, "index.adj")), new FileInfo(Path.Combine(dataPath, "index.adv")), new FileInfo(Path.Combine(dataPath, "index.noun")), new FileInfo(Path.Combine(dataPath, "index.verb")) }; foreach (var file in dataPaths.Union(indexPaths).Where(file => !file.Exists)) throw new FileNotFoundException("A required WordNet file does not exist: " + file.Name); // Pass 1: Get total number of synsets var totalSynsets = 0; foreach (var dataInfo in dataPaths) { // scan synset data file for lines that don't start with a space... // these are synset definition lines using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace > 0) ++totalSynsets; } } } // Pass 2: Create synset shells (pos and offset only) idSynset = new Dictionary<string, SynSet>(totalSynsets); foreach (var dataInfo in dataPaths) { var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName); // scan synset data file using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace <= 0) continue; // get offset and create synset shell var offset = int.Parse(line.Substring(0, firstSpace)); var synset = new SynSet(pos, offset, null); idSynset.Add(synset.Id, synset); } } } // Pass 3: Instantiate synsets (hooks up relations, set glosses, etc.) foreach (var dataInfo in dataPaths) { var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName); // scan synset data file using (var dataFile = new StreamReader(dataInfo.FullName)) { string line; while ((line = dataFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace > 0) // instantiate synset defined on current line, using the instantiated synsets for all references idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, idSynset); } } } // organize synsets by pos and words... 
// also set most common synset for word-pos pairs that have multiple synsets posWordSynSets = new Dictionary<WordNetPos, Dictionary<string, List<SynSet>>>(); foreach (var indexInfo in indexPaths) { var pos = WordNetFileProvider.GetFilePos(indexInfo.FullName); posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary<string, List<SynSet>>)); // scan word index file, skipping header lines using (var indexFile = new StreamReader(indexInfo.FullName)) { string line; while ((line = indexFile.ReadLine()) != null) { var firstSpace = line.IndexOf(' '); if (firstSpace <= 0) continue; // grab word and synset shells, along with the most common synset var word = line.Substring(0, firstSpace); SynSet mostCommonSynSet; var synsets = WordNetFileProvider.GetSynSetShells(line, pos, out mostCommonSynSet, wordNet); // set flag on most common synset if it's ambiguous if (synsets.Count > 1) idSynset[mostCommonSynSet.Id].SetAsMostCommonSynsetFor(word); // use reference to the synsets that we instantiated in our three-pass routine above posWordSynSets[pos].Add(word, new List<SynSet>(synsets.Count)); foreach (var synset in synsets) posWordSynSets[pos][word].Add(idSynset[synset.Id]); } } } }
/// <summary> /// Constructor /// </summary> /// <param name="dataDirectory">Path to the SemLink data directory</param> public SemLinkEngine(string dataDirectory) { _dataDirectory = dataDirectory; #region propbank-verbnet // check for file if (!File.Exists(PropBankVerbNetLinkingPath)) { throw new FileNotFoundException("Failed to find PropBank-VerbNet mapping file: " + PropBankVerbNetLinkingPath); } // read each predicate mapping _propBankRoleVerbNetRoles = new Dictionary <string, Set <string> >(); XmlParser propBankVerbNetP = new XmlParser(File.ReadAllText(PropBankVerbNetLinkingPath)); string predicateXML; while ((predicateXML = propBankVerbNetP.OuterXML("predicate")) != null) { XmlParser predicateP = new XmlParser(predicateXML); string pbVerb = predicateP.AttributeValue("predicate", "lemma").Trim(); if (pbVerb == "") { throw new Exception("Blank PropBank verb"); } // get argument mappings string argMapXML; while ((argMapXML = predicateP.OuterXML("argmap")) != null) { XmlParser argMapP = new XmlParser(argMapXML); // get role set for current argument mapping string pbRoleSetStr = argMapP.AttributeValue("argmap", "pb-roleset"); int pbRoleSet = int.Parse(pbRoleSetStr.Substring(pbRoleSetStr.IndexOf('.') + 1)); if (pbRoleSet <= 0) { throw new Exception("Invalid PropBank role set: " + pbRoleSet); } // get verbnet class, using periods instead of dashes string vnClass = argMapP.AttributeValue("argmap", "vn-class").Trim().Replace("-", "."); if (vnClass == "") { throw new Exception("Blank VerbNet class"); } // read argument mapping string roleXML; while ((roleXML = argMapP.OuterXML("role")) != null) { XmlParser roleP = new XmlParser(roleXML); // get fully-specified propbank role string pbArgStr = roleP.AttributeValue("role", "pb-arg"); if (pbArgStr == "M" || pbArgStr == "A") { continue; } int pbArg = int.Parse(pbArgStr); string fullPbRole = pbVerb + "." + pbRoleSet + "." + pbArg; // get fully-specified verbnet role string vnRole = roleP.AttributeValue("role", "vn-theta").Trim(); if (vnRole == "") { throw new Exception("Blank VerbNet role"); } string fullVnRole = vnClass + "." 
+ vnRole; // create entry _propBankRoleVerbNetRoles.EnsureContainsKey(fullPbRole, typeof(Set <string>)); _propBankRoleVerbNetRoles[fullPbRole].Add(fullVnRole); } } } // map verbnet to propbank _verbNetRolePropBankRoles = new Dictionary <string, Set <string> >(); foreach (string propBankRole in _propBankRoleVerbNetRoles.Keys) { foreach (string verbNetRole in _propBankRoleVerbNetRoles[propBankRole]) { _verbNetRolePropBankRoles.EnsureContainsKey(verbNetRole, typeof(Set <string>)); _verbNetRolePropBankRoles[verbNetRole].Add(propBankRole); } } #endregion #region verbnet-framenet // check for file if (!File.Exists(FrameNetVerbNetLinkingPath)) { throw new FileNotFoundException("Failed to find FrameNet-VerbNet mapping file: " + FrameNetVerbNetLinkingPath); } // read each mapping _verbNetRoleFrameElements = new Dictionary <string, Set <string> >(); XmlParser verbNetFrameNetP = new XmlParser(File.ReadAllText(FrameNetVerbNetLinkingPath)); string vnClassXML; while ((vnClassXML = verbNetFrameNetP.OuterXML("vncls")) != null) { XmlParser vnClassP = new XmlParser(vnClassXML); // get verbnet class and framenet frame string vnClass = vnClassP.AttributeValue("vncls", "class").Trim().Replace("-", "."); string frame = vnClassP.AttributeValue("vncls", "fnframe").Trim().ToLower(); // get each role mapping string roleXML; while ((roleXML = vnClassP.OuterXML("role")) != null) { // get fe and vn role XmlParser roleP = new XmlParser(roleXML); string fe = frame + "." + roleP.AttributeValue("role", "fnrole").Trim().ToLower(); string vnRole = vnClass + "." + roleP.AttributeValue("role", "vnrole").Trim(); // add to list of FEs for vn role _verbNetRoleFrameElements.EnsureContainsKey(vnRole, typeof(Set <string>), false); _verbNetRoleFrameElements[vnRole].Add(fe); } } // map frame elements to verbnet roles _frameElementVerbNetRoles = new Dictionary <string, Set <string> >(); foreach (string verbNetRole in _verbNetRoleFrameElements.Keys) { foreach (string frameElement in _verbNetRoleFrameElements[verbNetRole]) { _frameElementVerbNetRoles.EnsureContainsKey(frameElement, typeof(Set <string>)); _frameElementVerbNetRoles[frameElement].Add(verbNetRole); } } #endregion }
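Both linking sections finish by inverting a one-to-many mapping (PropBank role to VerbNet roles, then VerbNet role to frame elements). The sketch below shows that pattern with the BCL's HashSet<string> standing in for the Set<string> type used above; the role strings are invented to illustrate the key formats (verb.roleset.arg and class.role), not actual SemLink data:

using System;
using System.Collections.Generic;

class InvertMappingExample
{
    static void Main()
    {
        // forward map, e.g. fully-specified PropBank role -> VerbNet roles
        var forward = new Dictionary<string, HashSet<string>>
        {
            { "give.1.0", new HashSet<string> { "13.1.Agent" } },
            { "give.1.2", new HashSet<string> { "13.1.Recipient" } }
        };

        // inverted map: VerbNet role -> PropBank roles
        var inverse = new Dictionary<string, HashSet<string>>();
        foreach (var pair in forward)
            foreach (string value in pair.Value)
            {
                HashSet<string> keys;
                if (!inverse.TryGetValue(value, out keys))
                    inverse.Add(value, keys = new HashSet<string>());
                keys.Add(pair.Key);
            }

        Console.WriteLine(string.Join(", ", inverse["13.1.Agent"]));   // give.1.0
    }
}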
/// <summary> /// Loads the propositions file /// </summary> /// <param name="propsPath">Path to prop.txt file</param> private void LoadProps(string propsPath) { if (!File.Exists(propsPath)) { throw new Exception("Invalid PropBank propositions file: \"" + propsPath + "\""); } // reuse existing index files if present if (File.Exists(VerbInfoPath) && File.Exists(VerbInfoFilePositionPath) && File.Exists(MrgSentenceInfoPath) && File.Exists(MrgSentenceInfoFilePositionsPath)) { // load verb info positions _verbInfoFilePosition = new Dictionary <string, long>(); StreamReader positionsFile = new StreamReader(VerbInfoFilePositionPath); string line; while ((line = positionsFile.ReadLine()) != null) { // format: position verb int spaceLoc = line.IndexOf(' '); _verbInfoFilePosition.Add(line.Substring(spaceLoc + 1), long.Parse(line.Substring(0, spaceLoc))); } positionsFile.Close(); // load mrg-sentence verb info positions _mrgSentInfoFilePosition = new Dictionary <string, Dictionary <int, long> >(); positionsFile = new StreamReader(MrgSentenceInfoFilePositionsPath); while ((line = positionsFile.ReadLine()) != null) { // each line lists a MRG file then a series of pipe-delimited sentence-position pairs...get MRG file first int pipeLoc = line.IndexOf('|'); string mrgFile = line.Substring(0, pipeLoc); _mrgSentInfoFilePosition.Add(mrgFile, new Dictionary <int, long>()); // get sentence-position pairs while (pipeLoc != line.Length - 1) { // find next pipe and space int nextPipe = line.IndexOf('|', pipeLoc + 1); int spaceLoc = line.IndexOf(' ', pipeLoc + 1); // get sentence and position int sent = int.Parse(line.Substring(pipeLoc + 1, spaceLoc - pipeLoc - 1)); long position = long.Parse(line.Substring(spaceLoc + 1, nextPipe - spaceLoc - 1)); // add to index _mrgSentInfoFilePosition[mrgFile].Add(sent, position); pipeLoc = nextPipe; } } positionsFile.Close(); return; } // maps each verb to a list of VerbInfo objects, each of which stores an annotation instance Dictionary <string, List <VerbInfo> > verbInfo = new Dictionary <string, List <VerbInfo> >(); // maps each mrg file and sentence number to a list of VerbInfo objects for that sentence Dictionary <string, Dictionary <int, List <VerbInfo> > > mrgSentInfo = new Dictionary <string, Dictionary <int, List <VerbInfo> > >(); // read propositions file StreamReader propFile = new StreamReader(propsPath); string propLine; while ((propLine = propFile.ReadLine()) != null) { VerbInfo vi = new VerbInfo(propLine); // add to mapping from verbs to their information verbInfo.EnsureContainsKey(vi.Verb, typeof(List <VerbInfo>)); verbInfo[vi.Verb].Add(vi); // add to mapping from file-sentence pairs to their information mrgSentInfo.EnsureContainsKey(vi.File, typeof(Dictionary <int, List <VerbInfo> >)); mrgSentInfo[vi.File].EnsureContainsKey(vi.SentenceNumber, typeof(List <VerbInfo>)); mrgSentInfo[vi.File][vi.SentenceNumber].Add(vi); } propFile.Close(); // write verb index to disk and record file positions of verb info lists _verbInfoFilePosition = new Dictionary <string, long>(); FileStream saveStream = new FileStream(VerbInfoPath, FileMode.Create); foreach (string verb in verbInfo.Keys) { // save position of VerbInfo list _verbInfoFilePosition.Add(verb, saveStream.Position); WriteVerbInfoList(verbInfo[verb], saveStream); } saveStream.Close(); // save file positions StreamWriter verbInfoPositionFile = new StreamWriter(VerbInfoFilePositionPath); foreach (string verb in _verbInfoFilePosition.Keys) { verbInfoPositionFile.WriteLine(_verbInfoFilePosition[verb] + " " + verb); } 
verbInfoPositionFile.Close(); // save mrg-sentence info _mrgSentInfoFilePosition = new Dictionary <string, Dictionary <int, long> >(); saveStream = new FileStream(MrgSentenceInfoPath, FileMode.Create); foreach (string mrgFile in mrgSentInfo.Keys) { // add each sentence, recording position Dictionary <int, long> sentInfoPosition = new Dictionary <int, long>(); foreach (int sentNum in mrgSentInfo[mrgFile].Keys) { // add index of sentence to file position sentInfoPosition.Add(sentNum, saveStream.Position); // write VerbInfo list for sentence WriteVerbInfoList(mrgSentInfo[mrgFile][sentNum], saveStream); } _mrgSentInfoFilePosition.Add(mrgFile, sentInfoPosition); } saveStream.Close(); // save file positions for MRG file index StreamWriter mrgSentInfoPositionsFile = new StreamWriter(MrgSentenceInfoFilePositionsPath); foreach (string mrgFile in _mrgSentInfoFilePosition.Keys) { mrgSentInfoPositionsFile.Write(mrgFile); foreach (int sent in _mrgSentInfoFilePosition[mrgFile].Keys) { mrgSentInfoPositionsFile.Write("|" + sent + " " + _mrgSentInfoFilePosition[mrgFile][sent]); } mrgSentInfoPositionsFile.WriteLine("|"); } mrgSentInfoPositionsFile.Close(); }
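The MRG-sentence position index written above appears to use one line per MRG file: the file name, then pipe-delimited "sentence position" pairs, ending with a trailing pipe. A self-contained parse of one such line, with a made-up file name and positions, mirroring the reading loop in LoadProps:

using System;
using System.Collections.Generic;

class PositionIndexLineExample
{
    static void Main()
    {
        string line = "wsj_0003.mrg|0 0|1 812|";   // hypothetical index line

        int pipeLoc = line.IndexOf('|');
        string mrgFile = line.Substring(0, pipeLoc);
        var sentencePosition = new Dictionary<int, long>();

        while (pipeLoc != line.Length - 1)
        {
            int nextPipe = line.IndexOf('|', pipeLoc + 1);
            int spaceLoc = line.IndexOf(' ', pipeLoc + 1);
            int sentence = int.Parse(line.Substring(pipeLoc + 1, spaceLoc - pipeLoc - 1));
            long position = long.Parse(line.Substring(spaceLoc + 1, nextPipe - spaceLoc - 1));
            sentencePosition.Add(sentence, position);
            pipeLoc = nextPipe;
        }

        Console.WriteLine(mrgFile + ": sentence 1 starts at byte " + sentencePosition[1]);   // 812
    }
}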
private Dictionary<int, float> GetPerClassWeights(StreamReader trainingInstancesReader) { Dictionary<int, int> classCount = new Dictionary<int, int>(); string line; while (trainingInstancesReader.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace == -1) firstSpace = line.Length; int classNum = int.Parse(line.Substring(0, firstSpace)); classCount.EnsureContainsKey(classNum, typeof(int)); classCount[classNum]++; } Dictionary<int, float> classWeight = new Dictionary<int, float>(); int total = classCount.Values.Sum(); foreach (int classNum in classCount.Keys) if (_libLinear.GetUnmappedLabel(classNum.ToString()) != PointPrediction.NullLabel) classWeight.Add(classNum, (total - classCount[classNum]) / (float)classCount[classNum]); return classWeight; }
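The per-class weight computed above is the inverse relative frequency (total - count) / count: a rare class gets a large weight, and a class making up half of the training data gets a weight of exactly 1. A small worked example with made-up counts:

using System;
using System.Collections.Generic;
using System.Linq;

class ClassWeightExample
{
    static void Main()
    {
        var classCount = new Dictionary<int, int> { { 1, 900 }, { 2, 90 }, { 3, 10 } };
        int total = classCount.Values.Sum();   // 1000

        foreach (var pair in classCount)
        {
            float weight = (total - pair.Value) / (float)pair.Value;
            Console.WriteLine("class " + pair.Key + ": weight " + weight);
        }
        // class 1: ~0.11, class 2: ~10.11, class 3: 99
    }
}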
/// <summary> /// Constructor /// </summary> /// <param name="path">Path to the NomLex dictionary</param> public NomLexEngine(string path) { if (!File.Exists(path)) { throw new FileNotFoundException("Invalid NomLex file: \"" + path + "\""); } string nomLex = File.ReadAllText(path); // get number of entities int numEntries = 0; int entryStart = 0; while (entryStart >= 0 && entryStart < nomLex.Length) { // should be on the open paren if (nomLex[entryStart] != '(') { throw new Exception("Invalid entry"); } // get text for entry int entryEnd = IndexOfBalancingParen(nomLex, entryStart); // start at next entry entryStart = nomLex.IndexOf('(', entryEnd + 1); ++numEntries; } // extract entries _nounEntries = new Dictionary <string, List <NomLexEntry> >(numEntries); _classes = new Set <string>(false); entryStart = 0; while (entryStart >= 0 && entryStart < nomLex.Length) { // should be on the open paren if (nomLex[entryStart] != '(') { throw new Exception("Invalid entry"); } // get text for entry int entryEnd = IndexOfBalancingParen(nomLex, entryStart); string entryText = nomLex.Substring(entryStart, entryEnd - entryStart + 1); // extract entry NomLexEntry entry = ExtractEntry(entryText); // get noun from entry string noun = entry.Features["orth"].ToString(); // add entry to list _nounEntries.EnsureContainsKey(noun, typeof(List <NomLexEntry>)); _nounEntries[noun].Add(entry); // add to class index _classes.Add(entry.Name); // start at next entry entryStart = nomLex.IndexOf('(', entryEnd + 1); } }
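IndexOfBalancingParen is not shown on this page; presumably it scans forward from an opening parenthesis and returns the index of the parenthesis that closes it. A hypothetical sketch over a made-up NomLex-style entry (string literals and escapes inside entries are ignored for simplicity):

using System;

static class ParenExample
{
    // Returns the index of the ')' that balances the '(' at start.
    static int IndexOfBalancingParen(string text, int start)
    {
        if (text[start] != '(')
            throw new ArgumentException("start must point at an opening parenthesis");

        int depth = 0;
        for (int i = start; i < text.Length; ++i)
        {
            if (text[i] == '(') ++depth;
            else if (text[i] == ')' && --depth == 0) return i;
        }
        throw new FormatException("Unbalanced parentheses");
    }

    static void Main()
    {
        string entry = "(NOM :ORTH \"abandonment\" (VERB-SUBJ ((DET-POSS))))";
        Console.WriteLine(IndexOfBalancingParen(entry, 0));   // index of the final ')'
    }
}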
public static Dictionary<long, Dictionary<string, List<double>>> GetSliceLocationThreats(Prediction prediction) { Dictionary<long, Dictionary<string, List<double>>> sliceLocationThreats = new Dictionary<long, Dictionary<string, List<double>>>(); DiscreteChoiceModel model = prediction.Model; long sliceTicks = -1; if (model is TimeSliceDCM) sliceTicks = (model as TimeSliceDCM).TimeSliceTicks; Dictionary<int, Point> idPoint = new Dictionary<int, Point>(); foreach (Point point in prediction.Points) idPoint.Add(point.Id, point); foreach (PointPrediction pointPrediction in prediction.PointPredictions) { long slice = 1; if (sliceTicks > 0) slice = pointPrediction.Time.Ticks / sliceTicks; PostGIS.Point point = idPoint[pointPrediction.PointId].Location; int row = (int)((point.Y - prediction.PredictionArea.BoundingBox.MinY) / prediction.PredictionPointSpacing); int col = (int)((point.X - prediction.PredictionArea.BoundingBox.MinX) / prediction.PredictionPointSpacing); string location = row + "-" + col; sliceLocationThreats.EnsureContainsKey(slice, typeof(Dictionary<string, List<double>>)); sliceLocationThreats[slice].EnsureContainsKey(location, typeof(List<double>)); sliceLocationThreats[slice][location].Add(pointPrediction.TotalThreat); } return sliceLocationThreats; }
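The row-col location keys used here (and in GetSliceLocationTrueCount) come from snapping a point to the prediction grid: subtract the bounding box minimum, divide by the point spacing, and truncate. An illustration with made-up coordinates:

using System;

class GridCellExample
{
    static void Main()
    {
        double minX = 400000, minY = 4200000;   // bounding-box minimum, in meters
        double spacing = 200;                   // prediction point spacing, in meters
        double x = 401450, y = 4200990;         // point location

        int row = (int)((y - minY) / spacing);  // 4
        int col = (int)((x - minX) / spacing);  // 7
        Console.WriteLine(row + "-" + col);     // "4-7"
    }
}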
/// <summary> /// Gets spans for a set of argument nodes, indexed by sentence /// </summary> /// <param name="argNodes">Argument nodes</param> /// <returns>Contiguous spans, indexed by sentence</returns> private Dictionary <int, List <Span> > GetSpans(List <TreeBankNode> argNodes) { // make sure all discourse annotation nodes come from the same source document List <TreeBankNode> allNodes = new List <TreeBankNode>(); foreach (TreeBankNode node in argNodes) { if (node.MrgFile != argNodes[0].MrgFile) { throw new Exception("MRG file mismatch"); } else { allNodes.Add(node); } } // remove any null nodes and sort the result by node position for (int i = 0; i < allNodes.Count;) { if (allNodes[i].IsNullElement) { allNodes.RemoveAt(i); } else { ++i; } } if (allNodes.Count == 0) { throw new Exception("Invalid node list"); } // group nodes by sentence Dictionary <int, List <TreeBankNode> > sentenceNodes = new Dictionary <int, List <TreeBankNode> >(); foreach (TreeBankNode node in allNodes) { sentenceNodes.EnsureContainsKey(node.SentenceNumber, typeof(List <TreeBankNode>)); sentenceNodes[node.SentenceNumber].Add(node); } // create spans for each sentence Dictionary <int, List <Span> > sentenceSpans = new Dictionary <int, List <Span> >(); foreach (int sentNum in sentenceNodes.Keys) { // create span for each set of contiguous nodes List <Span> spans = new List <Span>(); foreach (List <TreeBankNode> nodes in TreeBankNode.GetContiguousNodes(sentenceNodes[sentNum])) { spans.Add(new Span(nodes[0].FirstToken.TokenNumber, nodes[nodes.Count - 1].LastToken.TokenNumber)); } sentenceSpans.Add(sentNum, spans); } return(sentenceSpans); }
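TreeBankNode.GetContiguousNodes is not shown; the net effect of GetSpans is to turn each sentence's argument tokens into maximal runs of adjacent token numbers. A self-contained sketch of that grouping over plain integers:

using System;
using System.Collections.Generic;

class ContiguousSpanExample
{
    static void Main()
    {
        // sorted token numbers covered by an argument within one sentence
        int[] tokens = { 2, 3, 4, 8, 9, 15 };

        var spans = new List<Tuple<int, int>>();
        int start = tokens[0], previous = tokens[0];
        for (int i = 1; i <= tokens.Length; ++i)
        {
            if (i == tokens.Length || tokens[i] != previous + 1)
            {
                spans.Add(Tuple.Create(start, previous));   // close the current run
                if (i < tokens.Length) start = tokens[i];
            }
            if (i < tokens.Length) previous = tokens[i];
        }

        foreach (var span in spans)
            Console.WriteLine(span.Item1 + ".." + span.Item2);   // 2..4, 8..9, 15..15
    }
}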
/// <summary> /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from /// idSynset; otherwise, related synsets are created as shells. /// </summary> /// <param name="definition">Definition line of synset from data file</param> /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param> internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset) { // don't re-instantiate if (Instantiated) { throw new Exception("Synset has already been instantiated"); } /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */ var lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1; if (lexicographerFileNumber <= 0) { throw new Exception("Invalid lexicographer file name number. Should be >= 1."); } LexicographerFileName = (LexicographerFileName)lexicographerFileNumber; // get number of words in the synset and the start character of the word list int wordStart; var numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber); wordStart = definition.IndexOf(' ', wordStart) + 1; // get words in synset Words = new List <string>(numWords); for (var i = 0; i < numWords; ++i) { var wordEnd = definition.IndexOf(' ', wordStart + 1) - 1; var wordLen = wordEnd - wordStart + 1; var word = definition.Substring(wordStart, wordLen); if (word.Contains(' ')) { throw new Exception("Unexpected space in word: " + word); } Words.Add(word); // skip lex_id field wordStart = definition.IndexOf(' ', wordEnd + 2) + 1; } // get gloss Gloss = definition.Substring(definition.IndexOf('|') + 1).Trim(); if (Gloss.Contains('|')) { throw new Exception("Unexpected pipe in gloss"); } // get number and start of relations var relationCountField = 3 + (Words.Count * 2) + 1; int relationFieldStart; var numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart)); relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1; // grab each related synset relationSynSets = new Dictionary <SynSetRelation, List <SynSet> >(); lexicalRelations = new Dictionary <SynSetRelation, Dictionary <SynSet, Dictionary <int, List <int> > > >(); for (var relationNum = 0; relationNum < numRelations; ++relationNum) { string relationSymbol = null; var relatedSynSetOffset = -1; var relatedSynSetPOS = WordNetPos.None; var sourceWordIndex = -1; var targetWordIndex = -1; // each relation has four columns for (var relationField = 0; relationField <= 3; ++relationField) { var fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1; var fieldLen = fieldEnd - relationFieldStart + 1; var fieldValue = definition.Substring(relationFieldStart, fieldLen); // relation symbol if (relationField == 0) { relationSymbol = fieldValue; } // related synset offset else if (relationField == 1) { relatedSynSetOffset = int.Parse(fieldValue); } // related synset POS else if (relationField == 2) { relatedSynSetPOS = GetPos(fieldValue); } // source/target word for lexical relation else if (relationField == 3) { sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber); targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber); } else { throw new Exception(); } relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1; } // get related synset...create shell if 
we don't have a lookup var relatedSynSet = idSynset != null ? idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset] : new SynSet(relatedSynSetPOS, relatedSynSetOffset, wordNet); // get relation var relation = WordNet.GetSynSetRelation(Pos, relationSymbol); // add semantic relation if we have neither a source nor a target word index if (sourceWordIndex == 0 && targetWordIndex == 0) { relationSynSets.EnsureContainsKey(relation, typeof(List <SynSet>)); relationSynSets[relation].Add(relatedSynSet); } // add lexical relation else { lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary <SynSet, Dictionary <int, List <int> > >)); lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary <int, List <int> >)); lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(List <int>)); if (!lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex)) { lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex); } } } Instantiated = true; }
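GetField is another helper that is not reproduced here; from its usage it returns the n-th space-delimited field of the data-file line and, via the out parameter, the index where that field starts. A hypothetical zero-based sketch against a shortened, made-up definition line:

using System;

static class FieldExample
{
    // Returns the zero-based n-th space-delimited field of a line and the index where it starts.
    static string GetField(string line, int field, out int start)
    {
        start = 0;
        for (int i = 0; i < field; ++i)
            start = line.IndexOf(' ', start) + 1;

        int end = line.IndexOf(' ', start);
        if (end == -1) end = line.Length;
        return line.Substring(start, end - start);
    }

    static void Main()
    {
        int start;
        string definition = "00001740 03 n 01 entity 0 ...";                     // made-up, shortened line
        Console.WriteLine(GetField(definition, 3, out start) + " @ " + start);   // 01 @ 14
    }
}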
/// <summary> /// Constructor /// </summary> /// <param name="verbNetDirectory">Path to the VerbNet directory</param> public VerbNetEngine(string verbNetDirectory) { // check directory if (!Directory.Exists(verbNetDirectory)) { throw new Exception("Invalid VerbNet directory"); } // extract all verb classes _idVerbClass = new Dictionary <string, VerbClass>(); foreach (string classPath in Directory.GetFiles(verbNetDirectory, "*.xml")) { ExtractClass(File.ReadAllText(classPath), false); } // add root class _rootVerbClass = new VerbClass("0"); _idVerbClass.Add(_rootVerbClass.ID, _rootVerbClass); // assemble hierarchy while (true) { // create empty "connector" classes for verbnet classes whose direct parent is not defined in verbnet Dictionary <string, VerbClass> newIdVerbClass = new Dictionary <string, VerbClass>(); // check all non-root classes that don't have a parent foreach (string id in _idVerbClass.Keys) { if (id != _rootVerbClass.ID && _idVerbClass[id].Parent == null) { VerbClass currentClass = _idVerbClass[id]; // get id of parent by removing the final location string parentID = id.Substring(0, id.LastIndexOf('.')); // try to get parent class VerbClass parentClass; if (!_idVerbClass.TryGetValue(parentID, out parentClass)) { // create new connector class, reusing existing connector class if we have one VerbClass connectorClass; if (!newIdVerbClass.TryGetValue(parentID, out connectorClass)) { connectorClass = new VerbClass(parentID); newIdVerbClass.Add(connectorClass.ID, connectorClass); } parentClass = connectorClass; } // add current class as sub-class of parent parentClass.AddChild(currentClass); } } // add all newly created connector classes foreach (string id in newIdVerbClass.Keys) { _idVerbClass.Add(id, newIdVerbClass[id]); } // if we didn't add any connector classes, each class (except the root) has a parent defined in _idVerbClass - hierarchy is complete if (newIdVerbClass.Count == 0) { break; } } // map each verb to its classes _verbVerbClasses = new Dictionary <string, Set <VerbClass> >(); foreach (VerbClass verbClass in _rootVerbClass.GetChildren(true)) { foreach (string verb in verbClass.GetVerbs(false)) { _verbVerbClasses.EnsureContainsKey(verb, typeof(Set <VerbClass>)); _verbVerbClasses[verb].Add(verbClass); } } // make sure all verb classes except for the root has a parent (i.e., make sure we have a rooted tree) foreach (VerbClass vnClass in _idVerbClass.Values) { if (vnClass.ID != "0" && vnClass.Parent == null) { throw new Exception("Invalid VerbNet tree structure!"); } } }
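The hierarchy assembly above derives a parent's ID purely from the class ID string by dropping everything after the last dot, inventing empty connector classes for IDs that never occur as real VerbNet classes. A tiny illustration of the ID arithmetic (the class numbers are made up):

using System;

class VerbClassIdExample
{
    static void Main()
    {
        string id = "13.5.1.1";
        while (id.Contains("."))
        {
            string parentId = id.Substring(0, id.LastIndexOf('.'));
            Console.WriteLine(id + " -> parent " + parentId);
            id = parentId;
        }
        // 13.5.1.1 -> parent 13.5.1
        // 13.5.1 -> parent 13.5
        // 13.5 -> parent 13
    }
}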
protected override void Run(Prediction prediction) { List <PostGIS.Point> predictionPoints = new List <PostGIS.Point>(); Area predictionArea = prediction.PredictionArea; double areaMinX = predictionArea.BoundingBox.MinX; double areaMaxX = predictionArea.BoundingBox.MaxX; double areaMinY = predictionArea.BoundingBox.MinY; double areaMaxY = predictionArea.BoundingBox.MaxY; for (double x = areaMinX + prediction.PredictionPointSpacing / 2d; x <= areaMaxX; x += prediction.PredictionPointSpacing) // place points in the middle of the square boxes that cover the region - we get display errors from pixel rounding if the points are exactly on the boundaries { for (double y = areaMinY + prediction.PredictionPointSpacing / 2d; y <= areaMaxY; y += prediction.PredictionPointSpacing) { predictionPoints.Add(new PostGIS.Point(x, y, predictionArea.Shapefile.SRID)); } } List <PostGIS.Point> incidentPoints = new List <PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, IncidentTypes.ToArray()).Select(i => i.Location)); predictionPoints.AddRange(incidentPoints); Console.Out.WriteLine("Filtering prediction points to prediction area"); predictionPoints = predictionArea.Intersects(predictionPoints, prediction.PredictionPointSpacing / 2f).Select(i => predictionPoints[i]).ToList(); NpgsqlConnection connection = DB.Connection.OpenConnection; try { Console.Out.WriteLine("Inserting points into prediction"); Point.CreateTable(prediction, predictionArea.Shapefile.SRID); List <int> predictionPointIds = Point.Insert(connection, predictionPoints.Select(p => new Tuple <PostGIS.Point, string, DateTime>(p, PointPrediction.NullLabel, DateTime.MinValue)), prediction, predictionArea, false); Console.Out.WriteLine("Running overall KDE for " + IncidentTypes.Count + " incident type(s)"); List <float> density = GetDensityEstimate(incidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize); Dictionary <int, float> pointIdOverallDensity = new Dictionary <int, float>(predictionPointIds.Count); int pointNum = 0; foreach (int predictionPointId in predictionPointIds) { pointIdOverallDensity.Add(predictionPointId, density[pointNum++]); } Dictionary <int, Dictionary <string, double> > pointIdIncidentDensity = new Dictionary <int, Dictionary <string, double> >(pointIdOverallDensity.Count); if (IncidentTypes.Count == 1) { string incident = IncidentTypes.First(); foreach (int pointId in pointIdOverallDensity.Keys) { Dictionary <string, double> incidentDensity = new Dictionary <string, double>(); incidentDensity.Add(incident, pointIdOverallDensity[pointId]); pointIdIncidentDensity.Add(pointId, incidentDensity); } } else { foreach (string incidentType in IncidentTypes) { Console.Out.WriteLine("Running KDE for incident \"" + incidentType + "\""); incidentPoints = new List <PostGIS.Point>(Incident.Get(TrainingStart, TrainingEnd, predictionArea, incidentType).Select(i => i.Location)); density = GetDensityEstimate(incidentPoints, _trainingSampleSize, false, 0, 0, predictionPoints, _normalize); if (density.Count > 0) { pointNum = 0; foreach (int predictionPointId in predictionPointIds) { pointIdIncidentDensity.EnsureContainsKey(predictionPointId, typeof(Dictionary <string, double>)); pointIdIncidentDensity[predictionPointId].Add(incidentType, density[pointNum++]); } } } } PointPrediction.CreateTable(prediction); PointPrediction.Insert(GetPointPredictionValues(pointIdOverallDensity, pointIdIncidentDensity), prediction, false); Smooth(prediction); } finally { DB.Connection.Return(connection); } }
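The prediction points above are placed at the centers of square cells covering the area's bounding box, offset by half the point spacing so that no point falls exactly on a cell boundary. A self-contained sketch of that layout with made-up bounds:

using System;
using System.Collections.Generic;

class GridLayoutExample
{
    static void Main()
    {
        double minX = 0, maxX = 1000, minY = 0, maxY = 600;
        double spacing = 200;

        var points = new List<Tuple<double, double>>();
        for (double x = minX + spacing / 2; x <= maxX; x += spacing)
            for (double y = minY + spacing / 2; y <= maxY; y += spacing)
                points.Add(Tuple.Create(x, y));   // cell centers: (100,100), (100,300), ...

        Console.WriteLine(points.Count);          // 5 columns x 3 rows = 15
    }
}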
/// <summary> /// Constructor /// </summary> /// <param name="wordNetDirectory">Path to WorNet directory (the one with the data and index files in it)</param> /// <param name="inMemory">Whether or not to store all data in memory. In-memory storage requires quite a bit of space /// but it is also very quick. The alternative (false) will cause the data to be searched on-disk with an efficient /// binary search algorithm.</param> public WordNetEngine(string wordNetDirectory, bool inMemory) { _wordNetDirectory = wordNetDirectory; _inMemory = inMemory; _posIndexWordSearchStream = null; _posSynSetDataFile = null; if (!System.IO.Directory.Exists(_wordNetDirectory)) { throw new DirectoryNotFoundException("Error 502"); } // get data and index paths string[] dataPaths = new string[] { Path.Combine(_wordNetDirectory, "data.adj"), Path.Combine(_wordNetDirectory, "data.adv"), Path.Combine(_wordNetDirectory, "data.noun"), Path.Combine(_wordNetDirectory, "data.verb") }; string[] indexPaths = new string[] { Path.Combine(_wordNetDirectory, "index.adj"), Path.Combine(_wordNetDirectory, "index.adv"), Path.Combine(_wordNetDirectory, "index.noun"), Path.Combine(_wordNetDirectory, "index.verb") }; // make sure all files exist foreach (string path in dataPaths.Union(indexPaths)) { if (!System.IO.File.Exists(path)) { throw new FileNotFoundException("Error 502"); } } // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * // * UPDATE [HASSAN:11/03/2017]: The lemmatizer requires except- * // * tion dictionary for each POS to be loaded as stream * // * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * LemmaExcptionsFile = new Dictionary <string, StreamReader>(4); LemmaExcptionsFile.Add("noun", new StreamReader(wordNetDirectory + "\\noun.exc")); LemmaExcptionsFile.Add("verb", new StreamReader(wordNetDirectory + "\\verb.exc")); // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * // * UPDATE [HASSAN:11/07/2017]: The lemmatizer requires except- * // * tion dictionary for noun only in the context of SemCluster * // * tool. In order to implement lemmatizer for all 4-POS tags * // * you will need the following: * // * 1) Uncomment the following lines. * // * 2) Uncomment the lines in suffixMap variable. 
* // * 3) Uncomment the GetSynsets Switch section * // * 4) Add Exception files for each POS in the data folder * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //LemmaExcptionsFile.Add("adjective", new StreamReader(wordNetDirectory + "\\adj.exc")); //LemmaExcptionsFile.Add("adverb", new StreamReader(wordNetDirectory + "\\adv.exc")); // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // * * // * UPDATE [HASSAN:28/01/2016]: The #region index file sorting * // * has been removed here,since its required to run only for * // * first program execution * // * * // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * #region engine init if (inMemory) { // pass 1: get total number of synsets int totalSynsets = 0; foreach (string dataPath in dataPaths) { // scan synset data file for lines that don't start with a space...these are synset definition lines StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { ++totalSynsets; } } } // pass 2: create synset shells (pos and offset only) _idSynset = new Dictionary <string, SynSet>(totalSynsets); foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // get offset and create synset shell int offset = int.Parse(line.Substring(0, firstSpace)); SynSet synset = new SynSet(pos, offset, null); _idSynset.Add(synset.ID, synset); } } } // pass 3: instantiate synsets (hooks up relations, set glosses, etc.) foreach (string dataPath in dataPaths) { POS pos = GetFilePOS(dataPath); // scan synset data file StreamReader dataFile = new StreamReader(dataPath); string line; while (dataFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // instantiate synset defined on current line, using the instantiated synsets for all references _idSynset[pos + ":" + int.Parse(line.Substring(0, firstSpace))].Instantiate(line, _idSynset); } } } // organize synsets by pos and words _posWordSynSets = new Dictionary <POS, Dictionary <string, List <SynSet> > >(); foreach (string indexPath in indexPaths) { POS pos = GetFilePOS(indexPath); _posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, List <SynSet> >)); // scan word index file, skipping header lines StreamReader indexFile = new StreamReader(indexPath); string line; while (indexFile.TryReadLine(out line)) { int firstSpace = line.IndexOf(' '); if (firstSpace > 0) { // grab word and synset shells string word = line.Substring(0, firstSpace); List <SynSet> synsets = GetSynSetShells(line, pos, null); // use reference to the synsets that we instantiated in our three-pass routine above _posWordSynSets[pos].Add(word, new List <SynSet>(synsets.Count)); foreach (SynSet synset in synsets) { _posWordSynSets[pos][word].Add(_idSynset[synset.ID]); } } } } } else { // open binary search streams for index files _posIndexWordSearchStream = new Dictionary <POS, BinarySearchTextStream>(); foreach (string indexPath in indexPaths) { // create binary search stream for index file BinarySearchTextStream searchStream = new BinarySearchTextStream(indexPath, new BinarySearchTextStream.SearchComparisonDelegate( delegate(string searchWord, string currentLine) { // if we landed on the header text, search further down if (currentLine[0] == ' ') { return(1); } 
// get word on current line string currentWord = currentLine.Substring(0, currentLine.IndexOf(' ')); // compare searched-for word to the current word return(((string)searchWord).CompareTo(currentWord)); } )); // add search stream for current POS _posIndexWordSearchStream.Add(GetFilePOS(indexPath), searchStream); } // open readers for synset data files _posSynSetDataFile = new Dictionary <POS, StreamReader>(); foreach (string dataPath in dataPaths) { _posSynSetDataFile.Add(GetFilePOS(dataPath), new StreamReader(dataPath)); } } #endregion }
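In the on-disk mode, lookup works only because each index file is sorted by its first column; the comparison delegate above skips past header lines (which begin with a space) and otherwise compares the query against the first token of the line the search landed on. The same idea over an in-memory list of index-style lines (the lines are invented for illustration, and BinarySearchTextStream itself is not reproduced):

using System;
using System.Collections.Generic;

class IndexLineSearchExample
{
    static int CompareToLine(string searchWord, string line)
    {
        if (line[0] == ' ') return 1;                             // header line: search further down
        string word = line.Substring(0, line.IndexOf(' '));
        return searchWord.CompareTo(word);
    }

    static void Main()
    {
        var lines = new List<string>
        {
            "  1 This is a header line",
            "abandon v 5 ...",
            "run v 41 ...",
            "zone v 2 ..."
        };

        // plain binary search driven by the same comparison the engine's delegate performs
        int lo = 0, hi = lines.Count - 1;
        string found = null;
        while (lo <= hi)
        {
            int mid = (lo + hi) / 2;
            int cmp = CompareToLine("run", lines[mid]);
            if (cmp == 0) { found = lines[mid]; break; }
            if (cmp > 0) lo = mid + 1; else hi = mid - 1;
        }

        Console.WriteLine(found ?? "not found");                  // run v 41 ...
    }
}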