/// <summary>
/// Builds one <see cref="Section"/> for every "section" element found by the layout parser. Each section's
/// constructor is handed the section's own definition XML, from which it extracts its subsections.
/// </summary>
/// <param name="lines">Lines to extract from</param>
/// <param name="parent">Parent section for the extracted sections, or null for top-level sections</param>
/// <param name="containingPage">Page that contains these sections</param>
/// <param name="sectionLayout">Layout of sections, as produced by Section.Layout.</param>
/// <returns>List of Section instances, in the order the layout yields them</returns>
public static List<Section> ExtractSections(List<string> lines, Section parent, Page containingPage, string sectionLayout)
{
    List<Section> extracted = new List<Section>();
    XmlParser layoutParser = new XmlParser(sectionLayout);

    string sectionXml;
    while ((sectionXml = layoutParser.OuterXML("section")) != null)
    {
        XmlParser sectionParser = new XmlParser(sectionXml);

        // the start/end attributes are shifted by the parent's start (0 without a parent)
        // before being used as indexes into the given lines
        int offset = parent != null ? parent.SectionStart : 0;
        int absoluteStart = int.Parse(sectionParser.AttributeValue("section", "start"));
        int absoluteEnd = int.Parse(sectionParser.AttributeValue("section", "end"));

        int relativeStart = absoluteStart - offset;
        int relativeEnd = relativeStart + (absoluteEnd - absoluteStart);

        List<string> sectionLines = Section.ExtractLines(lines, relativeStart, relativeEnd);
        extracted.Add(new Section(sectionLines, sectionXml, parent, containingPage));
    }

    return extracted;
}
/// <summary>
/// Constructor for non-marked up section. Reads the section's name and start/end positions from the
/// definition XML, then builds the subsections from the definition's inner XML.
/// </summary>
/// <param name="lines">Lines to create section from</param>
/// <param name="sectionDef">Section definition XML</param>
/// <param name="parentSection">Parent section</param>
/// <param name="containingPage">Page that contains this section</param>
public Section(List<string> lines, string sectionDef, Section parentSection, Page containingPage)
    : this()
{
    XmlParser definition = new XmlParser(sectionDef);

    _containingPage = containingPage;
    _parentSection = parentSection;
    _lines = lines;

    _name = definition.AttributeValue("section", "name");
    _sectionStart = int.Parse(definition.AttributeValue("section", "start"));
    _sectionEnd = int.Parse(definition.AttributeValue("section", "end"));

    // wrap the nested section definitions in a root element so they can be parsed as a layout
    string nestedDefinitions = definition.InnerXML("section");
    string nestedLayout = "<layout>" + nestedDefinitions + "</layout>";

    _subSections = ExtractSections(lines, this, containingPage, nestedLayout);
}
/// <summary>
/// Reads the ATT configuration XML at <paramref name="path"/> into the static configuration fields and,
/// when requested, initializes the database layer. If the configuration has already been initialized,
/// a notice is written to the console and nothing else happens.
/// </summary>
/// <param name="path">Path to the XML configuration file</param>
/// <param name="initializeDB">Whether to call DB.Initialize() once the configuration has been read</param>
/// <exception cref="FileNotFoundException">The shp2pgsql, pgsql2shp, or Java executable cannot be located</exception>
/// <exception cref="Exception">The processor count resolves to a value less than 1</exception>
public static void Initialize(string path, bool initializeDB)
{
    if (_initialized)
    {
        Console.Out.WriteLine("ATT configuration is already initialized");
        return;
    }

    _path = path;

    // NOTE(review): the sections below are read in a fixed order; if XmlParser is forward-only,
    // this order must match the order of the elements in the configuration file -- confirm.
    XmlParser p = new XmlParser(File.ReadAllText(_path));

    // PostgreSQL connection settings
    XmlParser postgresP = new XmlParser(p.OuterXML("postgres"));
    _postgresHost = postgresP.ElementText("host");
    _postgresPort = int.Parse(postgresP.ElementText("port"));
    _postgresSSL = bool.Parse(postgresP.ElementText("ssl"));
    _postgresDatabase = postgresP.ElementText("database");
    _postgresUser = postgresP.ElementText("user");
    _postgresPassword = postgresP.ElementText("password");
    _postgresConnectionTimeout = int.Parse(postgresP.ElementText("connection_timeout"));
    _postgresRetryLimit = int.Parse(postgresP.ElementText("connection_retry_limit"));
    _postgresCommandTimeout = int.Parse(postgresP.ElementText("command_timeout"));
    _postgresMaxPoolSize = int.Parse(postgresP.ElementText("max_pool_size"));

    // PostGIS shapefile tools
    XmlParser postgisP = new XmlParser(p.OuterXML("postgis"));
    _shp2pgsqlPath = postgisP.ElementText("shp2pgsql");
    _pgsql2shpPath = postgisP.ElementText("pgsql2shp");
    _postgisShapefileDirectory = postgisP.ElementText("shapefile_directory");

    if (string.IsNullOrWhiteSpace(_shp2pgsqlPath) || !File.Exists(_shp2pgsqlPath))
    {
        throw new FileNotFoundException("Failed to locate shp2pgsql executable. Check configuration.");
    }

    // the message names pgsql2shp so that the user is pointed at the setting that is actually wrong
    if (string.IsNullOrWhiteSpace(_pgsql2shpPath) || !File.Exists(_pgsql2shpPath))
    {
        throw new FileNotFoundException("Failed to locate pgsql2shp executable. Check configuration.");
    }

    // R: fall back to the R_EXE environment variable when the configured path is unusable
    XmlParser rP = new XmlParser(p.OuterXML("r"));
    string rExePath = rP.ElementText("exe_path");
    if (string.IsNullOrWhiteSpace(rExePath) || !File.Exists(rExePath))
    {
        rExePath = Environment.GetEnvironmentVariable("R_EXE");
    }

    R.ExePath = rExePath;

    _rPackageInstallDirectory = rP.ElementText("package_install_directory");
    if (!string.IsNullOrWhiteSpace(_rPackageInstallDirectory))
    {
        if (!Directory.Exists(_rPackageInstallDirectory))
        {
            Directory.CreateDirectory(_rPackageInstallDirectory);
        }

        R.AddLibPath(_rPackageInstallDirectory);
    }

    _rCranMirror = rP.ElementText("cran_mirror");

    // Java: fall back to the JAVA_EXE environment variable; unlike R, a missing executable is fatal
    XmlParser javaP = new XmlParser(p.OuterXML("java"));
    _javaExePath = javaP.ElementText("exe_path");
    if (string.IsNullOrWhiteSpace(_javaExePath) || !File.Exists(_javaExePath))
    {
        _javaExePath = Environment.GetEnvironmentVariable("JAVA_EXE");
    }

    if (string.IsNullOrWhiteSpace(_javaExePath) || !File.Exists(_javaExePath))
    {
        throw new FileNotFoundException("Failed to locate java.exe excutable. Check configuration.");
    }

    // classifier options: each child element of a classifier becomes an option name/value pair
    _classifierTypeOptions = new Dictionary<Type, Dictionary<string, string>>();
    XmlParser classifiersP = new XmlParser(p.OuterXML("classifiers"));
    string classifierXML;
    while ((classifierXML = classifiersP.OuterXML("classifier")) != null)
    {
        XmlParser classifierP = new XmlParser(classifierXML);
        Type type = Reflection.GetType(classifierP.AttributeValue("classifier", "type"));

        Dictionary<string, string> optionValue = new Dictionary<string, string>();
        string option;
        while ((option = classifierP.MoveToElementNode(false)) != null)
        {
            optionValue.Add(option, classifierP.ElementText(option));
        }

        _classifierTypeOptions.Add(type, optionValue);
    }

    // import/load directories
    XmlParser incidentsP = new XmlParser(p.OuterXML("incidents"));
    _incidentsImportDirectory = incidentsP.ElementText("import_directory");

    XmlParser eventsP = new XmlParser(p.OuterXML("events"));
    _eventsImportDirectory = eventsP.ElementText("import_directory");

    XmlParser importersP = new XmlParser(p.OuterXML("importers"));
    _importersLoadDirectory = importersP.ElementText("load_directory");

    // modeling: default the models directory to the local application data folder and create it if needed
    XmlParser modelingP = new XmlParser(p.OuterXML("modeling"));
    _modelsDirectory = modelingP.ElementText("model_directory");
    if (string.IsNullOrWhiteSpace(_modelsDirectory))
    {
        _modelsDirectory = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "att", "models");
    }

    if (!Directory.Exists(_modelsDirectory))
    {
        Directory.CreateDirectory(_modelsDirectory);
    }

    // feature extractors (optional section)
    _modelTypeFeatureExtractorType = new Dictionary<Type, Type>();
    _modelTypeFeatureExtractorConfigOptions = new Dictionary<Type, Dictionary<string, string>>();
    string featureExtractorsXML = modelingP.OuterXML("feature_extractors");
    if (featureExtractorsXML != null)
    {
        XmlParser featureExtractorsP = new XmlParser(featureExtractorsXML);
        string featureExtractorXML;
        while ((featureExtractorXML = featureExtractorsP.OuterXML("feature_extractor")) != null)
        {
            XmlParser featureExtractorConfigP = new XmlParser(featureExtractorXML);

            // every attribute of the element is kept as a configuration option
            Dictionary<string, string> configOptions = new Dictionary<string, string>();
            foreach (string option in featureExtractorConfigP.GetAttributeNames("feature_extractor"))
            {
                configOptions.Add(option, featureExtractorConfigP.AttributeValue("feature_extractor", option));
            }

            Type modelType = Reflection.GetType(configOptions["model_type"]);

            // get external feature extractor type: text of the form "type@path" names an external
            // location, whose directory is remembered and an assembly-resolve handler is registered
            string featureExtractorTypeStr = featureExtractorConfigP.ElementText("feature_extractor");
            string[] parts = featureExtractorTypeStr.Split('@');
            if (parts.Length > 1)
            {
                _externalFeatureExtractorDirectory = Path.GetDirectoryName(parts[1]);
                AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(LoadExternalFeatureExtractorAssembly);
            }

            Type featureExtractorType = Reflection.GetType(featureExtractorTypeStr, null);

            _modelTypeFeatureExtractorType.Add(modelType, featureExtractorType);
            _modelTypeFeatureExtractorConfigOptions.Add(modelType, configOptions);
        }
    }

    // system: -1 means "use every processor on this machine"
    XmlParser systemP = new XmlParser(p.OuterXML("system"));
    _processorCount = int.Parse(systemP.ElementText("processor_count"));
    if (_processorCount == -1)
    {
        _processorCount = Environment.ProcessorCount;
    }

    if (_processorCount <= 0)
    {
        throw new Exception("Invalid processor count (must be >= 1): " + _processorCount);
    }

    // NOTE(review): _initialized is only set when the database is initialized, so a call with
    // initializeDB == false does not prevent a later re-initialization -- confirm this is intended.
    if (initializeDB)
    {
        DB.Initialize();
        _initialized = true;
    }
}
/// <summary>
/// Builds the Frame index by parsing every file in the frames directory. Each file contributes one
/// frame, named after the verb portion of its role set IDs (IDs are in "verb.id" format).
/// </summary>
/// <param name="framesDirectory">Directory containing PropBank frame XML files</param>
/// <returns>Frame index - a map from verb lemma to corresponding frame</returns>
/// <exception cref="Exception">
/// The directory does not exist, a file contains role sets for more than one verb, or a file contains
/// no role sets at all.
/// </exception>
private static Dictionary<string, Frame> BuildFrameIndex(string framesDirectory)
{
    if (!Directory.Exists(framesDirectory))
    {
        throw new Exception("Invalid frame directory: \"" + framesDirectory + "\"");
    }

    // build index
    Dictionary<string, Frame> frameIndex = new Dictionary<string, Frame>();

    // get each frame file
    foreach (string frameFilePath in Directory.GetFiles(framesDirectory))
    {
        Frame currFrame = null;

        // read entire file and create XML parser; the using block releases the file handle even if reading fails
        string frameXML;
        using (StreamReader frameFile = new StreamReader(frameFilePath))
        {
            frameXML = frameFile.ReadToEnd();
        }

        XmlParser frameP = new XmlParser(frameXML);

        // get role sets
        string roleSetXML;
        while ((roleSetXML = frameP.OuterXML("roleset")) != null)
        {
            XmlParser roleSetP = new XmlParser(roleSetXML);

            // get role set ID string in verb.id format
            string roleSetIdStr = roleSetP.AttributeValue("roleset", "id").Trim();
            int dotIndex = roleSetIdStr.IndexOf('.');

            // get role set verb
            string roleSetVerb = roleSetIdStr.Substring(0, dotIndex);

            // if this is the first role set, create the frame
            if (currFrame == null)
            {
                currFrame = new Frame(roleSetVerb);
            }
            // all role sets must use the same verb
            else if (roleSetVerb != currFrame.Verb)
            {
                throw new Exception("Role set verb mismatch");
            }

            // get role set ID/name and create role set
            int roleSetID = int.Parse(roleSetIdStr.Substring(dotIndex + 1));
            string roleSetName = roleSetP.AttributeValue("roleset", "name");
            RoleSet roleSet = new RoleSet(roleSetID, roleSetName);

            // get roles
            string roleXML;
            while ((roleXML = roleSetP.OuterXML("role")) != null)
            {
                XmlParser roleP = new XmlParser(roleXML);

                string description = roleP.AttributeValue("role", "descr");
                string roleNumber = roleP.AttributeValue("role", "n").ToLower();

                // skip modifier and agentive modifier
                if (roleNumber == "m" || roleNumber == "a")
                {
                    continue;
                }

                Role role = new Role(description, int.Parse(roleNumber));
                roleSet.Add(role);
            }

            // add role set to frame
            currFrame.AddRoleSet(roleSet);
        }

        // a file without any role set never creates a frame; report that explicitly instead of
        // failing with a null reference when the frame is added to the index
        if (currFrame == null)
        {
            throw new Exception("No role sets found in frame file: \"" + frameFilePath + "\"");
        }

        frameIndex.Add(currFrame.Verb, currFrame);
    }

    return frameIndex;
}
/// <summary>
/// Gets the attestations (annotated sentences) recorded for a lexical unit. They are read from the file
/// "lu[ID].xml" in the annotation directory; the file layout that is expected depends on the FrameNet version.
/// </summary>
/// <param name="frame">Frame for which we're getting annotations; FE labels are resolved against its frame elements</param>
/// <param name="lexicalUnitID">ID of lexical unit for which to get annotations</param>
/// <returns>
/// One attestation per annotation set (FrameNet 1.3) or per sentence (FrameNet 1.5). The list is empty
/// when no annotation file exists for the lexical unit.
/// </returns>
/// <exception cref="Exception">
/// An FE or Target layer cannot be found, or the FrameNet version is not recognized.
/// </exception>
public List<Attestation> GetAttestations(Frame frame, int lexicalUnitID)
{
    List<Attestation> results = new List<Attestation>();

    // nothing to return when the lexical unit has no annotation file
    string luFilePath = Path.Combine(_annotationDirectory, "lu" + lexicalUnitID + ".xml");
    if (!File.Exists(luFilePath))
    {
        return results;
    }

    // attribute filters used to skip straight to the FE layer and the Target layer
    Dictionary<string, string> feLayerFilter = new Dictionary<string, string> { { "name", "FE" } };
    Dictionary<string, string> targetLayerFilter = new Dictionary<string, string> { { "name", "Target" } };

    XmlParser fileParser = new XmlParser(File.ReadAllText(luFilePath));

    if (_version == FrameNetEngine.Version.FrameNet_1_3)
    {
        string setXml;
        while ((setXml = fileParser.OuterXML("annotationSet")) != null)
        {
            Attestation attestation = new Attestation();
            XmlParser setParser = new XmlParser(setXml);

            // the sentence is below the annotation layers, so read it first...
            string sentenceXml = setParser.OuterXML("sentence");
            attestation.Sentence = new XmlParser(sentenceXml).ElementText("text").Trim();

            // ...and rewind, because the parser is forward-only
            setParser.Reset();

            // FE bindings
            if (!setParser.SkipToElement("layer", feLayerFilter))
            {
                throw new Exception("Failed to find FE layer in annotation set");
            }

            XmlParser feLayerParser = new XmlParser(setParser.OuterXML("layer"));
            string labelXml;
            while ((labelXml = feLayerParser.OuterXML("label")) != null)
            {
                XmlParser label = new XmlParser(labelXml);

                // null instantiations have no start/end values and are skipped
                if (label.GetAttributeNames("label").Contains("start"))
                {
                    int spanStart = int.Parse(label.AttributeValue("label", "start"));
                    int spanEnd = int.Parse(label.AttributeValue("label", "end"));
                    string spanText = attestation.Sentence.Substring(spanStart, spanEnd - spanStart + 1);
                    AnnotatedSpan feSpan = new AnnotatedSpan(spanStart, spanText);

                    // the label may name a frame element the frame does not have; such labels are ignored
                    string feName = label.AttributeValue("label", "name");
                    if (frame.FrameElements.Contains(feName))
                    {
                        FrameElement boundElement = frame.FrameElements.Get(feName);
                        attestation.FrameElementBindings.EnsureContainsKey(boundElement, typeof(List<AnnotatedSpan>));
                        attestation.FrameElementBindings[boundElement].Add(feSpan);
                    }
                }
            }

            // targets: rewind again, since the Target layer sometimes comes before the FE layer
            setParser.Reset();
            if (!setParser.SkipToElement("layer", targetLayerFilter))
            {
                throw new Exception("Failed to find target layer in annotation set");
            }

            XmlParser targetLayerParser = new XmlParser(setParser.OuterXML("layer"));
            while ((labelXml = targetLayerParser.OuterXML("label")) != null)
            {
                XmlParser label = new XmlParser(labelXml);
                int spanStart = int.Parse(label.AttributeValue("label", "start"));
                int spanEnd = int.Parse(label.AttributeValue("label", "end"));
                string spanText = attestation.Sentence.Substring(spanStart, spanEnd - spanStart + 1);
                attestation.Targets.Add(new AnnotatedSpan(spanStart, spanText));
            }

            results.Add(attestation);
        }
    }
    else if (_version == FrameNetEngine.Version.FrameNet_1_5)
    {
        string sentenceXml;
        while ((sentenceXml = fileParser.OuterXML("sentence")) != null)
        {
            Attestation attestation = new Attestation();
            XmlParser sentenceParser = new XmlParser(sentenceXml);
            attestation.Sentence = sentenceParser.ElementText("text").Trim();

            // FE bindings
            if (!sentenceParser.SkipToElement("layer", feLayerFilter))
            {
                throw new Exception("Failed to find FE layer in annotation set");
            }

            XmlParser feLayerParser = new XmlParser(sentenceParser.OuterXML("layer"));
            string labelXml;
            while ((labelXml = feLayerParser.OuterXML("label")) != null)
            {
                XmlParser label = new XmlParser(labelXml);

                // null instantiations have no start/end values and are skipped
                if (label.GetAttributeNames("label").Contains("start"))
                {
                    int spanStart = int.Parse(label.AttributeValue("label", "start"));
                    int spanEnd = int.Parse(label.AttributeValue("label", "end"));
                    string spanText = attestation.Sentence.Substring(spanStart, spanEnd - spanStart + 1);
                    AnnotatedSpan feSpan = new AnnotatedSpan(spanStart, spanText);

                    // in this version frame elements are identified by ID; unknown IDs are ignored
                    int feID = int.Parse(label.AttributeValue("label", "feID"));
                    if (frame.FrameElements.Contains(feID))
                    {
                        FrameElement boundElement = frame.FrameElements.Get(feID);
                        attestation.FrameElementBindings.EnsureContainsKey(boundElement, typeof(List<AnnotatedSpan>));
                        attestation.FrameElementBindings[boundElement].Add(feSpan);
                    }
                }
            }

            // targets: rewind, since the Target layer sometimes comes before the FE layer
            sentenceParser.Reset();
            if (!sentenceParser.SkipToElement("layer", targetLayerFilter))
            {
                throw new Exception("Failed to find target layer in annotation set");
            }

            XmlParser targetLayerParser = new XmlParser(sentenceParser.OuterXML("layer"));
            while ((labelXml = targetLayerParser.OuterXML("label")) != null)
            {
                XmlParser label = new XmlParser(labelXml);
                int spanEnd = int.Parse(label.AttributeValue("label", "end"));
                int spanStart = int.Parse(label.AttributeValue("label", "start"));

                // bug in framenet: some sentences are shorter than their target offsets; skip those targets
                int sentenceLength = attestation.Sentence.Length;
                if (spanStart < sentenceLength && spanEnd < sentenceLength)
                {
                    string spanText = attestation.Sentence.Substring(spanStart, spanEnd - spanStart + 1);
                    attestation.Targets.Add(new AnnotatedSpan(spanStart, spanText));
                }
            }

            results.Add(attestation);
        }
    }
    else
    {
        throw new Exception("Unrecognized FrameNet version: " + _version);
    }

    return results;
}
/// <summary>
/// Extracts a VerbNet class from its XML definition, together with its verbs, thematic roles, examples
/// and (recursively) its subclasses. Every extracted class is also registered in the ID-to-class map.
/// </summary>
/// <param name="classXML">XML for class</param>
/// <param name="isSubClassXML">Whether or not the XML describes a sub-class</param>
/// <returns>VerbClass (the last one extracted, should the XML hold more than one matching element)</returns>
/// <exception cref="Exception">The XML contains no class element of the expected kind</exception>
private VerbClass ExtractClass(string classXML, bool isSubClassXML)
{
    string tag = isSubClassXML ? "VNSUBCLASS" : "VNCLASS";
    XmlParser outerParser = new XmlParser(classXML);
    VerbClass extracted = null;

    for (string elementXml = outerParser.OuterXML(tag); elementXml != null; elementXml = outerParser.OuterXML(tag))
    {
        XmlParser elementParser = new XmlParser(elementXml);

        // the ID keeps only the dotted number portion, with 0 as the root of all classes
        string dottedId = elementParser.AttributeValue(tag, "ID").Trim().Replace('-', '.');
        string numberPortion = dottedId.Substring(dottedId.IndexOf('.') + 1);
        extracted = new VerbClass("0." + numberPortion);

        // verbs
        XmlParser memberParser = new XmlParser(elementParser.OuterXML("MEMBERS"));
        for (string verb = memberParser.AttributeValue("MEMBER", "name"); verb != null; verb = memberParser.AttributeValue("MEMBER", "name"))
        {
            extracted.AddVerb(verb);
            memberParser.MoveToElementNode(false);
        }

        // thematic roles
        XmlParser roleParser = new XmlParser(elementParser.OuterXML("THEMROLES"));
        for (string roleName = roleParser.AttributeValue("THEMROLE", "type"); roleName != null; roleName = roleParser.AttributeValue("THEMROLE", "type"))
        {
            ThematicRole thematicRole = (ThematicRole)Enum.Parse(typeof(ThematicRole), roleName);
            extracted.AddThematicRole(thematicRole);
            roleParser.MoveToElementNode(false);
        }

        // examples
        XmlParser frameParser = new XmlParser(elementParser.OuterXML("FRAMES"));
        for (string exampleText = frameParser.ElementText("EXAMPLE"); exampleText != null; exampleText = frameParser.ElementText("EXAMPLE"))
        {
            extracted.AddExample(exampleText);
        }

        // subclasses (each is registered in the map before its parent)
        XmlParser subClassParser = new XmlParser(elementParser.OuterXML("SUBCLASSES"));
        for (string childXml = subClassParser.OuterXML("VNSUBCLASS"); childXml != null; childXml = subClassParser.OuterXML("VNSUBCLASS"))
        {
            VerbClass child = ExtractClass(childXml, true);
            extracted.AddChild(child);
        }

        // map id to verb class
        _idVerbClass.Add(extracted.ID, extracted);
    }

    if (extracted == null)
    {
        throw new Exception("Invalid class XML");
    }

    return extracted;
}
/// <summary>
/// Reads the ATT configuration XML at <paramref name="path"/> into the static configuration fields and,
/// when requested, initializes the database layer. If the configuration has already been initialized,
/// a notice is written to the console and nothing else happens.
/// </summary>
/// <param name="path">Path to the XML configuration file</param>
/// <param name="initializeDB">Whether to call DB.Initialize() once the configuration has been read</param>
/// <exception cref="FileNotFoundException">The shp2pgsql, pgsql2shp, or Java executable cannot be located</exception>
/// <exception cref="Exception">The processor count resolves to a value less than 1</exception>
public static void Initialize(string path, bool initializeDB)
{
    if (_initialized)
    {
        Console.Out.WriteLine("ATT configuration is already initialized");
        return;
    }

    _path = path;

    // NOTE(review): the sections below are read in a fixed order; if XmlParser is forward-only,
    // this order must match the order of the elements in the configuration file -- confirm.
    XmlParser p = new XmlParser(File.ReadAllText(_path));

    // PostgreSQL connection settings
    XmlParser postgresP = new XmlParser(p.OuterXML("postgres"));
    _postgresHost = postgresP.ElementText("host");
    _postgresPort = int.Parse(postgresP.ElementText("port"));
    _postgresSSL = bool.Parse(postgresP.ElementText("ssl"));
    _postgresDatabase = postgresP.ElementText("database");
    _postgresUser = postgresP.ElementText("user");
    _postgresPassword = postgresP.ElementText("password");
    _postgresConnectionTimeout = int.Parse(postgresP.ElementText("connection_timeout"));
    _postgresRetryLimit = int.Parse(postgresP.ElementText("connection_retry_limit"));
    _postgresCommandTimeout = int.Parse(postgresP.ElementText("command_timeout"));
    _postgresMaxPoolSize = int.Parse(postgresP.ElementText("max_pool_size"));

    // PostGIS shapefile tools
    XmlParser postgisP = new XmlParser(p.OuterXML("postgis"));
    _shp2pgsqlPath = postgisP.ElementText("shp2pgsql");
    _pgsql2shpPath = postgisP.ElementText("pgsql2shp");
    _postgisShapefileDirectory = postgisP.ElementText("shapefile_directory");

    if (string.IsNullOrWhiteSpace(_shp2pgsqlPath) || !File.Exists(_shp2pgsqlPath))
    {
        throw new FileNotFoundException("Failed to locate shp2pgsql executable. Check configuration.");
    }

    // the message names pgsql2shp so that the user is pointed at the setting that is actually wrong
    if (string.IsNullOrWhiteSpace(_pgsql2shpPath) || !File.Exists(_pgsql2shpPath))
    {
        throw new FileNotFoundException("Failed to locate pgsql2shp executable. Check configuration.");
    }

    // R: fall back to the R_EXE environment variable when the configured path is unusable
    XmlParser rP = new XmlParser(p.OuterXML("r"));
    string rExePath = rP.ElementText("exe_path");
    if (string.IsNullOrWhiteSpace(rExePath) || !File.Exists(rExePath))
    {
        rExePath = Environment.GetEnvironmentVariable("R_EXE");
    }

    R.ExePath = rExePath;

    _rPackageInstallDirectory = rP.ElementText("package_install_directory");
    if (!string.IsNullOrWhiteSpace(_rPackageInstallDirectory))
    {
        if (!Directory.Exists(_rPackageInstallDirectory))
        {
            Directory.CreateDirectory(_rPackageInstallDirectory);
        }

        R.AddLibPath(_rPackageInstallDirectory);
    }

    _rCranMirror = rP.ElementText("cran_mirror");

    // Java: fall back to the JAVA_EXE environment variable; unlike R, a missing executable is fatal
    XmlParser javaP = new XmlParser(p.OuterXML("java"));
    _javaExePath = javaP.ElementText("exe_path");
    if (string.IsNullOrWhiteSpace(_javaExePath) || !File.Exists(_javaExePath))
    {
        _javaExePath = Environment.GetEnvironmentVariable("JAVA_EXE");
    }

    if (string.IsNullOrWhiteSpace(_javaExePath) || !File.Exists(_javaExePath))
    {
        throw new FileNotFoundException("Failed to locate java.exe excutable. Check configuration.");
    }

    // classifier options: each child element of a classifier becomes an option name/value pair
    _classifierTypeOptions = new Dictionary<Type, Dictionary<string, string>>();
    XmlParser classifiersP = new XmlParser(p.OuterXML("classifiers"));
    string classifierXML;
    while ((classifierXML = classifiersP.OuterXML("classifier")) != null)
    {
        XmlParser classifierP = new XmlParser(classifierXML);
        Type type = Reflection.GetType(classifierP.AttributeValue("classifier", "type"));

        Dictionary<string, string> optionValue = new Dictionary<string, string>();
        string option;
        while ((option = classifierP.MoveToElementNode(false)) != null)
        {
            optionValue.Add(option, classifierP.ElementText(option));
        }

        _classifierTypeOptions.Add(type, optionValue);
    }

    // import/load directories
    XmlParser incidentsP = new XmlParser(p.OuterXML("incidents"));
    _incidentsImportDirectory = incidentsP.ElementText("import_directory");

    XmlParser eventsP = new XmlParser(p.OuterXML("events"));
    _eventsImportDirectory = eventsP.ElementText("import_directory");

    XmlParser importersP = new XmlParser(p.OuterXML("importers"));
    _importersLoadDirectory = importersP.ElementText("load_directory");

    // modeling: default the models directory to the local application data folder and create it if needed
    XmlParser modelingP = new XmlParser(p.OuterXML("modeling"));
    _modelsDirectory = modelingP.ElementText("model_directory");
    if (string.IsNullOrWhiteSpace(_modelsDirectory))
    {
        _modelsDirectory = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "att", "models");
    }

    if (!Directory.Exists(_modelsDirectory))
    {
        Directory.CreateDirectory(_modelsDirectory);
    }

    // feature extractors (optional section)
    _modelTypeFeatureExtractorType = new Dictionary<Type, Type>();
    _modelTypeFeatureExtractorConfigOptions = new Dictionary<Type, Dictionary<string, string>>();
    string featureExtractorsXML = modelingP.OuterXML("feature_extractors");
    if (featureExtractorsXML != null)
    {
        XmlParser featureExtractorsP = new XmlParser(featureExtractorsXML);
        string featureExtractorXML;
        while ((featureExtractorXML = featureExtractorsP.OuterXML("feature_extractor")) != null)
        {
            XmlParser featureExtractorConfigP = new XmlParser(featureExtractorXML);

            // every attribute of the element is kept as a configuration option
            Dictionary<string, string> configOptions = new Dictionary<string, string>();
            foreach (string option in featureExtractorConfigP.GetAttributeNames("feature_extractor"))
            {
                configOptions.Add(option, featureExtractorConfigP.AttributeValue("feature_extractor", option));
            }

            Type modelType = Reflection.GetType(configOptions["model_type"]);

            // get external feature extractor type: text of the form "type@path" names an external
            // location, whose directory is remembered and an assembly-resolve handler is registered
            string featureExtractorTypeStr = featureExtractorConfigP.ElementText("feature_extractor");
            string[] parts = featureExtractorTypeStr.Split('@');
            if (parts.Length > 1)
            {
                _externalFeatureExtractorDirectory = Path.GetDirectoryName(parts[1]);
                AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(LoadExternalFeatureExtractorAssembly);
            }

            Type featureExtractorType = Reflection.GetType(featureExtractorTypeStr, null);

            _modelTypeFeatureExtractorType.Add(modelType, featureExtractorType);
            _modelTypeFeatureExtractorConfigOptions.Add(modelType, configOptions);
        }
    }

    // system: -1 means "use every processor on this machine"
    XmlParser systemP = new XmlParser(p.OuterXML("system"));
    _processorCount = int.Parse(systemP.ElementText("processor_count"));
    if (_processorCount == -1)
    {
        _processorCount = Environment.ProcessorCount;
    }

    if (_processorCount <= 0)
    {
        throw new Exception("Invalid processor count (must be >= 1): " + _processorCount);
    }

    // NOTE(review): _initialized is only set when the database is initialized, so a call with
    // initializeDB == false does not prevent a later re-initialization -- confirm this is intended.
    if (initializeDB)
    {
        DB.Initialize();
        _initialized = true;
    }
}
/// <summary>
/// Constructor. Loads frames, frame elements, lexical units and frame/frame-element relations from a
/// FrameNet distribution and fills the engine's lookup maps. The files that are read and the element and
/// attribute names that are expected depend on the FrameNet version.
/// </summary>
/// <param name="frameNetDirectory">Path to FrameNet distribution directory</param>
/// <param name="version">FrameNet version</param>
/// <exception cref="DirectoryNotFoundException">The FrameNet directory does not exist</exception>
/// <exception cref="Exception">The FrameNet version is not recognized</exception>
public FrameNetEngine(string frameNetDirectory, Version version)
{
    if (!System.IO.Directory.Exists(frameNetDirectory))
    {
        throw new DirectoryNotFoundException("Invalid FrameNet directory");
    }

    _frameNameFrame = new Dictionary<string, Frame>();
    _frameElementIdFrameElement = new Dictionary<int, FrameElement>();
    _lexemeLexicalUnitIDs = new Dictionary<string, Set<int>>();
    _lexicalUnitIdFrame = new Dictionary<int, Frame>();
    _lexicalUnitLexicalUnitIDs = new Dictionary<string, Set<int>>();
    _lexicalUnitIdLexicalUnit = new Dictionary<int, LexicalUnit>();

    // only the two versions handled below are supported
    if (version != Version.FrameNet_1_3 && version != Version.FrameNet_1_5)
    {
        throw new Exception("Unrecognized FrameNet version: " + version);
    }

    if (version == Version.FrameNet_1_3)
    {
        // annotation engine reads from the "luXML" directory in this version
        string annotationDirectory = LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "luXML");
        _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(annotationDirectory, version);

        // ---- frames: all frames live in a single frames.xml file ----
        Set<int> seenFrameIDs = new Set<int>();
        string framesFilePath = LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frames.xml");
        XmlParser allFramesParser = new XmlParser(System.IO.File.ReadAllText(framesFilePath));
        while (allFramesParser.SkipToElement("frame"))
        {
            XmlParser frameParser = new XmlParser(allFramesParser.OuterXML("frame"));

            // frame names are indexed in lowercase
            int frameID = int.Parse(frameParser.AttributeValue("frame", "ID"));
            string frameName = frameParser.AttributeValue("frame", "name").ToLower().Trim();
            string frameDefinition = frameParser.ElementText("definition");
            Frame currentFrame = new Frame(frameName, frameDefinition, frameID);

            _frameNameFrame.Add(currentFrame.Name, currentFrame);
            seenFrameIDs.Add(currentFrame.ID);

            // frame elements
            XmlParser elementsParser = new XmlParser(frameParser.OuterXML("fes"));
            string elementXml;
            while ((elementXml = elementsParser.OuterXML("fe")) != null)
            {
                XmlParser elementParser = new XmlParser(elementXml);
                int elementID = int.Parse(elementParser.AttributeValue("fe", "ID"));
                string elementName = elementParser.AttributeValue("fe", "name").Trim().ToLower();
                string elementDefinition = elementParser.ElementText("definition");

                FrameElement element = new FrameElement(elementID, elementName, elementDefinition, currentFrame);
                currentFrame.FrameElements.Add(element);
                _frameElementIdFrameElement.Add(element.ID, element);
            }

            // lexical units
            XmlParser unitsParser = new XmlParser(frameParser.OuterXML("lexunits"));
            string unitXml;
            while ((unitXml = unitsParser.OuterXML("lexunit")) != null)
            {
                XmlParser unitParser = new XmlParser(unitXml);
                int unitID = int.Parse(unitParser.AttributeValue("lexunit", "ID"));
                string unitName = unitParser.AttributeValue("lexunit", "name");
                unitName = unitName.Substring(0, unitName.IndexOf('.'));
                string unitPos = unitParser.AttributeValue("lexunit", "pos");
                string unitDefinition = unitParser.ElementText("definition");

                // lexemes for this lexical unit...duplicates may occur and are tolerated
                Set<Lexeme> unitLexemes = new Set<Lexeme>(false);
                XmlParser lexemesParser = new XmlParser(unitParser.OuterXML("lexemes"));
                string lexemeXml;
                while ((lexemeXml = lexemesParser.OuterXML("lexeme")) != null)
                {
                    XmlParser lexemeParser = new XmlParser(lexemeXml);
                    string lexemePos = lexemeParser.AttributeValue("lexeme", "pos");
                    bool lexemeBreakBefore = bool.Parse(lexemeParser.AttributeValue("lexeme", "breakBefore"));
                    bool lexemeIsHead = bool.Parse(lexemeParser.AttributeValue("lexeme", "headword"));
                    string lexemeValue = lexemeParser.ElementText("lexeme");

                    unitLexemes.Add(new Lexeme(lexemeValue, lexemePos, lexemeBreakBefore, lexemeIsHead));
                }

                LexicalUnit unit = new LexicalUnit(unitID, unitName, unitPos, unitDefinition, unitLexemes);
                currentFrame.LexicalUnits.Add(unit);

                // full lexeme string -> lexical unit IDs
                string lexemeKey = unit.ToString();
                _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeKey, typeof(Set<int>), false);
                _lexemeLexicalUnitIDs[lexemeKey].Add(unitID);

                // lexical unit ID -> frame
                _lexicalUnitIdFrame.Add(unit.ID, currentFrame);

                // lexical unit name -> lexical unit IDs
                _lexicalUnitLexicalUnitIDs.EnsureContainsKey(unit.Name, typeof(Set<int>));
                _lexicalUnitLexicalUnitIDs[unit.Name].Add(unit.ID);

                // lexical unit ID -> lexical unit
                _lexicalUnitIdLexicalUnit.Add(unit.ID, unit);
            }
        }

        // ---- frame relations ----
        string relationsFilePath = LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml");
        allFramesParser = new XmlParser(System.IO.File.ReadAllText(relationsFilePath));
        string relationTypeXml;
        while ((relationTypeXml = allFramesParser.OuterXML("frame-relation-type")) != null)
        {
            XmlParser relationTypeParser = new XmlParser(relationTypeXml);
            Frame.FrameRelation relationType = Frame.GetFrameRelation(relationTypeParser.AttributeValue("frame-relation-type", "name"));

            string relationXml;
            while ((relationXml = relationTypeParser.OuterXML("frame-relation")) != null)
            {
                XmlParser relationParser = new XmlParser(relationXml);

                string superName = relationParser.AttributeValue("frame-relation", "superFrameName").ToLower();
                string subName = relationParser.AttributeValue("frame-relation", "subFrameName").ToLower();
                Frame superFrame = _frameNameFrame[superName];
                Frame subFrame = _frameNameFrame[subName];

                superFrame.GetSubFrames(relationType).Add(subFrame);
                subFrame.GetSuperFrames(relationType).Add(superFrame);

                // frame element relations within this frame relation
                while (relationParser.SkipToElement("fe-relation"))
                {
                    int superElementID = int.Parse(relationParser.AttributeValue("fe-relation", "supId"));
                    int subElementID = int.Parse(relationParser.AttributeValue("fe-relation", "subId"));
                    FrameElement superElement = superFrame.FrameElements.Get(superElementID);
                    FrameElement subElement = subFrame.FrameElements.Get(subElementID);

                    superElement.AddSubFrameElement(subElement, relationType);
                    subElement.AddSuperFrameElement(superElement, relationType);
                }
            }
        }
    }
    else
    {
        // FrameNet 1.5: annotation engine reads from the "lu" directory in this version
        string annotationDirectory = LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "lu");
        _lexicalUnitAnnotationEngine = new LexicalUnitAnnotationEngine(annotationDirectory, version);

        // ---- frames: one XML file per frame in the "frame" directory ----
        Set<int> seenFrameIDs = new Set<int>();
        string frameDirectory = LAIR.CommonPort.IO.Directory.FindDirectory(frameNetDirectory, "frame");
        foreach (string frameFilePath in System.IO.Directory.GetFiles(frameDirectory, "*.xml"))
        {
            XmlParser frameParser = new XmlParser(System.IO.File.ReadAllText(frameFilePath));

            // frame names are indexed in lowercase
            int frameID = int.Parse(frameParser.AttributeValue("frame", "ID"));
            string frameName = frameParser.AttributeValue("frame", "name").ToLower().Trim();
            string frameDefinition = frameParser.ElementText("definition");
            Frame currentFrame = new Frame(frameName, frameDefinition, frameID);

            _frameNameFrame.Add(currentFrame.Name, currentFrame);
            seenFrameIDs.Add(currentFrame.ID);

            // frame elements
            string elementXml;
            while ((elementXml = frameParser.OuterXML("FE")) != null)
            {
                XmlParser elementParser = new XmlParser(elementXml);
                int elementID = int.Parse(elementParser.AttributeValue("FE", "ID"));
                string elementName = elementParser.AttributeValue("FE", "name").Trim().ToLower();
                string elementDefinition = elementParser.ElementText("definition");

                FrameElement element = new FrameElement(elementID, elementName, elementDefinition, currentFrame);
                currentFrame.FrameElements.Add(element);
                _frameElementIdFrameElement.Add(element.ID, element);
            }

            // lexical units: rewind the frame parser before scanning for them
            frameParser.Reset();
            string unitXml;
            while ((unitXml = frameParser.OuterXML("lexUnit")) != null)
            {
                XmlParser unitParser = new XmlParser(unitXml);
                string unitPos = unitParser.AttributeValue("lexUnit", "POS");
                string unitName = unitParser.AttributeValue("lexUnit", "name");
                unitName = unitName.Substring(0, unitName.IndexOf('.'));
                int unitID = int.Parse(unitParser.AttributeValue("lexUnit", "ID"));
                string unitDefinition = unitParser.ElementText("definition");

                // lexemes for this lexical unit...duplicates may occur and are tolerated
                Set<Lexeme> unitLexemes = new Set<Lexeme>(false);
                string lexemeXml;
                while ((lexemeXml = unitParser.OuterXML("lexeme")) != null)
                {
                    XmlParser lexemeParser = new XmlParser(lexemeXml);
                    bool lexemeIsHead = bool.Parse(lexemeParser.AttributeValue("lexeme", "headword"));
                    bool lexemeBreakBefore = bool.Parse(lexemeParser.AttributeValue("lexeme", "breakBefore"));
                    string lexemePos = lexemeParser.AttributeValue("lexeme", "POS");
                    string lexemeValue = lexemeParser.AttributeValue("lexeme", "name");

                    unitLexemes.Add(new Lexeme(lexemeValue, lexemePos, lexemeBreakBefore, lexemeIsHead));
                }

                LexicalUnit unit = new LexicalUnit(unitID, unitName, unitPos, unitDefinition, unitLexemes);
                currentFrame.LexicalUnits.Add(unit);

                // full lexeme string -> lexical unit IDs
                string lexemeKey = unit.ToString();
                _lexemeLexicalUnitIDs.EnsureContainsKey(lexemeKey, typeof(Set<int>), false);
                _lexemeLexicalUnitIDs[lexemeKey].Add(unitID);

                // lexical unit ID -> frame
                _lexicalUnitIdFrame.Add(unit.ID, currentFrame);

                // lexical unit name -> lexical unit IDs
                _lexicalUnitLexicalUnitIDs.EnsureContainsKey(unit.Name, typeof(Set<int>));
                _lexicalUnitLexicalUnitIDs[unit.Name].Add(unit.ID);

                // lexical unit ID -> lexical unit
                _lexicalUnitIdLexicalUnit.Add(unit.ID, unit);
            }
        }

        // ---- frame relations ----
        string relationsFilePath = LAIR.CommonPort.IO.Directory.FindFile(frameNetDirectory, "frRelation.xml");
        XmlParser allRelationsParser = new XmlParser(System.IO.File.ReadAllText(relationsFilePath));
        string relationTypeXml;
        while ((relationTypeXml = allRelationsParser.OuterXML("frameRelationType")) != null)
        {
            XmlParser relationTypeParser = new XmlParser(relationTypeXml);
            Frame.FrameRelation relationType = Frame.GetFrameRelation(relationTypeParser.AttributeValue("frameRelationType", "name"));

            // each instance of the relation
            string relationXml;
            while ((relationXml = relationTypeParser.OuterXML("frameRelation")) != null)
            {
                XmlParser relationParser = new XmlParser(relationXml);

                string subName = relationParser.AttributeValue("frameRelation", "subFrameName").ToLower();
                Frame subFrame = _frameNameFrame[subName];
                string superName = relationParser.AttributeValue("frameRelation", "superFrameName").ToLower();
                Frame superFrame = _frameNameFrame[superName];

                subFrame.GetSuperFrames(relationType).Add(superFrame);
                superFrame.GetSubFrames(relationType).Add(subFrame);

                // frame element relations within this frame relation
                while (relationParser.SkipToElement("FERelation"))
                {
                    int subElementID = int.Parse(relationParser.AttributeValue("FERelation", "subID"));
                    FrameElement subElement = subFrame.FrameElements.Get(subElementID);
                    int superElementID = int.Parse(relationParser.AttributeValue("FERelation", "supID"));
                    FrameElement superElement = superFrame.FrameElements.Get(superElementID);

                    subElement.AddSuperFrameElement(superElement, relationType);
                    superElement.AddSubFrameElement(subElement, relationType);
                }
            }
        }
    }
}
/// <summary>
/// Reads entries from a gzip-compressed point prediction log. Every log line is expected to hold a point ID, a single
/// space, and then an XML fragment with an "ls" element (child "l" elements whose text is the label and whose "c"
/// attribute is the confidence) followed by an "fvs" element (child "fv" elements whose "id" attribute is the feature
/// ID and whose text is the feature value).
/// </summary>
/// <param name="pointPredictionLogPath">Path to point prediction log</param>
/// <param name="pointIds">Point IDs to read log for, or null for all points. When non-null, each ID that is found is
/// removed from this set, and reading stops as soon as the set becomes empty.</param>
/// <returns>Map from point ID to a pair of lists: (label, confidence) tuples and (feature ID, feature value) tuples</returns>
public override Dictionary<string, Tuple<List<Tuple<string, double>>, List<Tuple<string, string>>>> ReadPointPredictionLog(string pointPredictionLogPath, Set<string> pointIds = null)
{
    Dictionary<string, Tuple<List<Tuple<string, double>>, List<Tuple<string, string>>>> log = new Dictionary<string, Tuple<List<Tuple<string, double>>, List<Tuple<string, string>>>>();

    using (FileStream rawStream = new FileStream(pointPredictionLogPath, FileMode.Open, FileAccess.Read))
    using (GZipStream unzippedStream = new GZipStream(rawStream, CompressionMode.Decompress))
    using (StreamReader reader = new StreamReader(unzippedStream))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            // the point ID is everything before the first space
            int separatorIndex = line.IndexOf(' ');
            string pointId = line.Substring(0, separatorIndex);

            // skip points the caller did not ask for
            if (pointIds != null && !pointIds.Contains(pointId))
            {
                continue;
            }

            // the remainder of the line is the XML payload
            XmlParser entryParser = new XmlParser(line.Substring(separatorIndex + 1));

            // labels and confidences
            List<Tuple<string, double>> labelConfidences = new List<Tuple<string, double>>();
            XmlParser labelListParser = new XmlParser(entryParser.OuterXML("ls"));
            for (string labelXml = labelListParser.OuterXML("l"); labelXml != null; labelXml = labelListParser.OuterXML("l"))
            {
                XmlParser labelParser = new XmlParser(labelXml);

                // read the attribute before the element text, as the original did
                double confidence = double.Parse(labelParser.AttributeValue("l", "c"));
                string label = labelParser.ElementText("l");
                labelConfidences.Add(new Tuple<string, double>(label, confidence));
            }

            // feature IDs and values
            List<Tuple<string, string>> featureValues = new List<Tuple<string, string>>();
            XmlParser featureListParser = new XmlParser(entryParser.OuterXML("fvs"));
            for (string featureXml = featureListParser.OuterXML("fv"); featureXml != null; featureXml = featureListParser.OuterXML("fv"))
            {
                XmlParser featureParser = new XmlParser(featureXml);

                // read the attribute before the element text, as the original did
                string featureId = featureParser.AttributeValue("fv", "id");
                string featureValue = featureParser.ElementText("fv");
                featureValues.Add(new Tuple<string, string>(featureId, featureValue));
            }

            log.Add(pointId, new Tuple<List<Tuple<string, double>>, List<Tuple<string, string>>>(labelConfidences, featureValues));

            // when filtering, tick off the point just read and stop once nothing is left to find
            if (pointIds != null)
            {
                pointIds.Remove(pointId);
                if (pointIds.Count == 0)
                {
                    break;
                }
            }
        }

        reader.Close();
    }

    return log;
}
/// <summary>
/// Forwards to the inner parser's AttributeValue with the same two arguments and returns its result.
/// </summary>
/// <param name="roleset">First argument passed through to the inner parser (presumably the element name - confirm against the inner parser)</param>
/// <param name="p1">Second argument passed through to the inner parser (presumably the attribute name - confirm against the inner parser)</param>
/// <returns>The value returned by the inner parser</returns>
public string AttributeValue(string roleset, string p1)
{
    string value = innerParser.AttributeValue(roleset, p1);
    return value;
}
/// <summary>
/// Constructor. Reads the PropBank-VerbNet linking file and the FrameNet-VerbNet linking file and, for each of
/// them, builds a lookup table in both directions.
/// </summary>
/// <param name="dataDirectory">Path to the SemLink data directory</param>
/// <exception cref="FileNotFoundException">Either linking file does not exist</exception>
/// <exception cref="Exception">The PropBank-VerbNet file contains a blank verb, a non-positive role set,
/// a blank VerbNet class, or a blank VerbNet role</exception>
public SemLinkEngine(string dataDirectory)
{
    _dataDirectory = dataDirectory;

    #region propbank-verbnet
    // the linking file must exist
    if (!File.Exists(PropBankVerbNetLinkingPath))
    {
        throw new FileNotFoundException("Failed to find PropBank-VerbNet mapping file: " + PropBankVerbNetLinkingPath);
    }

    // PropBank role ("verb.roleset.arg") -> VerbNet roles ("class.theta")
    Dictionary<string, Set<string>> pbToVn = new Dictionary<string, Set<string>>();
    _propBankRoleVerbNetRoles = pbToVn;

    XmlParser pbVnParser = new XmlParser(File.ReadAllText(PropBankVerbNetLinkingPath));
    for (string predicateXml = pbVnParser.OuterXML("predicate"); predicateXml != null; predicateXml = pbVnParser.OuterXML("predicate"))
    {
        XmlParser predicateParser = new XmlParser(predicateXml);

        string verb = predicateParser.AttributeValue("predicate", "lemma").Trim();
        if (verb.Length == 0)
        {
            throw new Exception("Blank PropBank verb");
        }

        // walk the argument mappings of this predicate
        for (string argMapXml = predicateParser.OuterXML("argmap"); argMapXml != null; argMapXml = predicateParser.OuterXML("argmap"))
        {
            XmlParser argMapParser = new XmlParser(argMapXml);

            // the role set number is whatever follows the first period of the pb-roleset attribute
            string roleSetText = argMapParser.AttributeValue("argmap", "pb-roleset");
            int roleSetNumber = int.Parse(roleSetText.Substring(roleSetText.IndexOf('.') + 1));
            if (roleSetNumber <= 0)
            {
                throw new Exception("Invalid PropBank role set: " + roleSetNumber);
            }

            // VerbNet class, written with periods instead of dashes
            string verbNetClass = argMapParser.AttributeValue("argmap", "vn-class").Trim().Replace("-", ".");
            if (verbNetClass.Length == 0)
            {
                throw new Exception("Blank VerbNet class");
            }

            // walk the individual role mappings
            for (string pbRoleXml = argMapParser.OuterXML("role"); pbRoleXml != null; pbRoleXml = argMapParser.OuterXML("role"))
            {
                XmlParser pbRoleParser = new XmlParser(pbRoleXml);

                // "M" and "A" arguments are not numbered, so they are left out of the mapping
                string argText = pbRoleParser.AttributeValue("role", "pb-arg");
                if (argText == "M" || argText == "A")
                {
                    continue;
                }

                // fully-specified PropBank role
                int argNumber = int.Parse(argText);
                string propBankKey = verb + "." + roleSetNumber + "." + argNumber;

                // fully-specified VerbNet role
                string theta = pbRoleParser.AttributeValue("role", "vn-theta").Trim();
                if (theta.Length == 0)
                {
                    throw new Exception("Blank VerbNet role");
                }

                string verbNetKey = verbNetClass + "." + theta;

                pbToVn.EnsureContainsKey(propBankKey, typeof(Set<string>));
                pbToVn[propBankKey].Add(verbNetKey);
            }
        }
    }

    // reverse direction: VerbNet role -> PropBank roles
    Dictionary<string, Set<string>> vnToPb = new Dictionary<string, Set<string>>();
    _verbNetRolePropBankRoles = vnToPb;
    foreach (KeyValuePair<string, Set<string>> pbEntry in pbToVn)
    {
        foreach (string mappedVnRole in pbEntry.Value)
        {
            vnToPb.EnsureContainsKey(mappedVnRole, typeof(Set<string>));
            vnToPb[mappedVnRole].Add(pbEntry.Key);
        }
    }
    #endregion

    #region verbnet-framenet
    // the linking file must exist
    if (!File.Exists(FrameNetVerbNetLinkingPath))
    {
        throw new FileNotFoundException("Failed to find FrameNet-VerbNet mapping file: " + FrameNetVerbNetLinkingPath);
    }

    // VerbNet role ("class.role") -> frame elements ("frame.fe", lower-cased)
    Dictionary<string, Set<string>> vnToFe = new Dictionary<string, Set<string>>();
    _verbNetRoleFrameElements = vnToFe;

    XmlParser vnFnParser = new XmlParser(File.ReadAllText(FrameNetVerbNetLinkingPath));
    for (string classXml = vnFnParser.OuterXML("vncls"); classXml != null; classXml = vnFnParser.OuterXML("vncls"))
    {
        XmlParser classParser = new XmlParser(classXml);

        // VerbNet class (periods instead of dashes) and lower-cased FrameNet frame
        string className = classParser.AttributeValue("vncls", "class").Trim().Replace("-", ".");
        string frameName = classParser.AttributeValue("vncls", "fnframe").Trim().ToLower();

        // walk the role mappings of this class
        for (string fnRoleXml = classParser.OuterXML("role"); fnRoleXml != null; fnRoleXml = classParser.OuterXML("role"))
        {
            XmlParser fnRoleParser = new XmlParser(fnRoleXml);

            // frame element first, then VerbNet role, in the same order as the attributes were read originally
            string frameElementKey = frameName + "." + fnRoleParser.AttributeValue("role", "fnrole").Trim().ToLower();
            string classRoleKey = className + "." + fnRoleParser.AttributeValue("role", "vnrole").Trim();

            // NOTE(review): this is the only EnsureContainsKey call here that passes a third argument (false);
            // its meaning is defined by EnsureContainsKey, which is not visible in this block - kept as-is
            vnToFe.EnsureContainsKey(classRoleKey, typeof(Set<string>), false);
            vnToFe[classRoleKey].Add(frameElementKey);
        }
    }

    // reverse direction: frame element -> VerbNet roles
    Dictionary<string, Set<string>> feToVn = new Dictionary<string, Set<string>>();
    _frameElementVerbNetRoles = feToVn;
    foreach (KeyValuePair<string, Set<string>> vnEntry in vnToFe)
    {
        foreach (string mappedFrameElement in vnEntry.Value)
        {
            feToVn.EnsureContainsKey(mappedFrameElement, typeof(Set<string>));
            feToVn[mappedFrameElement].Add(vnEntry.Key);
        }
    }
    #endregion
}