/// <summary>
/// Reads the content of a feature generator descriptor file into memory.
/// </summary>
/// <param name="featureGenDescriptorFile">descriptor file to read; may be null</param>
/// <returns>the bytes read from the file, or null when no file was given</returns>
/// <exception cref="TerminateToolException">when reading the file raises an IOException</exception>
internal static sbyte[] openFeatureGeneratorBytes(Jfile featureGenDescriptorFile)
{
    // without a descriptor file there is nothing to load
    if (featureGenDescriptorFile == null)
    {
        return null;
    }

    InputStream descriptorIn = CmdLineUtil.openInFile(featureGenDescriptorFile);
    try
    {
        return ModelUtil.read(descriptorIn);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            descriptorIn.close();
        }
        catch (IOException)
        {
            // closing is best effort; a failure here is deliberately ignored
        }
    }
}
/// <summary>
/// Parses the command line arguments and creates a BioNLP 2004 name sample stream
/// over the configured data file.
/// </summary>
/// <remarks>
/// Every entity type listed in the <c>Types</c> parameter contributes its flag to the
/// bit mask handed to the stream. The checks are independent of each other so that
/// several types can be requested at once; a chained <c>else if</c> would only ever
/// enable the first matching type.
/// </remarks>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>BioNLP2004NameSampleStream</c> reading from the data file</returns>
public override ObjectStream <T> create <T>(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

    // OR together the flags of all requested entity types
    int typesToGenerate = 0;

    if (@params.Types.Contains("DNA"))
    {
        typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
    }

    if (@params.Types.Contains("protein"))
    {
        typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
    }

    if (@params.Types.Contains("cell_type"))
    {
        typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
    }

    if (@params.Types.Contains("cell_line"))
    {
        typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
    }

    if (@params.Types.Contains("RNA"))
    {
        typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
    }

    return(new BioNLP2004NameSampleStream(CmdLineUtil.openInFile(@params.Data), typesToGenerate));
}
/// <summary>
/// Parses the command line arguments, verifies the data file and creates a
/// coreference sample stream that reads it paragraph by paragraph.
/// </summary>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>CorefSampleDataStream</c> over the paragraphs of the data file</returns>
public override ObjectStream <CorefSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

    // validate the input file before it is opened
    CmdLineUtil.checkInputFile("Data", @params.Data);
    FileInputStream corpusIn = CmdLineUtil.openInFile(@params.Data);

    // lines are read with the configured encoding and then grouped by ParagraphStream
    PlainTextByLineStream lines = new PlainTextByLineStream(corpusIn.Channel, @params.Encoding);
    ObjectStream <string> paragraphs = new ParagraphStream(lines);

    return new CorefSampleDataStream(paragraphs);
}
/// <summary>
/// Parses the command line arguments, records the requested language and creates
/// an AD name sample stream over the lines of the data file.
/// </summary>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>ADNameSampleStream</c> reading the data file line by line</returns>
public override ObjectStream <NameSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));
    language = @params.Lang;

    // read the data file line by line using the configured encoding
    ObjectStream <string> lines =
        new PlainTextByLineStream(CmdLineUtil.openInFile(@params.Data).Channel, @params.Encoding);

    return new ADNameSampleStream(lines, @params.SplitHyphenatedTokens.Value);
}
/// <summary>
/// Parses the command line arguments, records the requested language and creates
/// an AD POS sample stream over the lines of the data file.
/// </summary>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>ADPOSSampleStream</c> configured with the ExpandME and IncludeFeatures options</returns>
public override ObjectStream <POSSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));
    language = @params.Lang;

    // read the data file line by line using the configured encoding
    ObjectStream <string> lines =
        new PlainTextByLineStream(CmdLineUtil.openInFile(@params.Data).Channel, @params.Encoding);

    return new ADPOSSampleStream(lines, @params.ExpandME.Value, @params.IncludeFeatures.Value);
}
/// <summary>
/// Parses the command line arguments, records the requested language and creates
/// a Leipzig document categorization sample stream over the data file.
/// </summary>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>LeipzigDoccatSampleStream</c> for the configured language and data file</returns>
/// <exception cref="TerminateToolException">when creating the stream raises an IOException</exception>
public override ObjectStream<DocumentSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));
    language = @params.Lang;

    ObjectStream<DocumentSample> samples;
    try
    {
        // NOTE(review): 20 is passed as the second constructor argument; its meaning is
        // defined by LeipzigDoccatSampleStream and is not visible here
        samples = new LeipzigDoccatSampleStream(@params.Lang, 20, CmdLineUtil.openInFile(@params.Data));
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while opening sample data: " + e.Message, e);
    }

    return samples;
}
/// <summary>
/// Parses the command line arguments and creates a CoNLL 2003 name sample stream
/// for the requested language and entity types.
/// </summary>
/// <remarks>
/// Only the language codes "en" and "de" are accepted. Each of the type names
/// "per", "org", "loc" and "misc" found in the <c>Types</c> parameter adds its flag
/// to the bit mask passed to the stream.
/// </remarks>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>Conll03NameSampleStream</c> reading from the data file</returns>
/// <exception cref="TerminateToolException">when the language is neither "en" nor "de"</exception>
public override ObjectStream <NameSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

    // TODO: support the other languages with this CoNLL.
    LANGUAGE lang;
    if ("en".Equals(@params.Lang))
    {
        lang = LANGUAGE.EN;
    }
    else if ("de".Equals(@params.Lang))
    {
        lang = LANGUAGE.DE;
    }
    else
    {
        throw new TerminateToolException(1, "Unsupported language: " + @params.Lang);
    }

    // only reached for a supported language
    language = @params.Lang;

    // collect the flags of all requested entity types
    int typesToGenerate = 0;

    if (@params.Types.Contains("per"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_PERSON_ENTITIES;
    }

    if (@params.Types.Contains("org"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES;
    }

    if (@params.Types.Contains("loc"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES;
    }

    if (@params.Types.Contains("misc"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
    }

    return new Conll03NameSampleStream(lang, CmdLineUtil.openInFile(@params.Data), typesToGenerate);
}
/// <summary>
/// Parses the command line arguments and creates a CoNLL-X POS sample stream
/// that reads the data file as UTF-8 text, line by line.
/// </summary>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>a new <c>ConllXPOSSampleStream</c> over the lines of the data file</returns>
/// <exception cref="TerminateToolException">when the UTF-8 encoding is reported as unsupported</exception>
public override ObjectStream <POSSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

    try
    {
        InputStreamReader utf8Reader = new InputStreamReader(CmdLineUtil.openInFile(@params.Data), "UTF-8");
        ObjectStream <string> lines = new PlainTextByLineStream(utf8Reader);

        return new ConllXPOSSampleStream(lines);
    }
    catch (UnsupportedEncodingException e)
    {
        // not expected to occur for UTF-8
        throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.Message, e);
    }
}
/// <summary>
/// Parses the command line arguments, records the requested language and creates
/// an AD chunk sample stream over the lines of the data file.
/// </summary>
/// <remarks>
/// The optional <c>Start</c> and <c>End</c> parameters are applied to the stream
/// only when they are set and greater than -1.
/// </remarks>
/// <param name="args">command line arguments, parsed into <c>Parameters</c></param>
/// <returns>the configured <c>ADChunkSampleStream</c></returns>
public override ObjectStream <ChunkSample> create(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));
    language = @params.Lang;

    // read the data file line by line using the configured encoding
    ObjectStream <string> lines =
        new PlainTextByLineStream(CmdLineUtil.openInFile(@params.Data).Channel, @params.Encoding);
    ADChunkSampleStream chunkStream = new ADChunkSampleStream(lines);

    var start = @params.Start;
    if (start != null && start > -1)
    {
        chunkStream.Start = start.Value;
    }

    var end = @params.End;
    if (end != null && end > -1)
    {
        chunkStream.End = end.Value;
    }

    return chunkStream;
}
/// <summary>
/// Builds a dictionary from a census name file and writes it to the dictionary file.
/// </summary>
/// <remarks>
/// The input and output files are checked first. The names are then read through a
/// <c>NameFinderCensus90NameStream</c> using the configured encoding, turned into a
/// dictionary by <c>createDictionary</c>, and serialized to the output file.
/// Progress messages are printed to the console.
/// </remarks>
/// <param name="args">command line arguments, validated and parsed into <c>Parameters</c></param>
/// <exception cref="TerminateToolException">
/// when reading the names, writing the dictionary, or closing the output file raises an IOException
/// </exception>
public override void run(string[] args)
{
    Parameters @params = validateAndParseParams(args, typeof(Parameters));

    File censusFile = new File(@params.CensusData);
    File dictionaryFile = new File(@params.Dict);

    CmdLineUtil.checkInputFile("Name data", censusFile);
    CmdLineUtil.checkOutputFile("Dictionary file", dictionaryFile);

    FileInputStream censusIn = CmdLineUtil.openInFile(censusFile);
    ObjectStream <StringList> nameStream =
        new NameFinderCensus90NameStream(censusIn, Charset.forName(@params.Encoding));

    // step 1: build the dictionary from the name stream
    Dictionary builtDictionary;
    try
    {
        Console.WriteLine("Creating Dictionary...");
        builtDictionary = createDictionary(nameStream);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            nameStream.close();
        }
        catch (IOException)
        {
            // a failure to close the input is ignored
        }
    }

    // step 2: serialize the dictionary to the output file
    Console.WriteLine("Saving Dictionary...");

    OutputStream dictionaryOut = null;
    try
    {
        dictionaryOut = new FileOutputStream(dictionaryFile);
        builtDictionary.serialize(dictionaryOut);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while writing dictionary file: " + e.Message, e);
    }
    finally
    {
        if (dictionaryOut != null)
        {
            try
            {
                dictionaryOut.close();
            }
            catch (IOException e)
            {
                // a failed close means the written file might be damaged, so report it
                throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + e.Message, e);
            }
        }
    }
}
/// <summary>
/// Loads every file in the given directory for which an artifact serializer is
/// registered under the file's extension.
/// </summary>
/// <remarks>
/// Files without a dot in their name, and files whose extension has no registered
/// serializer, are skipped. A resource that fails to load is reported on the console
/// and left out of the result; loading continues with the next file.
/// </remarks>
/// <param name="resourcePath">directory containing the resource files; may be null</param>
/// <returns>
/// a map from file name (including extension) to the deserialized resource; empty when
/// <paramref name="resourcePath"/> is null or no files could be listed
/// </returns>
public static IDictionary <string, object> loadResources(Jfile resourcePath)
{
    IDictionary <string, object> resources = new Dictionary <string, object>();

    if (resourcePath == null)
    {
        return resources;
    }

    IDictionary <string, ArtifactSerializer> artifactSerializers = TokenNameFinderModel.createArtifactSerializers();

    File[] resourceFiles = resourcePath.listFiles();

    // listFiles may yield null instead of an array (java.io.File does so when the path
    // is not a directory or cannot be read); treat that as "no resources"
    if (resourceFiles == null)
    {
        return resources;
    }

    // TODO: Filter files, also files with start with a dot
    foreach (File resourceFile in resourceFiles)
    {
        // TODO: Move extension extracting code to method and
        // write unit test for it

        // extract file ending
        string resourceName = resourceFile.Name;
        int lastDot = resourceName.LastIndexOf('.');
        if (lastDot == -1)
        {
            continue;
        }

        string ending = resourceName.Substring(lastDot + 1);

        // Look up the serializer with TryGetValue: the IDictionary indexer throws
        // KeyNotFoundException for an unknown ending instead of returning null.
        // TODO: Do different? For now just ignore ....
        ArtifactSerializer serializer;
        if (!artifactSerializers.TryGetValue(ending, out serializer) || serializer == null)
        {
            continue;
        }

        InputStream resourceIn = CmdLineUtil.openInFile(resourceFile);
        try
        {
            resources[resourceName] = serializer.create(resourceIn);
        }
        catch (InvalidFormatException e)
        {
            // TODO: Fix exception handling
            Console.WriteLine(e.ToString());
            Console.Write(e.StackTrace);
        }
        catch (IOException e)
        {
            // TODO: Fix exception handling
            Console.WriteLine(e.ToString());
            Console.Write(e.StackTrace);
        }
        finally
        {
            try
            {
                resourceIn.close();
            }
            catch (IOException)
            {
                // closing is best effort; a failure here is ignored
            }
        }
    }

    return resources;
}