Esempio n. 1
0
        /// <summary>
        /// Reads the whole feature generator descriptor file into memory.
        /// </summary>
        /// <param name="featureGenDescriptorFile">Descriptor file to load; may be <c>null</c>.</param>
        /// <returns>
        /// The bytes returned by <c>ModelUtil.read</c>, or <c>null</c> when no descriptor file was given.
        /// </returns>
        /// <exception cref="TerminateToolException">Thrown when reading the file raises an <c>IOException</c>.</exception>
        internal static sbyte[] openFeatureGeneratorBytes(Jfile featureGenDescriptorFile)
        {
            // Without a descriptor file there is nothing to load.
            if (featureGenDescriptorFile == null)
            {
                return(null);
            }

            InputStream descriptorIn = CmdLineUtil.openInFile(featureGenDescriptorFile);

            try
            {
                return(ModelUtil.read(descriptorIn));
            }
            catch (IOException ioe)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioe.Message, ioe);
            }
            finally
            {
                // Closing is best effort: a failure here must not hide the result or the error above.
                try
                {
                    descriptorIn.close();
                }
                catch (IOException)
                {
                    // deliberately ignored
                }
            }
        }
        /// <summary>
        /// Parses the command line arguments and opens a <c>BioNLP2004NameSampleStream</c> over the
        /// file named by the <c>Data</c> parameter.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>
        /// A sample stream configured with the bitwise OR of the flags of every entity type found in
        /// the <c>Types</c> parameter.
        /// </returns>
        public override ObjectStream <T> create <T>(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            int typesToGenerate = 0;

            // Every type is tested on its own. The flags are combined with OR, so chaining the
            // tests with "else if" would keep only the first requested type and silently drop
            // the others.
            if (@params.Types.Contains("DNA"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
            }
            if (@params.Types.Contains("protein"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
            }
            if (@params.Types.Contains("cell_type"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
            }
            if (@params.Types.Contains("cell_line"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
            }
            if (@params.Types.Contains("RNA"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
            }

            return(new BioNLP2004NameSampleStream(CmdLineUtil.openInFile(@params.Data), typesToGenerate));
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a <c>CorefSampleDataStream</c> over the file named by the <c>Data</c> parameter.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>The sample stream wrapping a paragraph stream over the lines of the data file.</returns>
        public override ObjectStream <CorefSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            // Validate the input file before trying to open it.
            CmdLineUtil.checkInputFile("Data", @params.Data);
            FileInputStream dataIn = CmdLineUtil.openInFile(@params.Data);

            // Lines are read with the requested encoding, then wrapped in a ParagraphStream.
            PlainTextByLineStream lines = new PlainTextByLineStream(dataIn.Channel, @params.Encoding);
            ObjectStream <string> paragraphs = new ParagraphStream(lines);

            return(new CorefSampleDataStream(paragraphs));
        }
        /// <summary>
        /// Builds an <c>ADNameSampleStream</c> over the file named by the <c>Data</c> parameter and
        /// records the requested language in the <c>language</c> member.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>The name sample stream reading the data file line by line.</returns>
        public override ObjectStream <NameSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            language = @params.Lang;

            // Open the corpus and expose it as a stream of lines in the requested encoding.
            FileInputStream corpusIn = CmdLineUtil.openInFile(@params.Data);
            ObjectStream <string> corpusLines = new PlainTextByLineStream(corpusIn.Channel, @params.Encoding);

            bool splitHyphenatedTokens = @params.SplitHyphenatedTokens.Value;

            return(new ADNameSampleStream(corpusLines, splitHyphenatedTokens));
        }
        /// <summary>
        /// Builds an <c>ADPOSSampleStream</c> over the file named by the <c>Data</c> parameter and
        /// records the requested language in the <c>language</c> member.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>
        /// The POS sample stream, configured with the <c>ExpandME</c> and <c>IncludeFeatures</c> parameters.
        /// </returns>
        public override ObjectStream <POSSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            language = @params.Lang;

            // Open the corpus and expose it as a stream of lines in the requested encoding.
            FileInputStream corpusIn = CmdLineUtil.openInFile(@params.Data);
            ObjectStream <string> corpusLines = new PlainTextByLineStream(corpusIn.Channel, @params.Encoding);

            return(new ADPOSSampleStream(corpusLines, @params.ExpandME.Value, @params.IncludeFeatures.Value));
        }
Esempio n. 6
0
	  /// <summary>
	  /// Builds a <c>LeipzigDoccatSampleStream</c> over the file named by the <c>Data</c> parameter
	  /// and records the requested language in the <c>language</c> member.
	  /// </summary>
	  /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
	  /// <returns>The document sample stream for the requested language.</returns>
	  /// <exception cref="TerminateToolException">Thrown when opening the sample data raises an <c>IOException</c>.</exception>
	  public override ObjectStream<DocumentSample> create(string[] args)
	  {
		Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

		language = @params.Lang;

		try
		{
		  // NOTE(review): the literal 20 is passed straight to the stream constructor; it is
		  // presumably a per-document sentence count - confirm against LeipzigDoccatSampleStream.
		  ObjectStream<DocumentSample> samples = new LeipzigDoccatSampleStream(@params.Lang, 20, CmdLineUtil.openInFile(@params.Data));

		  return samples;
		}
		catch (IOException ioe)
		{
		  throw new TerminateToolException(-1, "IO error while opening sample data: " + ioe.Message, ioe);
		}
	  }
Esempio n. 7
0
        /// <summary>
        /// Builds a <c>Conll03NameSampleStream</c> over the file named by the <c>Data</c> parameter.
        /// Only the languages "en" and "de" are accepted.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>
        /// A sample stream configured with the bitwise OR of the flags of every entity type
        /// ("per", "org", "loc", "misc") found in the <c>Types</c> parameter.
        /// </returns>
        /// <exception cref="TerminateToolException">Thrown when the <c>Lang</c> parameter is neither "en" nor "de".</exception>
        public override ObjectStream <NameSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            // TODO: support the other languages with this CoNLL.
            LANGUAGE lang;

            switch (@params.Lang)
            {
                case "en":
                    lang = LANGUAGE.EN;
                    break;

                case "de":
                    lang = LANGUAGE.DE;
                    break;

                default:
                    throw new TerminateToolException(1, "Unsupported language: " + @params.Lang);
            }

            // Only reached for a supported language, exactly as in both accepted branches before.
            language = @params.Lang;

            // Collect one flag per requested entity type; the types are independent of each other.
            int typesToGenerate = 0;

            if (@params.Types.Contains("per"))
            {
                typesToGenerate |= Conll02NameSampleStream.GENERATE_PERSON_ENTITIES;
            }
            if (@params.Types.Contains("org"))
            {
                typesToGenerate |= Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES;
            }
            if (@params.Types.Contains("loc"))
            {
                typesToGenerate |= Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES;
            }
            if (@params.Types.Contains("misc"))
            {
                typesToGenerate |= Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
            }

            return(new Conll03NameSampleStream(lang, CmdLineUtil.openInFile(@params.Data), typesToGenerate));
        }
Esempio n. 8
0
        /// <summary>
        /// Builds a <c>ConllXPOSSampleStream</c> over the file named by the <c>Data</c> parameter,
        /// which is always decoded as UTF-8.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>The POS sample stream reading the data file line by line.</returns>
        /// <exception cref="TerminateToolException">Thrown when the UTF-8 encoding is reported as unsupported.</exception>
        public override ObjectStream <POSSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            try
            {
                InputStreamReader utf8Reader = new InputStreamReader(CmdLineUtil.openInFile(@params.Data), "UTF-8");
                ObjectStream <string> lineStream = new PlainTextByLineStream(utf8Reader);

                return(new ConllXPOSSampleStream(lineStream));
            }
            catch (UnsupportedEncodingException unsupported)
            {
                // this shouldn't happen
                throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + unsupported.Message, unsupported);
            }
        }
        /// <summary>
        /// Builds an <c>ADChunkSampleStream</c> over the file named by the <c>Data</c> parameter and
        /// records the requested language in the <c>language</c> member.
        /// </summary>
        /// <param name="args">Raw arguments; parsed into <c>Parameters</c> by <c>ArgumentParser.parse</c>.</param>
        /// <returns>
        /// The chunk sample stream; its <c>Start</c> and <c>End</c> are set only when the matching
        /// parameter is present and greater than -1.
        /// </returns>
        public override ObjectStream <ChunkSample> create(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            language = @params.Lang;

            // Open the corpus and expose it as a stream of lines in the requested encoding.
            FileInputStream corpusIn = CmdLineUtil.openInFile(@params.Data);
            ObjectStream <string> corpusLines = new PlainTextByLineStream(corpusIn.Channel, @params.Encoding);

            ADChunkSampleStream chunkSamples = new ADChunkSampleStream(corpusLines);

            // Optional lower bound: applied only when supplied and non-negative.
            if (@params.Start != null && @params.Start > -1)
            {
                chunkSamples.Start = @params.Start.Value;
            }

            // Optional upper bound: applied only when supplied and non-negative.
            if (@params.End != null && @params.End > -1)
            {
                chunkSamples.End = @params.End.Value;
            }

            return(chunkSamples);
        }
Esempio n. 10
0
        /// <summary>
        /// Reads the name data file given by the <c>CensusData</c> parameter, builds a dictionary
        /// from it with <c>createDictionary</c> and serializes the result to the file given by the
        /// <c>Dict</c> parameter.
        /// </summary>
        /// <param name="args">Raw arguments; validated and parsed by <c>validateAndParseParams</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when reading the input, writing the dictionary, or closing the written file raises
        /// an <c>IOException</c>.
        /// </exception>
        public override void run(string[] args)
        {
            Parameters @params = validateAndParseParams(args, typeof(Parameters));

            File censusFile     = new File(@params.CensusData);
            File dictionaryFile = new File(@params.Dict);

            // Check both files up front, before any stream is opened.
            CmdLineUtil.checkInputFile("Name data", censusFile);
            CmdLineUtil.checkOutputFile("Dictionary file", dictionaryFile);

            FileInputStream           censusIn    = CmdLineUtil.openInFile(censusFile);
            ObjectStream <StringList> nameStream  = new NameFinderCensus90NameStream(censusIn, Charset.forName(@params.Encoding));

            Dictionary builtDictionary;

            // Phase 1: build the dictionary from the name stream.
            try
            {
                Console.WriteLine("Creating Dictionary...");
                builtDictionary = createDictionary(nameStream);
            }
            catch (IOException readError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + readError.Message, readError);
            }
            finally
            {
                // Closing the input is best effort; a failure here is ignored.
                try
                {
                    nameStream.close();
                }
                catch (IOException)
                {
                    // sorry this can fail..
                }
            }

            // Phase 2: write the dictionary to disk.
            Console.WriteLine("Saving Dictionary...");

            OutputStream dictionaryOut = null;

            try
            {
                dictionaryOut = new FileOutputStream(dictionaryFile);
                builtDictionary.serialize(dictionaryOut);
            }
            catch (IOException writeError)
            {
                throw new TerminateToolException(-1, "IO error while writing dictionary file: " + writeError.Message, writeError);
            }
            finally
            {
                if (dictionaryOut != null)
                {
                    try
                    {
                        dictionaryOut.close();
                    }
                    catch (IOException closeError)
                    {
                        // A failed close on the output is reported: the file might be damaged.
                        throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + closeError.Message, closeError);
                    }
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Loads every file in a resource directory whose extension has a registered
        /// <c>ArtifactSerializer</c>, keyed by file name.
        /// </summary>
        /// <param name="resourcePath">Directory to scan; may be <c>null</c>.</param>
        /// <returns>
        /// A map from file name (including extension) to the object produced by the matching
        /// serializer. It is empty when <paramref name="resourcePath"/> is <c>null</c> or when the
        /// directory listing is unavailable. Files without an extension, files with an extension
        /// that has no serializer, and files that fail to load are left out.
        /// </returns>
        public static IDictionary <string, object> loadResources(Jfile resourcePath)
        {
            IDictionary <string, object> resources = new Dictionary <string, object>();

            if (resourcePath == null)
            {
                return(resources);
            }

            IDictionary <string, ArtifactSerializer> artifactSerializers = TokenNameFinderModel.createArtifactSerializers();

            File[] resourceFiles = resourcePath.listFiles();

            // NOTE(review): a java.io.File-style listFiles() reports "not a directory" or an I/O
            // failure by returning null; guard so the loop below does not fail on it.
            if (resourceFiles == null)
            {
                return(resources);
            }

            // TODO: Filter files, also files with start with a dot
            foreach (File resourceFile in resourceFiles)
            {
                // TODO: Move extension extracting code to method and
                // write unit test for it

                // extract file ending
                string resourceName = resourceFile.Name;

                int lastDot = resourceName.LastIndexOf('.');

                if (lastDot == -1)
                {
                    continue;
                }

                string ending = resourceName.Substring(lastDot + 1);

                // Look the serializer up with TryGetValue: the IDictionary indexer throws
                // KeyNotFoundException for an unknown ending instead of yielding null, which
                // would abort the whole load on the first unrecognised file.
                ArtifactSerializer serializer;

                // TODO: Do different? For now just ignore ....
                if (!artifactSerializers.TryGetValue(ending, out serializer) || serializer == null)
                {
                    continue;
                }

                InputStream resourceIn = CmdLineUtil.openInFile(resourceFile);

                try
                {
                    resources[resourceName] = serializer.create(resourceIn);
                }
                catch (InvalidFormatException e)
                {
                    // TODO: Fix exception handling
                    Console.WriteLine(e.ToString());
                    Console.Write(e.StackTrace);
                }
                catch (IOException e)
                {
                    // TODO: Fix exception handling
                    Console.WriteLine(e.ToString());
                    Console.Write(e.StackTrace);
                }
                finally
                {
                    // Closing is best effort; a failure here is ignored.
                    try
                    {
                        resourceIn.close();
                    }
                    catch (IOException)
                    {
                    }
                }
            }

            return(resources);
        }