示例#1
0
 /// <summary>
 /// Creates an enumerator bound to <paramref name="collection"/>, with the index set to -1
 /// and no current object cached.
 /// </summary>
 /// <param name="collection">Collection to enumerate; its <c>Count</c> is read once here and stored.</param>
 public TokenRangesEnumerator(TokenRanges collection)
 {
     // Cursor state: index -1, nothing cached yet.
     this.currentIndex  = -1;
     this.currentObject = null;

     // Keep the collection together with the element count it reports at construction time.
     this.collectionRef = collection;
     this.currentSize   = collection.Count;
 }
示例#2
0
 /// <summary>
 /// Builds a new instance from the native pointer returned by <c>new_TokenRanges__SWIG_1</c>,
 /// which is called with the native handle of <paramref name="other"/>.
 /// </summary>
 /// <param name="other">Instance whose native handle is passed to the native constructor.</param>
 /// <remarks>Rethrows any exception the native layer left pending during construction.</remarks>
 public TokenRanges(TokenRanges other)
     : this(morphodita_csharpPINVOKE.new_TokenRanges__SWIG_1(TokenRanges.getCPtr(other)), true)
 {
     if (!morphodita_csharpPINVOKE.SWIGPendingException.Pending)
     {
         return;
     }

     throw morphodita_csharpPINVOKE.SWIGPendingException.Retrieve();
 }
示例#3
0
    /// <summary>
    /// Command-line entry point: loads a tagger model, then reads blank-line-separated blocks
    /// of text from standard input, tokenizes and tags each block, and writes the text to
    /// standard output with <c>sentence</c> and <c>token</c> elements wrapped around the tokens.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the tagger model file.</param>
    /// <returns>
    /// 0 once standard input is exhausted; 1 when the argument is missing, the model cannot
    /// be loaded, or the model provides no tokenizer.
    /// </returns>
    public static int Main(string[] args)
    {
        if (args.Length < 1) {
            Console.Error.WriteLine("Usage: RunMorphoCli tagger_file");
            return 1;
        }

        string modelPath = args[0];
        Console.Error.Write("Loading tagger: ");
        Tagger tagger = Tagger.load(modelPath);
        if (tagger == null) {
            Console.Error.WriteLine("Cannot load tagger from file '{0}'", modelPath);
            return 1;
        }
        Console.Error.WriteLine("done");

        Forms forms = new Forms();
        TaggedLemmas lemmas = new TaggedLemmas();
        TokenRanges tokens = new TokenRanges();
        Tokenizer tokenizer = tagger.newTokenizer();
        if (tokenizer == null) {
            Console.Error.WriteLine("No tokenizer is defined for the supplied model!");
            return 1;
        }

        XmlTextWriter xmlOut = new XmlTextWriter(Console.Out);
        bool moreInput = true;
        while (moreInput) {
            // Collect one block: lines up to (not including) the next empty line or end of input.
            StringBuilder block = new StringBuilder();
            while (true) {
                string line = Console.In.ReadLine();
                moreInput = line != null;
                if (!moreInput || line.Length == 0) {
                    break;
                }
                block.Append(line).Append('\n');
            }
            // The block ended on an empty line rather than end of input: keep that line break.
            if (moreInput) {
                block.Append('\n');
            }

            // Tokenize and tag the block.
            string text = block.ToString();
            tokenizer.setText(text);
            int copied = 0; // offset in text up to which output has already been written
            while (tokenizer.nextSentence(forms, tokens)) {
                tagger.tag(forms, lemmas);

                for (int idx = 0; idx < lemmas.Count; idx++) {
                    TaggedLemma lemma = lemmas[idx];
                    int start = (int)tokens[idx].start;
                    int length = (int)tokens[idx].length;

                    // Text between the previous token and this one is emitted verbatim.
                    xmlOut.WriteString(text.Substring(copied, start - copied));
                    if (idx == 0) {
                        xmlOut.WriteStartElement("sentence");
                    }

                    xmlOut.WriteStartElement("token");
                    xmlOut.WriteAttributeString("lemma", lemma.lemma);
                    xmlOut.WriteAttributeString("tag", lemma.tag);
                    xmlOut.WriteString(text.Substring(start, length));
                    xmlOut.WriteEndElement();

                    // Close the sentence element after its last token.
                    if (idx + 1 == lemmas.Count) {
                        xmlOut.WriteEndElement();
                    }
                    copied = start + length;
                }
            }
            // Whatever follows the last token of the block.
            xmlOut.WriteString(text.Substring(copied));
        }
        return 0;
    }
示例#4
0
 /// <summary>
 /// Forwards to the native <c>TokenRanges_SetRange</c> call with this instance's handle,
 /// <paramref name="index"/>, and the native handle of <paramref name="values"/>.
 /// </summary>
 /// <param name="index">Index passed through to the native call.</param>
 /// <param name="values">Collection whose native handle is passed through to the native call.</param>
 /// <remarks>Rethrows any exception the native layer left pending.</remarks>
 public void SetRange(int index, TokenRanges values)
 {
     morphodita_csharpPINVOKE.TokenRanges_SetRange(swigCPtr, index, TokenRanges.getCPtr(values));

     if (!morphodita_csharpPINVOKE.SWIGPendingException.Pending)
     {
         return;
     }

     throw morphodita_csharpPINVOKE.SWIGPendingException.Retrieve();
 }
示例#5
0
        /// <summary>
        /// Calls the native <c>TokenRanges_Repeat</c> with the handle of <paramref name="value"/>
        /// and <paramref name="count"/>, and wraps the returned pointer.
        /// </summary>
        /// <param name="value">Element whose native handle is passed to the native call.</param>
        /// <param name="count">Count passed through to the native call.</param>
        /// <returns>A wrapper around the returned pointer, or <c>null</c> when that pointer is zero.</returns>
        /// <remarks>Rethrows any exception the native layer left pending.</remarks>
        public static TokenRanges Repeat(TokenRange value, int count)
        {
            global::System.IntPtr nativeResult = morphodita_csharpPINVOKE.TokenRanges_Repeat(TokenRange.getCPtr(value), count);

            // Wrap the pointer before checking for a pending exception, same order as the native call contract used here.
            TokenRanges wrapped = null;
            if (nativeResult != global::System.IntPtr.Zero)
            {
                wrapped = new TokenRanges(nativeResult, true);
            }

            if (morphodita_csharpPINVOKE.SWIGPendingException.Pending)
            {
                throw morphodita_csharpPINVOKE.SWIGPendingException.Retrieve();
            }

            return wrapped;
        }
示例#6
0
        /// <summary>
        /// Calls the native <c>TokenRanges_GetRange</c> with this instance's handle,
        /// <paramref name="index"/> and <paramref name="count"/>, and wraps the returned pointer.
        /// </summary>
        /// <param name="index">Index passed through to the native call.</param>
        /// <param name="count">Count passed through to the native call.</param>
        /// <returns>A wrapper around the returned pointer, or <c>null</c> when that pointer is zero.</returns>
        /// <remarks>Rethrows any exception the native layer left pending.</remarks>
        public TokenRanges GetRange(int index, int count)
        {
            global::System.IntPtr nativeResult = morphodita_csharpPINVOKE.TokenRanges_GetRange(swigCPtr, index, count);

            // Wrap first, then check for a pending native exception.
            TokenRanges wrapped = null;
            if (nativeResult != global::System.IntPtr.Zero)
            {
                wrapped = new TokenRanges(nativeResult, true);
            }

            if (morphodita_csharpPINVOKE.SWIGPendingException.Pending)
            {
                throw morphodita_csharpPINVOKE.SWIGPendingException.Retrieve();
            }

            return wrapped;
        }
示例#7
0
    /// <summary>
    /// Command-line entry point: loads a tagger model, then reads blank-line-separated blocks
    /// of text from standard input, tokenizes and tags each block, and writes the text to
    /// standard output with <c>sentence</c> and <c>token</c> elements wrapped around the tokens.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the tagger model file.</param>
    /// <returns>
    /// 0 once standard input is exhausted; 1 when the argument is missing, the model cannot
    /// be loaded, or the model provides no tokenizer.
    /// </returns>
    public static int Main(string[] args)
    {
        if (args.Length < 1)
        {
            Console.Error.WriteLine("Usage: RunMorphoCli tagger_file");
            return 1;
        }

        string taggerFile = args[0];

        Console.Error.Write("Loading tagger: ");
        Tagger tagger = Tagger.load(taggerFile);
        if (tagger == null)
        {
            Console.Error.WriteLine("Cannot load tagger from file '{0}'", taggerFile);
            return 1;
        }
        Console.Error.WriteLine("done");

        Forms forms = new Forms();
        TaggedLemmas lemmas = new TaggedLemmas();
        TokenRanges tokens = new TokenRanges();
        Tokenizer tokenizer = tagger.newTokenizer();
        if (tokenizer == null)
        {
            Console.Error.WriteLine("No tokenizer is defined for the supplied model!");
            return 1;
        }

        XmlTextWriter xmlOut = new XmlTextWriter(Console.Out);

        // One pass per input block; the last pass is the one that hits end of input.
        bool inputRemains = true;
        do
        {
            // Read block: accumulate lines until an empty line or end of input.
            StringBuilder paragraph = new StringBuilder();
            for (;;)
            {
                string line = Console.In.ReadLine();
                inputRemains = line != null;
                if (!inputRemains || line.Length == 0)
                {
                    break;
                }
                paragraph.Append(line).Append('\n');
            }
            if (inputRemains)
            {
                // Stopped on an empty line: keep its line break in the block text.
                paragraph.Append('\n');
            }

            // Tokenize and tag
            string text = paragraph.ToString();
            tokenizer.setText(text);
            int written = 0; // offset in text already copied to the output
            while (tokenizer.nextSentence(forms, tokens))
            {
                tagger.tag(forms, lemmas);

                for (int pos = 0; pos < lemmas.Count; pos++)
                {
                    TaggedLemma lemma = lemmas[pos];
                    int tokenStart = (int)tokens[pos].start;
                    int tokenLength = (int)tokens[pos].length;

                    // Copy the text between the previous token and this one unchanged.
                    xmlOut.WriteString(text.Substring(written, tokenStart - written));
                    if (pos == 0)
                    {
                        xmlOut.WriteStartElement("sentence");
                    }

                    xmlOut.WriteStartElement("token");
                    xmlOut.WriteAttributeString("lemma", lemma.lemma);
                    xmlOut.WriteAttributeString("tag", lemma.tag);
                    xmlOut.WriteString(text.Substring(tokenStart, tokenLength));
                    xmlOut.WriteEndElement();

                    if (pos + 1 == lemmas.Count)
                    {
                        // Last token of the sentence: close the sentence element.
                        xmlOut.WriteEndElement();
                    }
                    written = tokenStart + tokenLength;
                }
            }

            // Remainder of the block after the last token.
            xmlOut.WriteString(text.Substring(written));
        }
        while (inputRemains);

        return 0;
    }
示例#8
0
 /// <summary>
 /// Returns the native handle held by <paramref name="obj"/>.
 /// </summary>
 /// <param name="obj">Wrapper to read the handle from; may be <c>null</c>.</param>
 /// <returns>
 /// <c>obj.swigCPtr</c>, or a <c>HandleRef</c> with a null wrapper and a zero pointer
 /// when <paramref name="obj"/> is <c>null</c>.
 /// </returns>
 internal static global::System.Runtime.InteropServices.HandleRef getCPtr(TokenRanges obj)
 {
     if (obj != null)
     {
         return obj.swigCPtr;
     }

     return new global::System.Runtime.InteropServices.HandleRef(null, global::System.IntPtr.Zero);
 }
示例#9
0
文件: RunNer.cs 项目: ufal/nametag
    /// <summary>
    /// Command-line entry point: loads a named-entity recognizer model, then reads
    /// blank-line-separated blocks of text from standard input, tokenizes each block, runs the
    /// recognizer on every sentence, and writes the text to standard output with
    /// <c>sentence</c>, <c>ne</c> (with a <c>type</c> attribute) and <c>token</c> elements.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the recognizer model file.</param>
    /// <returns>
    /// 0 once standard input is exhausted; 1 when the argument is missing, the model cannot
    /// be loaded, or the model provides no tokenizer.
    /// </returns>
    public static int Main(string[] args)
    {
        if (args.Length < 1) {
            // The usage line names this program (RunNer).
            Console.Error.WriteLine("Usage: RunNer ner_file");
            return 1;
        }

        Console.Error.Write("Loading ner: ");
        Ner ner = Ner.load(args[0]);
        if (ner == null) {
            Console.Error.WriteLine("Cannot load ner from file '{0}'", args[0]);
            return 1;
        }
        Console.Error.WriteLine("done");

        Forms forms = new Forms();
        TokenRanges tokens = new TokenRanges();
        NamedEntities entities = new NamedEntities();
        List<NamedEntity> sortedEntities = new List<NamedEntity>();
        // Index of the last token of every currently open <ne> element, innermost on top.
        Stack<int> openEntities = new Stack<int>();
        Tokenizer tokenizer = ner.newTokenizer();
        if (tokenizer == null) {
            Console.Error.WriteLine("No tokenizer is defined for the supplied model!");
            return 1;
        }

        XmlTextWriter xmlOut = new XmlTextWriter(Console.Out);
        for (bool not_eof = true; not_eof; ) {
            string line;
            StringBuilder textBuilder = new StringBuilder();

            // Read block: lines up to (not including) the next empty line or end of input.
            while ((not_eof = (line = Console.In.ReadLine()) != null) && line.Length > 0) {
                textBuilder.Append(line).Append('\n');
            }
            // Stopped on an empty line rather than end of input: keep that line break.
            if (not_eof) {
                textBuilder.Append('\n');
            }

            // Tokenize and recognize
            string text = textBuilder.ToString();
            tokenizer.setText(text);
            int t = 0; // offset in text up to which output has already been written
            while (tokenizer.nextSentence(forms, tokens)) {
                ner.recognize(forms, entities);
                // SortEntities is defined elsewhere; presumably it orders entities so that
                // they can be opened in token order with outer entities first -- TODO confirm.
                SortEntities(entities, sortedEntities);

                for (int i = 0, e = 0; i < tokens.Count; i++) {
                    int token_start = (int)tokens[i].start, token_length = (int)tokens[i].length;

                    // Text between the previous token and this one is emitted verbatim.
                    xmlOut.WriteString(text.Substring(t, token_start - t));
                    if (i == 0) {
                        xmlOut.WriteStartElement("sentence");
                    }

                    // Open every entity that starts at this token and remember where it ends.
                    for (; e < sortedEntities.Count && sortedEntities[e].start == i; e++) {
                        xmlOut.WriteStartElement("ne");
                        xmlOut.WriteAttributeString("type", sortedEntities[e].type);
                        openEntities.Push((int)sortedEntities[e].start + (int)sortedEntities[e].length - 1);
                    }

                    xmlOut.WriteStartElement("token");
                    xmlOut.WriteString(text.Substring(token_start, token_length));
                    xmlOut.WriteEndElement();

                    // Close every open entity whose last token is this one.
                    for (; openEntities.Count > 0 && openEntities.Peek() == i; openEntities.Pop()) {
                        xmlOut.WriteEndElement();
                    }
                    // Close the sentence element after its last token.
                    if (i + 1 == tokens.Count) {
                        xmlOut.WriteEndElement();
                    }
                    t = token_start + token_length;
                }
            }
            // Whatever follows the last token of the block.
            xmlOut.WriteString(text.Substring(t));
        }
        return 0;
    }
示例#10
0
        /// <summary>
        /// Forwards to the native <c>Tokenizer_nextSentence</c> call with this instance's handle
        /// and the native handles of <paramref name="forms"/> and <paramref name="tokens"/>.
        /// </summary>
        /// <param name="forms">Container whose native handle is passed to the native call.</param>
        /// <param name="tokens">Container whose native handle is passed to the native call.</param>
        /// <returns>The boolean returned by the native call.</returns>
        public bool nextSentence(Forms forms, TokenRanges tokens)
        {
            return morphodita_csharpPINVOKE.Tokenizer_nextSentence(
                swigCPtr,
                Forms.getCPtr(forms),
                TokenRanges.getCPtr(tokens));
        }
示例#11
0
文件: RunNer.cs 项目: tivvit/nametag
    /// <summary>
    /// Command-line entry point: loads a named-entity recognizer model, then reads
    /// blank-line-separated blocks of text from standard input, tokenizes each block, runs the
    /// recognizer on every sentence, and writes the text to standard output with
    /// <c>sentence</c>, <c>ne</c> (with a <c>type</c> attribute) and <c>token</c> elements.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the recognizer model file.</param>
    /// <returns>
    /// 0 once standard input is exhausted; 1 when the argument is missing, the model cannot
    /// be loaded, or the model provides no tokenizer.
    /// </returns>
    public static int Main(string[] args)
    {
        if (args.Length < 1)
        {
            // The usage line names this program (RunNer).
            Console.Error.WriteLine("Usage: RunNer ner_file");
            return(1);
        }

        Console.Error.Write("Loading ner: ");
        Ner ner = Ner.load(args[0]);

        if (ner == null)
        {
            Console.Error.WriteLine("Cannot load ner from file '{0}'", args[0]);
            return(1);
        }
        Console.Error.WriteLine("done");

        Forms              forms          = new Forms();
        TokenRanges        tokens         = new TokenRanges();
        NamedEntities      entities       = new NamedEntities();
        List <NamedEntity> sortedEntities = new List <NamedEntity>();
        // Index of the last token of every currently open <ne> element, innermost on top.
        Stack <int>        openEntities   = new Stack <int>();
        Tokenizer          tokenizer      = ner.newTokenizer();

        if (tokenizer == null)
        {
            Console.Error.WriteLine("No tokenizer is defined for the supplied model!");
            return(1);
        }

        XmlTextWriter xmlOut = new XmlTextWriter(Console.Out);

        for (bool not_eof = true; not_eof;)
        {
            string        line;
            StringBuilder textBuilder = new StringBuilder();

            // Read block: lines up to (not including) the next empty line or end of input.
            while ((not_eof = (line = Console.In.ReadLine()) != null) && line.Length > 0)
            {
                textBuilder.Append(line).Append('\n');
            }
            // Stopped on an empty line rather than end of input: keep that line break.
            if (not_eof)
            {
                textBuilder.Append('\n');
            }

            // Tokenize and recognize
            string text = textBuilder.ToString();
            tokenizer.setText(text);
            int t = 0; // offset in text up to which output has already been written
            while (tokenizer.nextSentence(forms, tokens))
            {
                ner.recognize(forms, entities);
                // SortEntities is defined elsewhere; presumably it orders entities so that
                // they can be opened in token order with outer entities first -- TODO confirm.
                SortEntities(entities, sortedEntities);

                for (int i = 0, e = 0; i < tokens.Count; i++)
                {
                    int token_start = (int)tokens[i].start, token_length = (int)tokens[i].length;

                    // Text between the previous token and this one is emitted verbatim.
                    xmlOut.WriteString(text.Substring(t, token_start - t));
                    if (i == 0)
                    {
                        xmlOut.WriteStartElement("sentence");
                    }

                    // Open every entity that starts at this token and remember where it ends.
                    for (; e < sortedEntities.Count && sortedEntities[e].start == i; e++)
                    {
                        xmlOut.WriteStartElement("ne");
                        xmlOut.WriteAttributeString("type", sortedEntities[e].type);
                        openEntities.Push((int)sortedEntities[e].start + (int)sortedEntities[e].length - 1);
                    }

                    xmlOut.WriteStartElement("token");
                    xmlOut.WriteString(text.Substring(token_start, token_length));
                    xmlOut.WriteEndElement();

                    // Close every open entity whose last token is this one.
                    for (; openEntities.Count > 0 && openEntities.Peek() == i; openEntities.Pop())
                    {
                        xmlOut.WriteEndElement();
                    }
                    // Close the sentence element after its last token.
                    if (i + 1 == tokens.Count)
                    {
                        xmlOut.WriteEndElement();
                    }
                    t = token_start + token_length;
                }
            }
            // Whatever follows the last token of the block.
            xmlOut.WriteString(text.Substring(t));
        }
        return(0);
    }