Example #1
0
        /// <summary>
        /// Reads the RCO result XML and the matching source text (both decoded as windows-1251),
        /// turns every typed <c>entity</c> element into a <c>NerTextEntity</c> and writes the text
        /// and the rendered corpus to <c>modelsall_rco_text.txt</c> and <c>modelsall_rco.txt</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var corpus     = new EntitiesCorpus();
            var encoding   = Encoding.GetEncoding("windows-1251");
            var resultXml  = File.ReadAllText(@"E:\SystemsTests\rco\bin\result.xml", encoding);
            var sourceText = File.ReadAllText(@"E:\SystemsTests\rco\bin\samples\modelsall_text_cp1251.txt", encoding);

            corpus.ClearedText = sourceText;

            // Entities live at output/document/sentence/entities/entity.
            var documentNode = XDocument.Parse(resultXml).Element("output").Element("document");
            var entityNodes  = (from sentenceNode in documentNode.Elements("sentence")
                                from containerNode in sentenceNode.Elements("entities")
                                from entityNode in containerNode.Elements("entity")
                                select entityNode).ToList();

            foreach (var entityNode in entityNodes)
            {
                var nerType = GetType(entityNode.GetAttributeValue("type"));
                if (nerType == NerType.Undefined)
                {
                    // Types that do not map to a known NerType are left out of the corpus.
                    continue;
                }

                // NOTE(review): -1 is presumably the fallback ParseInt returns for unparsable input - confirm.
                var offset = entityNode.GetAttributeValue("offset").ParseInt(-1);
                var length = entityNode.GetAttributeValue("length").ParseInt(-1);

                // The entity end is computed as offset + length.
                corpus.Entities.Add(new NerTextEntity(offset, length + offset, nerType));
            }

            File.WriteAllText("modelsall_rco_text.txt", corpus.ClearedText);
            File.WriteAllText("modelsall_rco.txt", corpus.Render());

            // Wait for Enter before the process exits.
            Console.ReadLine();
        }
Example #2
0
 /// <summary>
 /// Adds an entity for the span (<paramref name="begin"/>, <paramref name="endi"/>) to the model
 /// unless that span has already been registered in <paramref name="allEntities"/>.
 /// </summary>
 /// <param name="model">Corpus whose <c>Entities</c> collection is updated.</param>
 /// <param name="begin">Start position of the entity.</param>
 /// <param name="endi">End position of the entity.</param>
 /// <param name="type">Type of the entity being added.</param>
 /// <param name="allEntities">Index of the spans registered so far, keyed by (begin, end).</param>
 private static void AddEntityToModel(EntitiesCorpus model, int begin, int endi, NerType type, Dictionary<KeyValuePair<int, int>, NerTextEntity> allEntities)
 {
     var span = new KeyValuePair<int, int>(begin, endi);

     NerTextEntity known;
     if (allEntities.TryGetValue(span, out known))
     {
         // The span is already registered. A different type removes the earlier entity from the
         // model; either way nothing new is added and the index entry stays as it was.
         // NOTE(review): this looks like "drop spans with conflicting types" - confirm the intent.
         if (known.Type != type)
         {
             model.Entities.Remove(known);
         }

         return;
     }

     var created = new NerTextEntity(begin, endi, type);
     model.Entities.Add(created);
     allEntities.Add(span, created);
 }
Example #3
0
        /// <summary>
        /// Converts one JSON document into a rendered entities corpus file.
        /// </summary>
        /// <remarks>
        /// The JSON is expected to expose <c>doc.text</c>, <c>doc.entities.entity</c> and
        /// <c>doc.mentions.mention</c>. Only entities whose <c>level</c> is <c>"NAM"</c> contribute a
        /// type; a mention is turned into a <c>NerTextEntity</c> when its <c>eid</c> refers to such an
        /// entity and the type maps to something other than <c>NerType.Undefined</c>.
        /// Entities and mentions without an <c>eid</c> are skipped, and a repeated <c>eid</c> keeps the
        /// type that was read last, so a single malformed record no longer aborts the conversion.
        /// </remarks>
        /// <param name="inputPath">Path of the JSON file to read.</param>
        /// <param name="outputPath">Path the rendered corpus is written to.</param>
        /// <param name="types">
        /// Receives the raw type string of every mention that resolves to a "NAM" entity, including
        /// types that later map to <c>NerType.Undefined</c>.
        /// </param>
        private static void ConvertJsonToCorpus(string inputPath, string outputPath, ConcurrentBag <string> types)
        {
            var     json   = File.ReadAllText(inputPath);
            var     corpus = new EntitiesCorpus();
            dynamic s      = JsonConvert.DeserializeObject(json);
            var     doc    = s.doc;

            corpus.ClearedText = doc.text;
            var entities = doc.entities.entity;
            var etypes   = new Dictionary <string, string>();

            if (entities != null)
            {
                // Pass 1: map entity id -> type for named ("NAM") entities.
                foreach (dynamic entity in entities)
                {
                    string id = entity.eid;
                    if (id == null)
                    {
                        // A null key would make the dictionary throw ArgumentNullException, and an
                        // entity without an id cannot be looked up from a mention anyway.
                        continue;
                    }

                    if (entity.level == "NAM")
                    {
                        string type = entity.type;

                        // Indexer instead of Add: a repeated eid overwrites rather than throwing
                        // ArgumentException.
                        etypes[id] = type;
                    }
                }

                // Pass 2: turn mentions of those entities into corpus entities.
                var mentions = doc.mentions.mention;
                if (mentions != null)
                {
                    foreach (dynamic mention in mentions)
                    {
                        int    begin = mention.begin;
                        int    end   = mention.end;
                        string id    = mention.eid;
                        string type;

                        // The null check keeps TryGetValue from throwing on a mention without an eid.
                        if (id != null && etypes.TryGetValue(id, out type))
                        {
                            types.Add(type);
                            var t = GetType(type);
                            if (t != NerType.Undefined)
                            {
                                // NOTE(review): "+ 1" presumably converts an inclusive mention end
                                // into the exclusive end NerTextEntity expects - confirm.
                                corpus.Entities.Add(new NerTextEntity(begin, end + 1, t));
                            }
                        }
                    }
                }
            }

            File.WriteAllText(outputPath, corpus.Render());
        }
Example #4
0
        /// <summary>
        /// Registers a new entity for the given span, or does nothing new when the span is already
        /// present in <paramref name="allEntities"/>.
        /// </summary>
        /// <param name="model">Corpus whose <c>Entities</c> collection is updated.</param>
        /// <param name="begin">Start position of the entity.</param>
        /// <param name="endi">End position of the entity.</param>
        /// <param name="type">Type of the entity being added.</param>
        /// <param name="allEntities">Index of the spans registered so far, keyed by (begin, end).</param>
        private static void AddEntityToModel(EntitiesCorpus model, int begin, int endi, NerType type, Dictionary <KeyValuePair <int, int>, NerTextEntity> allEntities)
        {
            var key = new KeyValuePair <int, int>(begin, endi);
            NerTextEntity previous;

            if (!allEntities.TryGetValue(key, out previous))
            {
                // First time this span is seen: add it to the model and remember it.
                var fresh = new NerTextEntity(begin, endi, type);

                model.Entities.Add(fresh);
                allEntities.Add(key, fresh);
                return;
            }

            // Span already registered: an entity of a different type is removed from the model,
            // while the index keeps its entry, so the span is never added again.
            // NOTE(review): appears to drop spans with conflicting types on purpose - confirm.
            if (previous.Type != type)
            {
                model.Entities.Remove(previous);
            }
        }
Example #5
0
        /// <summary>
        /// Reads a JSON document, extracts the mentions of named entities and writes them out as a
        /// rendered entities corpus.
        /// </summary>
        /// <remarks>
        /// The JSON is read through <c>doc.text</c>, <c>doc.entities.entity</c> and
        /// <c>doc.mentions.mention</c>. Entities with <c>level == "NAM"</c> supply the type for their
        /// <c>eid</c>; each mention whose <c>eid</c> is known and whose type maps to something other
        /// than <c>NerType.Undefined</c> becomes a <c>NerTextEntity</c>.
        /// Records without an <c>eid</c> are ignored and a repeated <c>eid</c> keeps the last type
        /// read, instead of failing the whole conversion with a dictionary key exception.
        /// </remarks>
        /// <param name="inputPath">Path of the JSON file to read.</param>
        /// <param name="outputPath">Path the rendered corpus is written to.</param>
        /// <param name="types">
        /// Receives the raw type string of every mention that resolves to a "NAM" entity, whether or
        /// not that type maps to a defined <c>NerType</c>.
        /// </param>
        private static void ConvertJsonToCorpus(string inputPath, string outputPath, ConcurrentBag<string> types)
        {
            var json = File.ReadAllText(inputPath);
            var corpus = new EntitiesCorpus();
            dynamic s = JsonConvert.DeserializeObject(json);
            var doc = s.doc;
            corpus.ClearedText = doc.text;
            var entities = doc.entities.entity;
            var etypes = new Dictionary<string, string>();
            if (entities != null)
            {
                // Build the eid -> type lookup from the named ("NAM") entities.
                foreach (dynamic entity in entities)
                {
                    string id = entity.eid;
                    if (id == null)
                    {
                        // Dictionary keys must not be null; an entity without an id is unusable here.
                        continue;
                    }

                    if (entity.level == "NAM")
                    {
                        string type = entity.type;

                        // The indexer overwrites on a repeated eid where Add would throw ArgumentException.
                        etypes[id] = type;
                    }
                }

                var mentions = doc.mentions.mention;
                if (mentions != null)
                {
                    foreach (dynamic mention in mentions)
                    {
                        int begin = mention.begin;
                        int end = mention.end;
                        string id = mention.eid;
                        string type;

                        // Guard the lookup: TryGetValue throws ArgumentNullException for a null key.
                        if (id != null && etypes.TryGetValue(id, out type))
                        {
                            types.Add(type);
                            var t = GetType(type);
                            if (t != NerType.Undefined)
                            {
                                // NOTE(review): "+ 1" presumably turns an inclusive end offset into
                                // an exclusive one - confirm against NerTextEntity.
                                corpus.Entities.Add(new NerTextEntity(begin, end + 1, t));
                            }
                        }
                    }
                }
            }

            File.WriteAllText(outputPath, corpus.Render());
        }
Example #6
0
 /// <summary>
 /// Builds an entities corpus from the RCO result XML plus its windows-1251 source text and
 /// writes the text and the rendered corpus into the working directory.
 /// </summary>
 /// <param name="args">Command-line arguments; not used.</param>
 static void Main(string[] args)
 {
     var corpus = new EntitiesCorpus();
     var windows1251 = Encoding.GetEncoding("windows-1251");
     var rawXml = File.ReadAllText(@"E:\SystemsTests\rco\bin\result.xml", windows1251);
     var rawText = File.ReadAllText(@"E:\SystemsTests\rco\bin\samples\modelsall_text_cp1251.txt", windows1251);
     corpus.ClearedText = rawText;

     // Walk output/document/sentence/entities/entity and materialize the entity elements.
     var parsed = XDocument.Parse(rawXml);
     var documentElement = parsed.Element("output").Element("document");
     var entityElements = documentElement.Elements("sentence")
         .SelectMany(sentence => sentence.Elements("entities"))
         .SelectMany(container => container.Elements("entity"))
         .ToList();

     foreach (var element in entityElements)
     {
         var mappedType = GetType(element.GetAttributeValue("type"));
         if (mappedType == NerType.Undefined)
         {
             // Entities whose type is not recognized are not added to the corpus.
             continue;
         }

         // NOTE(review): -1 is presumably ParseInt's fallback for unparsable input - confirm.
         var begin = element.GetAttributeValue("offset").ParseInt(-1);
         var length = element.GetAttributeValue("length").ParseInt(-1);
         corpus.Entities.Add(new NerTextEntity(begin, length + begin, mappedType));
     }

     File.WriteAllText("modelsall_rco_text.txt", corpus.ClearedText);
     File.WriteAllText("modelsall_rco.txt", corpus.Render());

     // Wait for Enter before the process exits.
     Console.ReadLine();
 }
Example #7
0
        /// <summary>
        /// Converts the RDF export <c>rdf_IE_2.0_ru.xml</c> into an entities corpus.
        /// </summary>
        /// <remarks>
        /// Steps, in order: write the annotated document text to <c>testmodeleng_text_abby.txt</c>;
        /// print the local names of the root's children that are in the BasicEntity namespace; read
        /// the instance annotations (node-id annotations only record a span, resource annotations are
        /// added to the model directly); resolve the recorded spans against the typed root children;
        /// write the rendered model to <c>modelsall_abby.txt</c>.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var model = new EntitiesCorpus();
            var root  = XDocument.Parse(File.ReadAllText(@"rdf_IE_2.0_ru.xml")).Root;

            var rdfNs         = XNamespace.Get("http://www.w3.org/1999/02/22-rdf-syntax-ns#");
            var auxNs         = XNamespace.Get("http://www.abbyy.com/ns/Aux#");
            var basicEntityNs = XNamespace.Get("http://www.abbyy.com/ns/BasicEntity#");

            var nodeIdName          = rdfNs.GetName("nodeID");
            var resourceName        = rdfNs.GetName("resource");
            var textAnnotationsName = auxNs.GetName("TextAnnotations");
            var annotationName      = auxNs.GetName("InstanceAnnotation");
            var wrapperName         = auxNs.GetName("instance_annotation");
            var startName           = auxNs.GetName("annotation_start");
            var endName             = auxNs.GetName("annotation_end");
            var instanceName        = auxNs.GetName("instance");
            var documentTextName    = auxNs.GetName("document_text");

            var annotations = root.Element(textAnnotationsName);
            var text        = annotations.Element(documentTextName).Value;

            File.WriteAllText("testmodeleng_text_abby.txt", text);

            var spansByNodeId    = new Dictionary <string, Tuple <int, int> >();
            var allEntities      = new Dictionary <KeyValuePair <int, int>, NerTextEntity>();
            var basicEntityTypes = new HashSet <string>();

            // Diagnostic output: which BasicEntity element names occur directly under the root.
            foreach (var child in root.Elements())
            {
                var childName = child.Name;
                if (childName.Namespace == basicEntityNs)
                {
                    basicEntityTypes.Add(childName.LocalName);
                }
            }
            foreach (var typeName in basicEntityTypes)
            {
                Console.WriteLine(typeName);
            }

            var instanceAnnotations = annotations
                                      .Elements(wrapperName)
                                      .Select(wrapper => wrapper.Element(annotationName))
                                      .Where(found => found != null);

            foreach (var ann in instanceAnnotations)
            {
                var instanceElement = ann.Element(instanceName);
                var nodeIdAttr      = instanceElement.Attribute(nodeIdName);
                var begin           = int.Parse(ann.Element(startName).Value);
                var endi            = int.Parse(ann.Element(endName).Value);

                if (nodeIdAttr != null)
                {
                    // Node-id annotation: only remember the first non-empty span for that id;
                    // its type is resolved later from the root elements.
                    var id = nodeIdAttr.Value;
                    if (!spansByNodeId.ContainsKey(id) && begin != endi)
                    {
                        spansByNodeId.Add(id, Tuple.Create(begin, endi));
                    }
                    continue;
                }

                var resourceAttr = instanceElement.Attribute(resourceName);
                if (resourceAttr == null)
                {
                    continue;
                }

                // Resource annotation: the type comes straight from the resource URI.
                NerType resourceType = GetTypeFromUriAttr(resourceAttr.Value, string.Empty);
                if (resourceType != NerType.Undefined)
                {
                    AddEntityToModel(model, begin, endi, resourceType, allEntities);
                }
            }
            model.ClearedText = text;

            // Typed root elements whose node id has a recorded span become entities.
            foreach (var element in root.Elements())
            {
                NerType elementType = GetTypeFromUriAttr(element.Name.Namespace.NamespaceName, element.Name.LocalName);
                if (elementType == NerType.Undefined)
                {
                    continue;
                }

                var idAttr = element.Attribute(nodeIdName);
                if (idAttr == null)
                {
                    continue;
                }

                Tuple <int, int> span;
                if (spansByNodeId.TryGetValue(idAttr.Value, out span))
                {
                    AddEntityToModel(model, span.Item1, span.Item2, elementType, allEntities);
                }
            }
            File.WriteAllText("modelsall_abby.txt", model.Render());

            // Wait for Enter before the process exits.
            Console.ReadLine();
        }
Example #8
0
        /// <summary>
        /// Reads <c>rdf_IE_2.0_ru.xml</c>, extracts the annotated entities and writes the rendered
        /// corpus to <c>modelsall_abby.txt</c>.
        /// </summary>
        /// <remarks>
        /// The document text is also written to <c>testmodeleng_text_abby.txt</c>, and the local names
        /// of root children in the BasicEntity namespace are printed to the console. Annotations that
        /// carry a node id only record their span; annotations that carry a resource URI are added to
        /// the model immediately. Recorded spans are then matched against the typed root elements.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var model = new EntitiesCorpus();
            var xmlContent = File.ReadAllText(@"rdf_IE_2.0_ru.xml");
            var root = XDocument.Parse(xmlContent).Root;

            var rdf = XNamespace.Get("http://www.w3.org/1999/02/22-rdf-syntax-ns#");
            var aux = XNamespace.Get("http://www.abbyy.com/ns/Aux#");
            var basicEntity = XNamespace.Get("http://www.abbyy.com/ns/BasicEntity#");

            var nodeIdAttrName = rdf.GetName("nodeID");
            var resourceAttrName = rdf.GetName("resource");
            var textAnnotationsTag = aux.GetName("TextAnnotations");
            var instanceAnnotationTag = aux.GetName("InstanceAnnotation");
            var startTag = aux.GetName("annotation_start");
            var endTag = aux.GetName("annotation_end");
            var instanceTag = aux.GetName("instance");
            var documentTextTag = aux.GetName("document_text");

            var annotations = root.Element(textAnnotationsTag);
            var text = annotations.Element(documentTextTag).Value;
            File.WriteAllText("testmodeleng_text_abby.txt", text);

            var pendingSpans = new Dictionary<string, Tuple<int, int>>();
            var allEntities = new Dictionary<KeyValuePair<int, int>, NerTextEntity>();
            var basicEntityTypes = new HashSet<string>();

            // Diagnostic listing of the BasicEntity element names found directly under the root.
            foreach (var rootChild in root.Elements().Where(c => c.Name.Namespace == basicEntity))
            {
                basicEntityTypes.Add(rootChild.Name.LocalName);
            }
            foreach (var localName in basicEntityTypes)
            {
                Console.WriteLine(localName);
            }

            var annotationElements = annotations
                .Elements(aux.GetName("instance_annotation"))
                .Select(outer => outer.Element(instanceAnnotationTag))
                .Where(inner => inner != null);

            foreach (var ann in annotationElements)
            {
                var target = ann.Element(instanceTag);
                var nodeIdAttr = target.Attribute(nodeIdAttrName);
                var begin = int.Parse(ann.Element(startTag).Value);
                var endi = int.Parse(ann.Element(endTag).Value);

                if (nodeIdAttr == null)
                {
                    // No node id: fall back to the resource URI, which yields the type directly.
                    var resourceAttr = target.Attribute(resourceAttrName);
                    if (resourceAttr != null)
                    {
                        NerType uriType = GetTypeFromUriAttr(resourceAttr.Value, string.Empty);
                        if (uriType != NerType.Undefined)
                        {
                            AddEntityToModel(model, begin, endi, uriType, allEntities);
                        }
                    }
                    continue;
                }

                // Node id present: keep the first non-empty span seen for that id.
                var nodeId = nodeIdAttr.Value;
                if (!pendingSpans.ContainsKey(nodeId) && begin != endi)
                {
                    pendingSpans.Add(nodeId, Tuple.Create(begin, endi));
                }
            }
            model.ClearedText = text;

            // Resolve the recorded spans: a typed root element with a known node id becomes an entity.
            foreach (var candidate in root.Elements())
            {
                NerType candidateType = GetTypeFromUriAttr(candidate.Name.Namespace.NamespaceName, candidate.Name.LocalName);
                if (candidateType == NerType.Undefined)
                {
                    continue;
                }

                var candidateId = candidate.Attribute(nodeIdAttrName);
                if (candidateId == null)
                {
                    continue;
                }

                Tuple<int, int> position;
                if (pendingSpans.TryGetValue(candidateId.Value, out position))
                {
                    AddEntityToModel(model, position.Item1, position.Item2, candidateType, allEntities);
                }
            }
            File.WriteAllText("modelsall_abby.txt", model.Render());

            // Wait for Enter before the process exits.
            Console.ReadLine();
        }