Ejemplo n.º 1
0
        /// <summary>
        /// Reads the hard-coded source file line by line, counts how many times each
        /// distinct value of the second tab-separated field occurs, and writes one
        /// "value&lt;TAB&gt;count" line per distinct value to the destination file,
        /// ordered by descending count.
        /// </summary>
        /// <remarks>
        /// The running line number is printed to the console every 10000 lines.
        /// Lines with fewer than two tab-separated fields are skipped; the number of
        /// skipped lines is reported on standard error once reading has finished.
        /// Both file handles are closed even when reading or writing throws.
        /// </remarks>
        public static void Temp4()
        {
            var source = @"D:\Codes\Project\EntityTyping\Fine-ner\input\dictionaries\dbpedia\dbpedia entity type.txt";
            var des    = @"D:\Codes\Project\EntityTyping\Fine-ner\input\dictionaries\dbpedia\tmp.txt";
            var dic    = new Dictionary<string, int>();
            int count   = 0;
            int skipped = 0;

            var reader = new pml.file.reader.LargeFileReader(source);
            try
            {
                var writer = new LargeFileWriter(des, FileMode.Create);
                try
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (++count % 10000 == 0)
                        {
                            Console.WriteLine(count);
                        }

                        var array = line.Split('\t');
                        if (array.Length < 2)
                        {
                            // No second field to count; indexing array[1] would throw.
                            skipped++;
                            continue;
                        }

                        // TryGetValue leaves times at 0 when the key is absent, so the
                        // first occurrence is stored as 1.
                        int times;
                        dic.TryGetValue(array[1], out times);
                        dic[array[1]] = times + 1;
                    }

                    if (skipped > 0)
                    {
                        Console.Error.WriteLine("Skipped " + skipped + " line(s) without a second tab-separated field.");
                    }

                    foreach (var type in dic.OrderByDescending(key => key.Value))
                    {
                        writer.WriteLine(type.Key + "\t" + type.Value);
                    }
                }
                finally
                {
                    writer.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scans the hard-coded source file line by line and, for every line from which
        /// both an entity and a type can be extracted, writes "entity&lt;TAB&gt;type"
        /// to the destination file.
        /// </summary>
        /// <remarks>
        /// The entity is the first capture of <c>/([^>/]+)>\s&lt;</c>: the text after a
        /// slash, free of '/' and '>', that is followed by '>', one whitespace
        /// character and '&lt;'. The type is the word captured by
        /// <c>ontology/(\w+)>\s\.$</c> at the end of the line.
        /// NOTE(review): the input looks like N-Triples style DBpedia data; confirm
        /// against the actual file.
        /// The number of written pairs is printed to the console every 10000 pairs.
        /// Both file handles are closed even when reading or writing throws.
        /// </remarks>
        public static void Temp()
        {
            var source = @"D:\Data\DBpedia\mapping based types";
            var des    = @"D:\Data\DBpedia\entity type pairs.txt";

            var entityRegex = new System.Text.RegularExpressions.Regex(@"/([^>/]+)>\s<");
            var typeRegex   = new System.Text.RegularExpressions.Regex(@"ontology/(\w+)>\s\.$");
            int count = 0;

            var reader = new pml.file.reader.LargeFileReader(source);
            try
            {
                var writer = new LargeFileWriter(des, FileMode.Create);
                try
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        // Run each pattern once and inspect Success instead of
                        // calling IsMatch followed by Match.
                        var entityMatch = entityRegex.Match(line);
                        var typeMatch   = typeRegex.Match(line);
                        if (!entityMatch.Success || !typeMatch.Success)
                        {
                            continue;
                        }

                        if (++count % 10000 == 0)
                        {
                            Console.WriteLine(count);
                        }
                        writer.WriteLine(entityMatch.Groups[1].Value + "\t" + typeMatch.Groups[1].Value);
                    }
                }
                finally
                {
                    writer.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Scans the hard-coded redirects file line by line and, for every line from
        /// which both patterns yield a capture, writes "first&lt;TAB&gt;second" to the
        /// destination file.
        /// </summary>
        /// <remarks>
        /// "first" is the first capture of <c>/([^>/]+)>\s&lt;</c>: the text after a
        /// slash, free of '/' and '>', that is followed by '>', one whitespace
        /// character and '&lt;'. "second" is the word captured by
        /// <c>/(\w+)>\s\.$</c> at the end of the line.
        /// NOTE(review): presumably these are the redirect source and target names;
        /// confirm against the actual file.
        /// The number of written pairs is printed to the console every 10000 pairs.
        /// Both file handles are closed even when reading or writing throws.
        /// </remarks>
        public static void Temp3()
        {
            var source = @"D:\Data\DBpedia\redirects.ttl";
            var des    = @"D:\Data\DBpedia\redirects.txt";

            var firstRegex  = new System.Text.RegularExpressions.Regex(@"/([^>/]+)>\s<");
            var secondRegex = new System.Text.RegularExpressions.Regex(@"/(\w+)>\s\.$");
            int count = 0;

            var reader = new pml.file.reader.LargeFileReader(source);
            try
            {
                var writer = new LargeFileWriter(des, FileMode.Create);
                try
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        // Run each pattern once and inspect Success instead of
                        // calling IsMatch followed by Match.
                        var firstMatch  = firstRegex.Match(line);
                        var secondMatch = secondRegex.Match(line);
                        if (!firstMatch.Success || !secondMatch.Success)
                        {
                            continue;
                        }

                        if (++count % 10000 == 0)
                        {
                            Console.WriteLine(count);
                        }
                        writer.WriteLine(firstMatch.Groups[1].Value + "\t" + secondMatch.Groups[1].Value);
                    }
                }
                finally
                {
                    writer.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }