/// <summary>
/// Counts how often each value in the second tab-separated column of the
/// source file occurs and writes one "value&lt;TAB&gt;count" line per distinct
/// value to the destination file, ordered by descending count.
/// </summary>
/// <remarks>
/// Progress (the number of lines read so far) is printed to the console every
/// 10000 lines. Lines that do not contain a second column are skipped instead
/// of aborting the run. Both files are closed even if processing fails.
/// </remarks>
public static void Temp4()
{
    var source = @"D:\Codes\Project\EntityTyping\Fine-ner\input\dictionaries\dbpedia\dbpedia entity type.txt";
    var des = @"D:\Codes\Project\EntityTyping\Fine-ner\input\dictionaries\dbpedia\tmp.txt";
    var typeCounts = new Dictionary<string, int>();

    var reader = new pml.file.reader.LargeFileReader(source);
    try
    {
        var writer = new LargeFileWriter(des, FileMode.Create);
        try
        {
            string line;
            int count = 0;
            while ((line = reader.ReadLine()) != null)
            {
                if (++count % 10000 == 0)
                {
                    Console.WriteLine(count);
                }

                var columns = line.Split('\t');
                if (columns.Length < 2)
                {
                    // No second column (e.g. a blank line): nothing to count.
                    continue;
                }

                // TryGetValue leaves 'seen' at 0 for a key that is not present yet.
                int seen;
                typeCounts.TryGetValue(columns[1], out seen);
                typeCounts[columns[1]] = seen + 1;
            }

            foreach (var entry in typeCounts.OrderByDescending(pair => pair.Value))
            {
                writer.WriteLine(entry.Key + "\t" + entry.Value);
            }
        }
        finally
        {
            writer.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Reads the source file line by line, extracts an entity name and a type name
/// from each line with two regular expressions, and writes
/// "entity&lt;TAB&gt;type" to the destination file for every line where both
/// were found.
/// </summary>
/// <remarks>
/// Progress (the number of pairs written so far) is printed to the console
/// every 10000 pairs. Both files are closed even if processing fails.
/// </remarks>
public static void Temp()
{
    var source = @"D:\Data\DBpedia\mapping based types";
    var des = @"D:\Data\DBpedia\entity type pairs.txt";

    // Entity: the text after the last '/' of a token that ends in '>' and is
    // followed by whitespace and another '<'.
    var entityRegex = new System.Text.RegularExpressions.Regex(@"/([^>/]+)>\s<");
    // Type: the word after "ontology/" in the token that ends the line
    // (followed by whitespace and the terminating '.').
    var typeRegex = new System.Text.RegularExpressions.Regex(@"ontology/(\w+)>\s\.$");

    var reader = new pml.file.reader.LargeFileReader(source);
    try
    {
        var writer = new LargeFileWriter(des, FileMode.Create);
        try
        {
            string line;
            int count = 0;
            while ((line = reader.ReadLine()) != null)
            {
                // Run each pattern once and inspect Success instead of
                // calling IsMatch followed by Match.
                var entityMatch = entityRegex.Match(line);
                if (!entityMatch.Success)
                {
                    continue;
                }

                var typeMatch = typeRegex.Match(line);
                if (!typeMatch.Success)
                {
                    continue;
                }

                if (++count % 10000 == 0)
                {
                    Console.WriteLine(count);
                }

                writer.WriteLine(entityMatch.Groups[1].Value + "\t" + typeMatch.Groups[1].Value);
            }
        }
        finally
        {
            writer.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Reads the redirects source file line by line, extracts two names from each
/// line with two regular expressions, and writes "first&lt;TAB&gt;second" to
/// the destination file for every line where both were found.
/// </summary>
/// <remarks>
/// Progress (the number of pairs written so far) is printed to the console
/// every 10000 pairs. Both files are closed even if processing fails.
/// </remarks>
public static void Temp3()
{
    var source = @"D:\Data\DBpedia\redirects.ttl";
    var des = @"D:\Data\DBpedia\redirects.txt";

    // First name: the text after the last '/' of a token that ends in '>' and
    // is followed by whitespace and another '<'.
    var firstRegex = new System.Text.RegularExpressions.Regex(@"/([^>/]+)>\s<");
    // Second name: the word after the last '/' of the token that ends the line
    // (followed by whitespace and the terminating '.').
    var secondRegex = new System.Text.RegularExpressions.Regex(@"/(\w+)>\s\.$");

    var reader = new pml.file.reader.LargeFileReader(source);
    try
    {
        var writer = new LargeFileWriter(des, FileMode.Create);
        try
        {
            string line;
            int count = 0;
            while ((line = reader.ReadLine()) != null)
            {
                // Run each pattern once and inspect Success instead of
                // calling IsMatch followed by Match.
                var firstMatch = firstRegex.Match(line);
                if (!firstMatch.Success)
                {
                    continue;
                }

                var secondMatch = secondRegex.Match(line);
                if (!secondMatch.Success)
                {
                    continue;
                }

                if (++count % 10000 == 0)
                {
                    Console.WriteLine(count);
                }

                writer.WriteLine(firstMatch.Groups[1].Value + "\t" + secondMatch.Groups[1].Value);
            }
        }
        finally
        {
            writer.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}