/// <summary>
/// Scans every file in the hard-coded test feature directory and, for each file,
/// counts the events whose dbpediaTypes feature is not "UNKNOW". One tab-separated
/// line per file is written to the hard-coded report file:
/// file name (without extension), number of covered events, covered ratio.
/// </summary>
public static void Temp5()
{
    var sourceDir = @"D:\Codes\Project\EntityTyping\Fine-ner\input\feature\test";
    var sourceFiles = Directory.GetFiles(sourceDir).ToList();
    var desFile = @"D:\Codes\Project\EntityTyping\Fine-ner\input\feature\test data in dbpedia info.txt";
    var writer = new LargeFileWriter(desFile, FileMode.Create);
    try
    {
        foreach (var sourceFile in sourceFiles)
        {
            int count = 0;
            int total = 0;
            var reader = new EventReaderByLine(sourceFile);
            try
            {
                while (reader.HasNext())
                {
                    total++;
                    var event1 = reader.GetNextEvent();
                    var rawFeature = event1.Feature.ToList();
                    if (!rawFeature[(int)Event.Field.dbpediaTypes].Equals("UNKNOW"))
                    {
                        count++;
                    }
                }
            }
            finally
            {
                // Release the reader even when reading or indexing the feature throws.
                reader.Close();
            }

            // A file with no events would otherwise produce 0/0 = NaN in the report.
            double ratio = total == 0 ? 0.0 : 1.0 * count / total;
            writer.WriteLine(Path.GetFileNameWithoutExtension(sourceFile) + "\t" + count + "\t" + ratio);
        }
    }
    finally
    {
        // Always close the report so already-written lines are not lost on failure.
        writer.Close();
    }
}
/// <summary>
/// Reads events from <c>source</c>, passes each one through <c>extractor.AddFeature</c>,
/// and writes the resulting event (same label, new feature) to <c>des</c>.
/// At most 100000 events are processed. Progress is printed to the console every
/// 1000 events together with the current thread's name.
/// </summary>
/// <remarks>
/// A failure while extracting or writing a single event is logged to the console
/// (message, stack trace, and the event) and processing continues with the next event.
/// NOTE(review): <c>source</c>, <c>des</c> and <c>extractor</c> are members declared
/// outside this method; their exact types are not visible here.
/// </remarks>
public void AddFeature()
{
    const int ProgressInterval = 1000;
    const int MaxEvents = 100000;

    var reader = new EventReaderByLine(source);
    var writer = new EventWriterByLine(des);
    try
    {
        int count = 0;
        while (reader.HasNext())
        {
            if (++count % ProgressInterval == 0)
            {
                Console.Clear();
                Console.WriteLine("{0} has processed {1}", Thread.CurrentThread.Name, count);
            }
            // count was already incremented, so this stops before reading event MaxEvents + 1.
            if (count > MaxEvents)
            {
                break;
            }

            var e = reader.GetNextEvent();
            try
            {
                var feature = extractor.AddFeature(e);
                e = new Event(e.Label, feature);
                writer.WriteEvent(e);
            }
            catch (Exception ex)
            {
                // Best effort: report the failing event and keep going.
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
                Console.WriteLine(e);
            }
        }
    }
    finally
    {
        // HasNext/GetNextEvent sit outside the per-event catch; make sure both
        // handles are released if they throw.
        reader.Close();
        writer.Close();
    }
}
/// <summary>
/// Reads the hard-coded CoNLL training feature file and, per event label, counts the
/// total number of events and the number whose dbpediaTypes feature is not "UNKNOW".
/// One tab-separated line per label is written to the hard-coded report file:
/// label, covered count, total count, covered ratio.
/// </summary>
public static void Temp6()
{
    var sourceFile = @"D:\Codes\Project\EntityTyping\Fine-ner\output\conll feature\raw\train.txt";
    var desFile = @"D:\Codes\Project\EntityTyping\Fine-ner\output\conll feature\raw\train data in dbpedia info.txt";
    var writer = new LargeFileWriter(desFile, FileMode.Create);
    try
    {
        var coverNumByType = new Dictionary<string, int>();
        var totals = new Dictionary<string, int>();

        var reader = new EventReaderByLine(sourceFile);
        try
        {
            while (reader.HasNext())
            {
                var event1 = reader.GetNextEvent();
                var rawFeature = event1.Feature.ToList();
                var label = event1.Label.ToString();

                // TryGetValue leaves the out value at 0 for an unseen label,
                // so the first occurrence is stored as 1.
                int seen;
                totals.TryGetValue(label, out seen);
                totals[label] = seen + 1;

                if (!rawFeature[(int)Event.Field.dbpediaTypes].Equals("UNKNOW"))
                {
                    int covered;
                    coverNumByType.TryGetValue(label, out covered);
                    coverNumByType[label] = covered + 1;
                }
            }
        }
        finally
        {
            reader.Close();
        }

        foreach (var entry in totals)
        {
            // A label may never have had a known dbpedia type; report 0 for it
            // instead of failing on a missing dictionary key.
            int covered;
            coverNumByType.TryGetValue(entry.Key, out covered);
            writer.WriteLine(entry.Key + "\t" + covered + "\t" + entry.Value + "\t" + (1.0 * covered / entry.Value));
        }
    }
    finally
    {
        writer.Close();
    }
}