Beispiel #1
0
        /// <summary>
        /// Scans every file in a hard-coded feature directory and writes a coverage report.
        /// For each file, one tab-separated line is written to the destination file:
        /// file name without extension, number of events whose <c>dbpediaTypes</c> field
        /// is not "UNKNOW", and that number divided by the total number of events read.
        /// </summary>
        /// <remarks>
        /// A file that yields no events is reported with a ratio of 0 rather than NaN.
        /// The reader and the writer are closed even when reading or writing throws.
        /// </remarks>
        public static void Temp5()
        {
            var sourceDir   = @"D:\Codes\Project\EntityTyping\Fine-ner\input\feature\test";
            var sourceFiles = Directory.GetFiles(sourceDir);
            var desFile     = @"D:\Codes\Project\EntityTyping\Fine-ner\input\feature\test data in dbpedia info.txt";
            var writer      = new LargeFileWriter(desFile, FileMode.Create);

            try
            {
                foreach (var sourceFile in sourceFiles)
                {
                    int count  = 0;
                    int total  = 0;
                    var reader = new EventReaderByLine(sourceFile);
                    try
                    {
                        while (reader.HasNext())
                        {
                            total++;
                            var event1     = reader.GetNextEvent();
                            var rawFeature = event1.Feature.ToList();
                            // "UNKNOW" (sic) is the literal compared against in the data;
                            // presumably it marks events without a DBpedia type -- confirm against the feature extractor.
                            if (!rawFeature[(int)Event.Field.dbpediaTypes].Equals("UNKNOW"))
                            {
                                count++;
                            }
                        }
                    }
                    finally
                    {
                        // Release the reader even if an event could not be read.
                        reader.Close();
                    }

                    // Guard the empty-file case: 0.0 / 0 would otherwise put "NaN" into the report.
                    var ratio = total == 0 ? 0.0 : 1.0 * count / total;
                    writer.WriteLine(Path.GetFileNameWithoutExtension(sourceFile) + "\t" + count + "\t" + ratio);
                }
            }
            finally
            {
                // Always close the report file, including on failure.
                writer.Close();
            }
        }
        /// <summary>
        /// Reads events from <c>source</c>, passes each one to <c>extractor.AddFeature</c>,
        /// and writes a new event (same label, returned feature) to <c>des</c>.
        /// </summary>
        /// <remarks>
        /// At most 100000 events are read. Progress is printed to the console each time the
        /// running count reaches a multiple of 1000. If extracting or writing a single event
        /// throws, the exception message, stack trace and the event are printed and processing
        /// continues with the next event. The reader and the writer are closed even when
        /// reading itself throws.
        /// </remarks>
        public void AddFeature()
        {
            // Upper bound on the number of events handled in one call.
            const int maxEvents = 100000;

            var reader = new EventReaderByLine(source);
            try
            {
                var writer = new EventWriterByLine(des);
                try
                {
                    int count = 0;

                    while (reader.HasNext())
                    {
                        if (++count % 1000 == 0)
                        {
                            Console.Clear();
                            Console.WriteLine("{0} has processed {1}", Thread.CurrentThread.Name, count);
                        }
                        if (count > maxEvents)
                        {
                            break;
                        }

                        // Not covered by the per-event catch below: a failure to read
                        // propagates to the caller, but the finally blocks still close both files.
                        var e = reader.GetNextEvent();
                        try
                        {
                            var feature = extractor.AddFeature(e);
                            e = new Event(e.Label, feature);
                            writer.WriteEvent(e);
                        }
                        catch (Exception ex)
                        {
                            // Best effort: report the failing event and keep going.
                            Console.WriteLine(ex.Message);
                            Console.WriteLine(ex.StackTrace);
                            Console.WriteLine(e);
                        }
                    }
                }
                finally
                {
                    writer.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Reads a hard-coded training feature file and writes a per-label coverage report.
        /// For each label, one tab-separated line is written to the destination file:
        /// label, number of events whose <c>dbpediaTypes</c> field is not "UNKNOW",
        /// total number of events with that label, and the ratio of the two.
        /// </summary>
        /// <remarks>
        /// Labels for which no event has a known <c>dbpediaTypes</c> value are reported with
        /// a covered count of 0 instead of raising <see cref="KeyNotFoundException"/>.
        /// The reader and the writer are closed even when reading or writing throws.
        /// </remarks>
        public static void Temp6()
        {
            var sourceFile     = @"D:\Codes\Project\EntityTyping\Fine-ner\output\conll feature\raw\train.txt";
            var desFile        = @"D:\Codes\Project\EntityTyping\Fine-ner\output\conll feature\raw\train data in dbpedia info.txt";
            var writer         = new LargeFileWriter(desFile, FileMode.Create);
            var coverNumByType = new Dictionary<string, int>();
            var totals         = new Dictionary<string, int>();

            try
            {
                var reader = new EventReaderByLine(sourceFile);
                try
                {
                    while (reader.HasNext())
                    {
                        var event1     = reader.GetNextEvent();
                        var rawFeature = event1.Feature.ToList();
                        var label      = event1.Label.ToString();

                        // TryGetValue leaves the out value at 0 for an unseen label,
                        // so the first occurrence is stored as 1 without any exception handling.
                        int seen;
                        totals.TryGetValue(label, out seen);
                        totals[label] = seen + 1;

                        if (!rawFeature[(int)Event.Field.dbpediaTypes].Equals("UNKNOW"))
                        {
                            int coveredSoFar;
                            coverNumByType.TryGetValue(label, out coveredSoFar);
                            coverNumByType[label] = coveredSoFar + 1;
                        }
                    }
                }
                finally
                {
                    reader.Close();
                }

                foreach (var entry in totals)
                {
                    // A label may never have been covered; treat a missing entry as 0.
                    int covered;
                    coverNumByType.TryGetValue(entry.Key, out covered);

                    // entry.Value is at least 1 here, so the division is always defined.
                    writer.WriteLine(entry.Key + "\t" + covered + "\t" + entry.Value + "\t" + (1.0 * covered / entry.Value));
                }
            }
            finally
            {
                writer.Close();
            }
        }