Beispiel #1
0
        /// <summary>
        /// Creates a document factory that keeps a reference to the given annotation set.
        /// </summary>
        /// <param name="annotationSet">Annotation set stored by the factory; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="annotationSet"/> is null.
        /// </exception>
        public DocumentFactory(AnnotationSet annotationSet)
        {
            if (annotationSet == null)
            {
                throw new ArgumentNullException("annotationSet");
            }

            _annotationSet = annotationSet;
        }
Beispiel #2
0
        /// <summary>
        /// Writes a plain-text report of a document: one output line per sentence,
        /// each word followed by a space, and each entity rendered as
        /// "[content \ annotation] ".
        /// </summary>
        /// <param name="document">Document whose sentences and words are written.</param>
        /// <param name="fileName">
        /// Path of the report file. The file is created, or overwritten if it already exists.
        /// </param>
        /// <param name="annotationSet">
        /// Indexed with each entity's <c>EntityId</c> to obtain the text written after the backslash.
        /// </param>
        static void saveReport(WEMDocument document, string fileName,
            AnnotationSet annotationSet)
        {
            // FileMode.Create truncates an existing file. OpenOrCreate would keep the
            // old bytes, so a shorter report would be followed by stale content.
            using (StreamWriter writer = new StreamWriter(
                new FileStream(fileName, FileMode.Create)))
            {
                foreach (var sen in document.Sentences)
                {
                    foreach (var word in sen.Words)
                    {
                        if (word is Entity)
                        {
                            Entity entity = (Entity)word;
                            writer.Write(String.Format("[{0} \\ {1}] ",
                                entity.Content,
                                annotationSet[entity.EntityId]));
                        }
                        else
                        {
                            writer.Write(word.Content + ' ');
                        }
                    }

                    // End of sentence: terminate the report line.
                    writer.WriteLine();
                }

                // No explicit Close(): leaving the using block flushes and closes
                // the writer together with the underlying stream.
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates a text document reader whose sentence factory is built from the
        /// given annotation set.
        /// </summary>
        /// <param name="annotationSet">
        /// Annotation set passed to the <c>SentenceFactory</c> constructor.
        /// </param>
        public TextDocumentReader(AnnotationSet annotationSet)
        {
            _sententceFactory = new SentenceFactory(annotationSet);

            // Defaults: Simplified Chinese input language, UTF-8 output encoding.
            InputLanguage  = Language.SimplifiedChinese;
            OutputEncoding = Encoding.UTF8;
        }
Beispiel #4
0
 /// <summary>
 /// Initializes the base class of an annotator.
 /// </summary>
 /// <param name="annotationSet">The annotation set used by this annotator.</param>
 /// <param name="model">The model stored for this annotator.</param>
 protected AnnotatorBase(AnnotationSet annotationSet, Model model)
 {
     // A freshly constructed annotator is neither initialized nor running.
     _isRunning = false;
     _initialized = false;

     _model = model;
     _annotationSet = annotationSet;
 }
Beispiel #5
0
        /// <summary>
        /// Entry point. Loads the annotation set from "WEMAS.xml", reads every file
        /// found in Results/PFR through a <c>DocumentFactory</c>, and saves each
        /// resulting document as XML under Compare.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Constructed exactly as in the original even though nothing below uses it.
            PFRDocumentReader reader = new PFRDocumentReader();

            AnnotationSet wemas = new AnnotationSet();
            wemas.Load("WEMAS.xml");

            // Make sure the working folders exist: Results/PFR, Reports/PFR, Compare.
            DirectoryInfo resultsRoot = Directory.CreateDirectory("Results");
            DirectoryInfo dir = resultsRoot.CreateSubdirectory("PFR");

            DirectoryInfo reportsRoot = Directory.CreateDirectory("Reports");
            DirectoryInfo repDir = reportsRoot.CreateSubdirectory("PFR");

            DirectoryInfo comDir = Directory.CreateDirectory("Compare");

            // A disabled earlier stage used the reader on "199801.txt", saved each
            // document to Results/PFR and wrote a report to Reports/PFR. It stays disabled.

            DocumentFactory factory = new DocumentFactory(wemas);
            WEMDocument[] docs = factory.GetDocumentsFromXml(
                dir.GetFiles().Select(docFile => docFile.FullName));

            for (int i = 0; i < docs.Length; i++)
            {
                WEMDocument current = docs[i];
                string target = Path.Combine(comDir.FullName, current.Name + ".xml");
                current.Save(target);
            }

            // Exceptions are not caught here (the original try/catch is disabled).

            Console.WriteLine("Done.");
            Console.ReadKey(true);
        }
Beispiel #6
0
        /// <summary>
        /// Creates a new annotation set. The caller must have WRITE permission for the
        /// associated dataset. The following fields are required: datasetId and
        /// referenceSetId. All other fields may be optionally specified, unless
        /// documented as being server-generated (for example, the `id` field).
        /// Documentation https://developers.google.com/genomics/v1/reference/annotationsets/create
        /// Generation note: this generated sample does not always build correctly.
        /// </summary>
        /// <param name="service">Authenticated Genomics service.</param>
        /// <param name="body">A valid Genomics v1 body.</param>
        /// <returns>The annotation set returned by executing the create request.</returns>
        /// <exception cref="Exception">
        /// Thrown for any failure, including a null argument; the original error is
        /// available as the inner exception.
        /// </exception>
        public static AnnotationSet Create(GenomicsService service, AnnotationSet body)
        {
            try
            {
                // The argument checks sit inside the try, so a null argument is
                // reported through the wrapping exception below.
                if (service == null)
                    throw new ArgumentNullException("service");

                if (body == null)
                    throw new ArgumentNullException("body");

                // Build the request, then execute it.
                var request = service.Annotationsets.Create(body);
                return request.Execute();
            }
            catch (Exception ex)
            {
                throw new Exception("Request Annotationsets.Create failed.", ex);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Runs the console annotation pipeline: loads the annotation set, builds a
        /// corpus from the .txt files in the corpus directory, runs the ICTCLAS
        /// annotator and then the CRF++ annotator over it, and finally saves every
        /// document (plus a report per document when reports are enabled).
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>startCheck()</c> is false. Both annotators are
        /// started through their asynchronous entry points; this method polls a flag
        /// that the completion handlers set when the chain has finished or failed.
        /// </remarks>
        public void Start()
        {
            if (!startCheck())
            {
                return;
            }

            AnnotationSet wemas;
            ICTCLASAnnotator ictclasAno;
            CRFPPAnnotator crfppAno;
            Corpus corpus;

            Console.BackgroundColor = ConsoleColor.DarkBlue;
            Console.Clear();
            writeTitle();

            // Remember the caller's colour so it can be restored before returning.
            ConsoleColor color = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;

            try
            {
                Console.WriteLine(MessageInitializing);

                wemas = new AnnotationSet();
                wemas.Load(_annotationSet);

                ictclasAno = new ICTCLASAnnotator(wemas, null);
                crfppAno = new CRFPPAnnotator(wemas,
                    new Model(_crfppModel));
                crfppAno.SetCRFPPRootPath(_crfppDir);

                corpus = new Corpus(Encoding.GetEncoding(_xmlEncoding));

                Console.Write(MessageLoadingCorpus);
                savePosition();

                // Materialize the file list once. The original kept a deferred query
                // and re-ran the filter on every Count()/ElementAt() call.
                string[] documents = (from file in Directory.GetFiles(_corpusDir)
                                      where file.ToUpper().EndsWith(".TXT")
                                      select file).ToArray();

                if (documents.Length == 0)
                {
                    Console.WriteLine();
                    Console.WriteLine(MessageEmptyCorpus);
                    Console.WriteLine(MessageDone);
                    Console.ForegroundColor = color;
                    pause();
                    return;
                }

                _sentenceFactory
                    = new SentenceFactory(wemas);
                _sentenceFactory.InputLanguage
                    = Language.SimplifiedChinese;
                _sentenceFactory.OutputEncoding = corpus.Encoding;

                int count = 0;
                foreach (var doc in documents)
                {
                    Sentence[] sens;

                    // Dispose the input stream as soon as the sentences have been
                    // read; the original leaked one open file handle per document.
                    using (var input = new FileStream(doc, FileMode.Open))
                    {
                        sens = _sentenceFactory.GetSentences(input);
                    }

                    WEMDocument wDoc = new WEMDocument(corpus.Encoding);
                    wDoc.Name = doc;

                    foreach (var sen in sens)
                    {
                        wDoc.AddSentence(sen);
                    }

                    corpus.AddDocument(wDoc);

                    rewrite(String.Format("{0}%", (int)((double)
                        ++count / documents.Length * 100)));
                }

                Console.WriteLine("共加载{0}篇文档。",
                    corpus.Documents.Count);
                Console.Write(MessageAnnotatingSeg);
                savePosition();

                // Set by the completion handlers; polled at the end of the try block.
                bool done = false;

                ictclasAno.AnnotationProgressChanged += (s, ea) =>
                    {
                        rewrite(String.Format("{0}%",
                            Math.Round(ea.ProgressInDouble, 2) * 100));
                    };

                ictclasAno.AnnotationCompleted += (s, ea) =>
                    {
                        if (ea.Error == null)
                        {
                            rewrite("100%");
                            Console.WriteLine(MessageAnnotatingEnt);

                            crfppAno.AnnotationProgressChanged += (sen, args) =>
                                {
                                    Console.WriteLine(args.Message);
                                };

                            crfppAno.AnnotationCompleted += (sen, args) =>
                                {
                                    if (args.Error == null)
                                    {
                                        Console.WriteLine(MessageAnnotationFinished);
                                        if (_reportsEnabled)
                                            Console.Write(MessageSavingDocsAndReps);
                                        else
                                            Console.Write(MessageSavingDocuments);
                                        savePosition();

                                        int sCount = 0;
                                        foreach (var doc in corpus.Documents)
                                        {
                                            string reportFileName;

                                            // Documents were added to the corpus in the
                                            // same order as the file list, so the index
                                            // in the corpus identifies the source file.
                                            string fileName = documents[
                                                corpus.Documents.IndexOf(doc)];

                                            FileInfo fi = new FileInfo(fileName);
                                            reportFileName = fileName = fi.Name;

                                            // Replace the three-letter "txt" extension
                                            // with "xml", keeping the dot.
                                            fileName = fileName.Remove(
                                                fileName.Length - 3);
                                            fileName = _outputDir + fileName
                                                + "xml";

                                            doc.Save(fileName);

                                            if (_reportsEnabled)
                                                saveReport(doc,
                                                    _reportsDir + reportFileName,
                                                    wemas);

                                            rewrite(String.Format("{0}%",
                                                (int)((double)++sCount
                                                / corpus.Documents.Count * 100)));
                                        }

                                        Console.WriteLine(MessageDone);
                                        done = true;
                                    }
                                    else
                                    {
                                        Console.ForegroundColor = ConsoleColor.Red;
                                        Console.WriteLine();
                                        Console.WriteLine(MessageUnhandledException);
                                        Console.WriteLine(args.Error.Message);
                                        Console.WriteLine(args.Error.StackTrace);
                                        done = true;
                                    }
                                };

                            if (crfppAno.Initialize())
                            {
                                crfppAno.ProcessCorpusAsync(corpus);
                            }
                            else
                            {
                                Console.ForegroundColor = ConsoleColor.Red;
                                Console.WriteLine(MessageEntInitFailed);
                                done = true;
                                return;
                            }
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine();
                            Console.WriteLine(MessageUnhandledException);
                            Console.WriteLine(ea.Error.Message);
                            Console.WriteLine(ea.Error.StackTrace);
                            done = true;
                        }
                    };

                if (ictclasAno.Initialize())
                {
                    ictclasAno.ProcessCorpusAsync(corpus);

                    // NOTE(review): if a completion handler throws before setting
                    // the flag, this loop never ends - confirm how the annotators
                    // surface handler exceptions.
                    while (!done)
                    {
                        Thread.Sleep(50);
                    }
                }
                else
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine(MessageSegInitFailed);
                }
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine();
                Console.WriteLine(MessageUnhandledException);
                Console.WriteLine(ex.Message);
            }

            // Restore the caller's colour. pause() is not called on this path
            // (the call is disabled in the original as well).
            Console.ForegroundColor = color;
        }
Beispiel #8
0
        /// <summary>
        /// Parses one console command line and executes it.
        /// </summary>
        /// <remarks>
        /// The first whitespace-separated token selects the command, case-insensitively:
        /// "" (no-op), "exit", "clear", "genas" (convert "WEMAS.xml" into "WEMAS.wemas"),
        /// "testas" (load "WEMAS.wemas", set a description, save as "WEMAS2.wemas") and
        /// "testws" (annotate "utf8.txt", save "RESULT.xml" and print every word).
        /// Anything else prints an "Unknown Command" message.
        /// </remarks>
        /// <param name="command">The raw command line; null is treated like an empty line.</param>
        static void ProcessCommand(string command)
        {
            // Treat a null command like an empty one instead of failing on Split().
            if (command == null)
                return;

            string[] blocks = command.Split();

            // Invariant lower-casing: with culture-sensitive ToLower() an input such
            // as "EXIT" would not match "exit" under e.g. the Turkish culture.
            switch (blocks[0].ToLowerInvariant())
            {
                case "":
                    return;

                case "exit":
                    if (blocks.Length == 1)
                        _exitFlag = true;
                    else
                        Console.WriteLine("Unknown Parameters.");
                    return;

                case "clear":
                    if (blocks.Length == 1)
                        Console.Clear();
                    else
                        Console.WriteLine("Unknown Parameters.");
                    return;

                case "genas":
                    AnnotationDictionary dict = new AnnotationDictionary();
                    AnnotationSet ans = new AnnotationSet("WEMAS");
                    dict.Load("WEMAS.xml");

                    // Copy every dictionary entry and sentence separator into the set.
                    foreach (var entry in dict._Dictionary)
                    {
                        ans.SetAnnotationDescription(entry.Key, entry.Value);
                    }

                    foreach (var sep in dict.SentenceSeparators)
                    {
                        ans.AddSentenceSeparator(sep);
                    }

                    ans.Save("WEMAS.wemas");
                    return;

                case "testas":
                    AnnotationSet ans2 = new AnnotationSet("WEMAS");
                    ans2.Load("WEMAS.wemas");
                    ans2.Description =
                        "WEB ENTITY MINER Default Annotation Set.";

                    ans2.Save("WEMAS2.wemas");
                    return;

                case "testws":
                    AnnotationSet wemas = new AnnotationSet();
                    wemas.Load("WEMAS.wemas");

                    ICTCLASAnnotator ano = new ICTCLASAnnotator(wemas, null);
                    CRFPPAnnotator crfAno = new CRFPPAnnotator(wemas,
                        new Model(
                            AppDomain.CurrentDomain.BaseDirectory
                            + "model.crfppmodel"));

                    Corpus c = new Corpus(Encoding.UTF8);
                    SentenceFactory.AnnotationSet = wemas;
                    SentenceFactory.InputLanguage = Language.SimplifiedChinese;
                    SentenceFactory.OutputEncoding = Encoding.UTF8;

                    WEMDocument doc;

                    // The input stream is disposed once all sentences are copied into
                    // the document; the original never closed it.
                    using (var input = new FileStream(
                        "utf8.txt", FileMode.Open, FileAccess.Read))
                    {
                        var sens = SentenceFactory.GetSentences(input);

                        doc = new WEMDocument(Encoding.UTF8);
                        foreach (var sen in sens)
                        {
                            doc.AddSentence(sen);
                        }
                    }

                    c.AddDocument(doc);

                    try
                    {
                        ano.ProcessCorpus(c);
                        crfAno.ProcessCorpus(c);
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Unhandled Exception:\n{0}",
                            ex.Message);
                    }

                    // The document is saved and printed even when annotation failed.
                    doc.Save("RESULT.xml");

                    foreach (var sententce in doc.Sentences)
                    {
                        foreach (var word in sententce.Words)
                        {
                            if (word is Entity)
                            {
                                Console.WriteLine("{0}/ENTITY:{1}",
                                    word.Content,
                                    wemas[((Entity)word).EntityId]);
                            }
                            else
                            {
                                Console.WriteLine("{0}", word.Content);
                            }
                        }
                    }
                    return;

                default:
                    Console.WriteLine("Unknown Command: '{0}'.", blocks[0]);
                    return;
            }
        }
Beispiel #9
0
        /// <summary>
        /// Updates an annotation set. The update must respect all mutability
        /// restrictions and other invariants described on the annotation set resource.
        /// The caller must have WRITE permission for the associated dataset.
        /// Documentation https://developers.google.com/genomics/v1/reference/annotationsets/update
        /// Generation note: this generated sample does not always build correctly.
        /// </summary>
        /// <param name="service">Authenticated Genomics service.</param>
        /// <param name="annotationSetId">The ID of the annotation set to be updated.</param>
        /// <param name="body">A valid Genomics v1 body.</param>
        /// <param name="optional">Optional parameters.</param>
        /// <returns>The annotation set returned by executing the update request.</returns>
        /// <exception cref="Exception">
        /// Thrown for any failure, including a null argument; the original error is
        /// available as the inner exception.
        /// </exception>
        public static AnnotationSet Update(GenomicsService service, string annotationSetId, AnnotationSet body, AnnotationsetsUpdateOptionalParms optional = null)
        {
            try
            {
                // Initial validation. These checks sit inside the try, so a null
                // argument is reported through the wrapping exception below.
                if (service == null)
                {
                    throw new ArgumentNullException("service");
                }
                if (body == null)
                {
                    throw new ArgumentNullException("body");
                }
                if (annotationSetId == null)
                {
                    // Pass the parameter NAME. The original passed the (null) value,
                    // which produced an exception without a parameter name.
                    throw new ArgumentNullException("annotationSetId");
                }

                // Building the initial request.
                var request = service.Annotationsets.Update(body, annotationSetId);

                // Applying optional parameters to the request.
                request = (AnnotationsetsResource.UpdateRequest)SampleHelpers.ApplyOptionalParms(request, optional);

                // Requesting data.
                return request.Execute();
            }
            catch (Exception ex)
            {
                throw new Exception("Request Annotationsets.Update failed.", ex);
            }
        }
Beispiel #10
0
 /// <summary>
 /// Forwards an annotation set update to <c>Client.Update</c>.
 /// </summary>
 /// <param name="skillId">Skill identifier, passed through unchanged.</param>
 /// <param name="annotationId">Annotation identifier, passed through unchanged.</param>
 /// <param name="set">Annotation set, passed through unchanged.</param>
 /// <returns>The task returned by <c>Client.Update</c>.</returns>
 public Task Update(string skillId, string annotationId, AnnotationSet set)
 {
     Task pending = Client.Update(skillId, annotationId, set);
     return pending;
 }
 /// <summary>
 /// Constructor: assigns newly constructed <c>AnnotationSet</c> and
 /// <c>BehaviorSet</c> instances to the annotations and behaviors members.
 /// </summary>
 internal ProjectionMetaObject()
 {
     annotations = new AnnotationSet();
     behaviors = new BehaviorSet();
 }
Beispiel #12
0
        /// <summary>
        /// Builds an annotation set by applying each of the given annotations, in order,
        /// to a new <c>AnnotationSet</c>.
        /// </summary>
        /// <param name="annotations">Annotations passed one by one to <c>Apply</c>.</param>
        /// <returns>The annotation set after all annotations have been applied.</returns>
        private static AnnotationSet AnnotationSet(params object[] annotations)
        {
            var result = new AnnotationSet();

            for (int i = 0; i < annotations.Length; i++)
            {
                result.Apply(annotations[i]);
            }

            return result;
        }
Beispiel #13
0
 /// <summary>
 /// Creates an ICTCLAS annotator; both arguments are passed straight to the
 /// base class constructor and nothing else is done here.
 /// </summary>
 /// <param name="annotationSet">Annotation set forwarded to the base class.</param>
 /// <param name="model">Model forwarded to the base class.</param>
 public ICTCLASAnnotator(AnnotationSet annotationSet, Model model)
     : base(annotationSet, model)
 {
 }
Beispiel #14
0
 /// <summary>
 /// Creates a CRF++ annotator and points <c>CRFPPHelper.RootPath</c> at the
 /// "CRFPP" entry under the application's base directory.
 /// </summary>
 /// <param name="annotationSet">Annotation set forwarded to the base class.</param>
 /// <param name="model">Model forwarded to the base class.</param>
 public CRFPPAnnotator(AnnotationSet annotationSet, Model model)
     : base(annotationSet, model)
 {
     // Plain concatenation: relies on BaseDirectory already ending with a separator.
     string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
     CRFPPHelper.RootPath = baseDirectory + "CRFPP";
 }