/// <summary>
/// Creates a document factory that keeps a reference to the supplied annotation set.
/// </summary>
/// <param name="annotationSet">Annotation set stored by this factory; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="annotationSet"/> is null.
/// </exception>
public DocumentFactory(AnnotationSet annotationSet)
{
    // Reject a missing annotation set up front instead of failing later on first use.
    if (annotationSet == null)
    {
        throw new ArgumentNullException("annotationSet");
    }

    _annotationSet = annotationSet;
}
/// <summary>
/// Writes a plain-text report for a document: one output line per sentence, with each word
/// followed by a space and each entity rendered as "[content \ annotation] ".
/// </summary>
/// <param name="document">Document whose sentences and words are written out.</param>
/// <param name="fileName">Path of the report file; an existing file is replaced.</param>
/// <param name="annotationSet">Annotation set indexed by entity id to obtain the text shown for an entity.</param>
static void saveReport(WEMDocument document, string fileName, AnnotationSet annotationSet)
{
    // FileMode.Create truncates an existing file. The previous OpenOrCreate mode kept the old
    // content, so a shorter report written over a longer one left stale bytes at the end.
    using (StreamWriter writer = new StreamWriter(new FileStream(fileName, FileMode.Create)))
    {
        foreach (var sen in document.Sentences)
        {
            foreach (var word in sen.Words)
            {
                if (word is Entity)
                {
                    Entity entity = (Entity)word;
                    writer.Write(String.Format("[{0} \\ {1}] ", entity.Content, annotationSet[entity.EntityId]));
                }
                else
                {
                    writer.Write(word.Content + ' ');
                }
            }

            // One report line per sentence.
            writer.WriteLine();
        }

        // No explicit Close(): leaving the using block flushes and closes the writer and its stream.
    }
}
/// <summary>
/// Creates a text document reader. A sentence factory is built from the supplied annotation
/// set, the input language is set to Simplified Chinese and the output encoding to UTF-8.
/// </summary>
/// <param name="annotationSet">Annotation set handed to the sentence factory.</param>
public TextDocumentReader(AnnotationSet annotationSet)
{
    // Sentence factory used by this reader, bound to the caller's annotation set.
    _sententceFactory = new SentenceFactory(annotationSet);

    // Initial language/encoding settings.
    InputLanguage = Language.SimplifiedChinese;
    OutputEncoding = Encoding.UTF8;
}
/// <summary>
/// Initializes the base class of an annotator.
/// </summary>
/// <param name="annotationSet">The annotation set used by this annotator.</param>
/// <param name="model">The model stored for this annotator; callers in this code base may pass null.</param>
protected AnnotatorBase(AnnotationSet annotationSet, Model model)
{
    // A freshly constructed annotator is neither initialized nor running.
    _initialized = false;
    _isRunning = false;

    _annotationSet = annotationSet;
    _model = model;
}
/// <summary>
/// Console entry point. Loads the annotation set from "WEMAS.xml", ensures the working
/// directories exist, reads every file found in Results/PFR as an XML document, saves each
/// document again under Compare, then prints "Done." and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // NOTE: a surrounding try/catch that printed e.Message is currently disabled.
    PFRDocumentReader reader = new PFRDocumentReader();

    AnnotationSet wemas = new AnnotationSet();
    wemas.Load("WEMAS.xml");

    // CreateDirectory/CreateSubdirectory are used for their side effect of making sure the
    // directories exist, even where the returned DirectoryInfo is not used afterwards.
    DirectoryInfo dir = Directory.CreateDirectory("Results").CreateSubdirectory("PFR");
    DirectoryInfo repDir = Directory.CreateDirectory("Reports").CreateSubdirectory("PFR");
    DirectoryInfo comDir = Directory.CreateDirectory("Compare");

    // Disabled import step, kept for reference:
    //using (Stream stream
    //    = new FileStream(
    //        "199801.txt", FileMode.Open, FileAccess.Read))
    //{
    //    docs = reader.ReadDocuments(stream);
    //}
    //foreach (var doc in docs)
    //{
    //    doc.Save(Path.Combine(dir.FullName, doc.Name + ".xml"));
    //    saveReport(
    //        doc,
    //        Path.Combine(repDir.FullName, doc.Name + ".txt"),
    //        wemas);
    //}

    DocumentFactory factory = new DocumentFactory(wemas);
    WEMDocument[] docs = factory.GetDocumentsFromXml(
        dir.GetFiles().Select(docFile => docFile.FullName));

    foreach (WEMDocument loaded in docs)
    {
        string target = Path.Combine(comDir.FullName, loaded.Name + ".xml");
        loaded.Save(target);
    }

    Console.WriteLine("Done.");
    Console.ReadKey(true);
}
/// <summary>
/// Creates a new annotation set. Caller must have WRITE permission for the associated dataset.
/// The following fields are required: datasetId and referenceSetId. All other fields may be
/// optionally specified, unless documented as being server-generated (for example, the `id` field).
/// Documentation https://developers.google.com/genomics/v1/reference/annotationsets/create
/// Generation Note: This does not always build correctly. Google needs to standardise things; I need to figure out which ones are wrong.
/// </summary>
/// <param name="service">Authenticated Genomics service.</param>
/// <param name="body">A valid Genomics v1 body.</param>
/// <returns>The annotation set returned by executing the create request.</returns>
/// <exception cref="Exception">
/// Thrown for any failure, including null arguments; the original error is the inner exception.
/// </exception>
public static AnnotationSet Create(GenomicsService service, AnnotationSet body)
{
    try
    {
        // Argument validation happens inside the try block, so a null argument surfaces as the
        // wrapped "Request ... failed." exception with the ArgumentNullException as its inner exception.
        if (service == null)
        {
            throw new ArgumentNullException("service");
        }

        if (body == null)
        {
            throw new ArgumentNullException("body");
        }

        // Build the request, then execute it.
        var request = service.Annotationsets.Create(body);
        return request.Execute();
    }
    catch (Exception ex)
    {
        throw new Exception("Request Annotationsets.Create failed.", ex);
    }
}
/// <summary>
/// Runs the console annotation pipeline. After <c>startCheck()</c> succeeds it loads the
/// annotation set, reads every ".txt" file of the corpus directory into a corpus, runs the
/// ICTCLAS annotator asynchronously and, once that completes without error, the CRF++
/// annotator. When the second pass completes, every document is saved as XML to the output
/// directory and, if reports are enabled, as a text report. The method blocks until the
/// pipeline signals completion or failure, then restores the console foreground colour.
/// </summary>
public void Start()
{
    if (!startCheck())
    {
        return;
    }

    Console.BackgroundColor = ConsoleColor.DarkBlue;
    Console.Clear();
    writeTitle();
    ConsoleColor color = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Green;
    try
    {
        Console.WriteLine(MessageInitializing);
        AnnotationSet wemas = new AnnotationSet();
        wemas.Load(_annotationSet);
        ICTCLASAnnotator ictclasAno = new ICTCLASAnnotator(wemas, null);
        CRFPPAnnotator crfppAno = new CRFPPAnnotator(wemas, new Model(_crfppModel));
        crfppAno.SetCRFPPRootPath(_crfppDir);
        Corpus corpus = new Corpus(Encoding.GetEncoding(_xmlEncoding));

        Console.Write(MessageLoadingCorpus);
        savePosition();

        // Materialise the file list once. The deferred query used to be re-enumerated by every
        // Count()/ElementAt() call, which made loading and saving quadratic in the file count.
        string[] documents =
            (from file in Directory.GetFiles(_corpusDir)
             where file.ToUpper().EndsWith(".TXT")
             select file).ToArray();

        if (documents.Length == 0)
        {
            Console.WriteLine();
            Console.WriteLine(MessageEmptyCorpus);
            Console.WriteLine(MessageDone);
            Console.ForegroundColor = color;
            pause();
            return;
        }

        _sentenceFactory = new SentenceFactory(wemas);
        _sentenceFactory.InputLanguage = Language.SimplifiedChinese;
        _sentenceFactory.OutputEncoding = corpus.Encoding;

        int count = 0;
        foreach (var doc in documents)
        {
            Sentence[] sens;

            // Dispose the stream as soon as the sentences are read; previously one file handle
            // per corpus document was left open until finalization.
            using (FileStream stream = new FileStream(doc, FileMode.Open))
            {
                sens = _sentenceFactory.GetSentences(stream);
            }

            WEMDocument wDoc = new WEMDocument(corpus.Encoding);
            wDoc.Name = doc;
            foreach (var sen in sens)
            {
                wDoc.AddSentence(sen);
            }
            corpus.AddDocument(wDoc);
            rewrite(String.Format("{0}%",
                (int)((double)++count / documents.Length * 100)));
        }

        Console.WriteLine("共加载{0}篇文档。", corpus.Documents.Count);
        Console.Write(MessageAnnotatingSeg);
        savePosition();

        // Set by the completion handlers (which may run on another thread) and polled below.
        bool done = false;

        ictclasAno.AnnotationProgressChanged += (s, ea) =>
        {
            rewrite(String.Format("{0}%",
                Math.Round(ea.ProgressInDouble, 2) * 100));
        };

        ictclasAno.AnnotationCompleted += (s, ea) =>
        {
            if (ea.Error != null)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine();
                Console.WriteLine(MessageUnhandledException);
                Console.WriteLine(ea.Error.Message);
                Console.WriteLine(ea.Error.StackTrace);
                done = true;
                return;
            }

            rewrite("100%");
            Console.WriteLine(MessageAnnotatingEnt);

            // The second pass is wired up only after the first one finished successfully.
            crfppAno.AnnotationProgressChanged += (sen, args) =>
            {
                Console.WriteLine(args.Message);
            };

            crfppAno.AnnotationCompleted += (sen, args) =>
            {
                if (args.Error != null)
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine();
                    Console.WriteLine(MessageUnhandledException);
                    Console.WriteLine(args.Error.Message);
                    Console.WriteLine(args.Error.StackTrace);
                    done = true;
                    return;
                }

                Console.WriteLine(MessageAnnotationFinished);
                if (_reportsEnabled)
                {
                    Console.Write(MessageSavingDocsAndReps);
                }
                else
                {
                    Console.Write(MessageSavingDocuments);
                }
                savePosition();

                int sCount = 0;
                foreach (var savedDoc in corpus.Documents)
                {
                    // Documents were added to the corpus in file order, so the corpus index
                    // maps back to the source file path.
                    string sourcePath = documents[corpus.Documents.IndexOf(savedDoc)];
                    string reportFileName = new FileInfo(sourcePath).Name;

                    // Swap the trailing "txt" for "xml"; the filter above guarantees the extension.
                    string fileName = reportFileName.Remove(reportFileName.Length - 3);
                    fileName = _outputDir + fileName + "xml";
                    savedDoc.Save(fileName);

                    if (_reportsEnabled)
                    {
                        saveReport(savedDoc, _reportsDir + reportFileName, wemas);
                    }

                    rewrite(String.Format("{0}%",
                        (int)((double)++sCount / corpus.Documents.Count * 100)));
                }

                Console.WriteLine(MessageDone);
                done = true;
            };

            if (crfppAno.Initialize())
            {
                crfppAno.ProcessCorpusAsync(corpus);
            }
            else
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(MessageEntInitFailed);
                done = true;
                return;
            }
        };

        if (ictclasAno.Initialize())
        {
            ictclasAno.ProcessCorpusAsync(corpus);

            // Block the caller until one of the completion handlers reports the end of the run.
            while (!done)
            {
                Thread.Sleep(50);
            }
        }
        else
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(MessageSegInitFailed);
        }
    }
    catch (Exception ex)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine();
        Console.WriteLine(MessageUnhandledException);
        Console.WriteLine(ex.Message);
    }

    Console.ForegroundColor = color;
}
/// <summary>
/// Interprets one console command line. The first whitespace-separated token selects the
/// command (case-insensitively): "exit", "clear", "genas", "testas" or "testws". An empty
/// line is ignored and anything else is reported as an unknown command.
/// </summary>
/// <param name="command">The raw command line typed by the user.</param>
static void ProcessCommand(string command)
{
    string[] blocks = command.Split();

    // Invariant lower-casing: culture-sensitive ToLower() maps "I" to a dotless "ı" under
    // Turkish cultures, so "EXIT" would not have matched "exit".
    switch (blocks[0].ToLowerInvariant())
    {
        case "":
            return;

        case "exit":
            if (blocks.Length == 1)
            {
                _exitFlag = true;
            }
            else
            {
                Console.WriteLine("Unknown Parameters.");
            }
            return;

        case "clear":
            if (blocks.Length == 1)
            {
                Console.Clear();
            }
            else
            {
                Console.WriteLine("Unknown Parameters.");
            }
            return;

        case "genas":
            // Convert the dictionary loaded from "WEMAS.xml" into an annotation set file.
            AnnotationDictionary dict = new AnnotationDictionary();
            AnnotationSet ans = new AnnotationSet("WEMAS");
            dict.Load("WEMAS.xml");
            foreach (var entry in dict._Dictionary)
            {
                ans.SetAnnotationDescription(entry.Key, entry.Value);
            }
            foreach (var sep in dict.SentenceSeparators)
            {
                ans.AddSentenceSeparator(sep);
            }
            ans.Save("WEMAS.wemas");
            return;

        case "testas":
            // Load, modify and re-save an annotation set.
            AnnotationSet ans2 = new AnnotationSet("WEMAS");
            ans2.Load("WEMAS.wemas");
            ans2.Description = "WEB ENTITY MINER Default Annotation Set.";
            ans2.Save("WEMAS2.wemas");
            return;

        case "testws":
            AnnotationSet wemas = new AnnotationSet();
            wemas.Load("WEMAS.wemas");
            ICTCLASAnnotator ano = new ICTCLASAnnotator(wemas, null);

            // Path.Combine does not depend on BaseDirectory ending with a separator.
            CRFPPAnnotator crfAno = new CRFPPAnnotator(wemas,
                new Model(Path.Combine(
                    AppDomain.CurrentDomain.BaseDirectory, "model.crfppmodel")));
            Corpus c = new Corpus(Encoding.UTF8);

            SentenceFactory.AnnotationSet = wemas;
            SentenceFactory.InputLanguage = Language.SimplifiedChinese;
            SentenceFactory.OutputEncoding = Encoding.UTF8;

            WEMDocument doc = new WEMDocument(Encoding.UTF8);

            // The input stream is now disposed deterministically. The sentences are consumed
            // inside the using block in case GetSentences reads the stream lazily.
            using (FileStream input = new FileStream("utf8.txt", FileMode.Open, FileAccess.Read))
            {
                var sens = SentenceFactory.GetSentences(input);
                foreach (var sen in sens)
                {
                    doc.AddSentence(sen);
                }
            }
            c.AddDocument(doc);

            try
            {
                ano.ProcessCorpus(c);
                crfAno.ProcessCorpus(c);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Unhandled Exception:\n{0}", ex.Message);
            }

            // The document is saved and printed even if annotation failed above.
            doc.Save("RESULT.xml");
            foreach (var sentence in doc.Sentences)
            {
                foreach (var word in sentence.Words)
                {
                    if (word is Entity)
                    {
                        Console.WriteLine("{0}/ENTITY:{1}",
                            word.Content, wemas[((Entity)word).EntityId]);
                    }
                    else
                    {
                        Console.WriteLine("{0}", word.Content);
                    }
                }
            }
            return;

        default:
            Console.WriteLine("Unknown Command: '{0}'.", blocks[0]);
            return;
    }
}
/// <summary>
/// Updates an annotation set. The update must respect all mutability restrictions and other
/// invariants described on the annotation set resource. Caller must have WRITE permission for
/// the associated dataset.
/// Documentation https://developers.google.com/genomics/v1/reference/annotationsets/update
/// Generation Note: This does not always build correctly. Google needs to standardise things; I need to figure out which ones are wrong.
/// </summary>
/// <param name="service">Authenticated Genomics service.</param>
/// <param name="annotationSetId">The ID of the annotation set to be updated.</param>
/// <param name="body">A valid Genomics v1 body.</param>
/// <param name="optional">Optional parameters.</param>
/// <returns>The annotation set returned by executing the update request.</returns>
/// <exception cref="Exception">
/// Thrown for any failure, including null arguments; the original error is the inner exception.
/// </exception>
public static AnnotationSet Update(GenomicsService service, string annotationSetId, AnnotationSet body, AnnotationsetsUpdateOptionalParms optional = null)
{
    try
    {
        // Initial validation. These checks sit inside the try block, so a null argument is
        // reported through the wrapping exception below with the ArgumentNullException inside.
        if (service == null)
        {
            throw new ArgumentNullException("service");
        }

        if (body == null)
        {
            throw new ArgumentNullException("body");
        }

        if (annotationSetId == null)
        {
            // Pass the parameter NAME. The previous code passed the (null) value itself, so the
            // resulting exception carried no parameter name at all.
            throw new ArgumentNullException("annotationSetId");
        }

        // Building the initial request.
        var request = service.Annotationsets.Update(body, annotationSetId);

        // Applying optional parameters to the request.
        request = (AnnotationsetsResource.UpdateRequest)SampleHelpers.ApplyOptionalParms(request, optional);

        // Requesting data.
        return request.Execute();
    }
    catch (Exception ex)
    {
        throw new Exception("Request Annotationsets.Update failed.", ex);
    }
}
/// <summary>
/// Forwards an annotation set update to <c>Client.Update</c> and returns the task it produces.
/// </summary>
/// <param name="skillId">Skill identifier passed through to the client.</param>
/// <param name="annotationId">Annotation identifier passed through to the client.</param>
/// <param name="set">Annotation set passed through to the client.</param>
/// <returns>The task returned by the client call.</returns>
public Task Update(string skillId, string annotationId, AnnotationSet set)
{
    Task pending = Client.Update(skillId, annotationId, set);
    return pending;
}
/// <summary>
/// Creates the meta-object with a new, empty annotation set and a new behavior set.
/// </summary>
internal ProjectionMetaObject()
{
    // Both members start out as fresh instances created with their default constructors.
    annotations = new AnnotationSet();
    behaviors = new BehaviorSet();
}
/// <summary>
/// Builds an annotation set by applying each supplied annotation, in order, to a new set.
/// </summary>
/// <param name="annotations">Annotations to apply; may be empty.</param>
/// <returns>The newly created set after all annotations have been applied.</returns>
private static AnnotationSet AnnotationSet(params object[] annotations)
{
    var result = new AnnotationSet();

    for (int index = 0; index < annotations.Length; index++)
    {
        result.Apply(annotations[index]);
    }

    return result;
}
/// <summary>
/// Creates an ICTCLAS annotator; both arguments are passed unchanged to the base constructor.
/// </summary>
/// <param name="annotationSet">Annotation set forwarded to the base class.</param>
/// <param name="model">Model forwarded to the base class.</param>
public ICTCLASAnnotator(AnnotationSet annotationSet, Model model)
    : base(annotationSet, model)
{
    // Nothing to do beyond base-class construction.
}
/// <summary>
/// Creates a CRF++ annotator and points <c>CRFPPHelper.RootPath</c> at the "CRFPP" folder
/// under the application's base directory.
/// </summary>
/// <param name="annotationSet">Annotation set forwarded to the base class.</param>
/// <param name="model">Model forwarded to the base class.</param>
public CRFPPAnnotator(AnnotationSet annotationSet, Model model)
    : base(annotationSet, model)
{
    // Path.Combine inserts a directory separator only when one is missing, so the result no
    // longer depends on BaseDirectory ending with a separator (plain concatenation did).
    CRFPPHelper.RootPath = System.IO.Path.Combine(
        AppDomain.CurrentDomain.BaseDirectory, "CRFPP");
}