/// <summary>
/// Aligns gold and extractor sentences, tallies (predicted type, gold type) pairs for every
/// gold relation, and hands the tallies to <c>PrintResultsInternal</c>.
/// </summary>
/// <param name="pw">Writer forwarded to <c>PrintResultsInternal</c>.</param>
/// <param name="goldStandard">Sentences that carry the gold relations.</param>
/// <param name="extractorOutput">Extractor sentences, read at the same index as the gold sentence.</param>
public override void PrintResults(PrintWriter pw, IList<ICoreMap> goldStandard, IList<ICoreMap> extractorOutput)
        {
            ResultsPrinter.Align(goldStandard, extractorOutput);
            // A relation mention factory is mandatory from this point on.
            System.Diagnostics.Debug.Assert(relationMentionFactory != null, "ERROR: RelationExtractorResultsPrinter.relationMentionFactory cannot be null in printResults!");

            // pairCounts: (predicted type, gold type) -> count; goldTypeCounts: gold type -> count.
            ICounter<Pair<string, string>> pairCounts = new ClassicCounter<Pair<string, string>>();
            ClassicCounter<string> goldTypeCounts = new ClassicCounter<string>();

            // TODO: assumes binary relations
            int sentenceIndex = 0;
            while (sentenceIndex < goldStandard.Count)
            {
                var goldRelations = AnnotationUtils.GetAllRelations(relationMentionFactory, goldStandard[sentenceIndex], createUnrelatedRelations);
                foreach (RelationMention gold in goldRelations)
                {
                    // Look up the extractor's relations between the same two arguments.
                    ICoreMap predictedSentence = extractorOutput[sentenceIndex];
                    IList<RelationMention> predictions = AnnotationUtils.GetRelations(relationMentionFactory, predictedSentence, gold.GetArg(0), gold.GetArg(1));
                    goldTypeCounts.IncrementCount(gold.GetType());
                    foreach (RelationMention predicted in predictions)
                    {
                        pairCounts.IncrementCount(new Pair<string, string>(predicted.GetType(), gold.GetType()));
                    }
                }
                sentenceIndex++;
            }
            PrintResultsInternal(pw, pairCounts, goldTypeCounts);
        }
Beispiel #2
0
        /// <summary>
        /// Looks up the <c>TheTarget</c> attributes on the <c>DynamicallyOverridden</c> method and
        /// checks the "Property" and "ExtraValue" entries.
        /// </summary>
        public void TestFindAttributesOnDynamicMetaAnnotation()
        {
            var annotatedMethod = GetType().GetMethod(nameof(DynamicallyOverridden));
            var attributes = AnnotationUtils.FindAnnotationAttributes(annotatedMethod, typeof(TheTarget)).Get();

            attributes["Property"].Should().Be("dynamic-override");
            attributes.GetValueOrDefault("ExtraValue").Should().Be("extra");
        }
Beispiel #3
0
        /// <summary>
        /// Deserializes <paramref name="element"/> by dispatching on the type that
        /// <c>AnnotationUtils.FindTypeWithName</c> resolves for the element's local name.
        /// </summary>
        /// <param name="element">XML element whose local name identifies the symbol type.</param>
        /// <returns>The symbol produced by the matching <c>Deserialize*</c> helper.</returns>
        /// <exception cref="System.ArgumentException">
        /// No type was resolved, or the resolved type matches none of the handled symbol interfaces.
        /// </exception>
        public ISymbol DeserializeSymbol(XElement element)
        {
            var resolvedType = AnnotationUtils.FindTypeWithName(element.Name.LocalName);

            // Interfaces are probed in a fixed order: terminal, non-terminal, one-of, action.
            if (resolvedType != null)
            {
                if (typeof(ITerminal).IsAssignableFrom(resolvedType))
                {
                    return DeserializeTerminalSymbol(element);
                }

                if (typeof(INonTerminal).IsAssignableFrom(resolvedType))
                {
                    return DeserializeNonTerminalSymbol(element);
                }

                if (typeof(IOneOf).IsAssignableFrom(resolvedType))
                {
                    return DeserializeOneOf(element);
                }

                if (typeof(IAction).IsAssignableFrom(resolvedType))
                {
                    return DeserializeAction(element);
                }
            }

            throw new System.ArgumentException($"The given Element '{element.Name}' does not exist or is not a symbol");
        }
 /// <summary>
 /// Sets <c>_aggregateType</c> from the "type" entry of the <c>AggregateRootAttribute</c> attributes
 /// found on the inspected type, keeping only values with non-zero length and otherwise
 /// falling back (via <c>OrElse</c>) to the inspected type's name.
 /// </summary>
 private void InspectAggregateType()
 {
     var rootAttributes = AnnotationUtils.FindAnnotationAttributes(_inspectedType, typeof(AggregateRootAttribute));
     var declaredName   = rootAttributes.Map(attrs => (string)attrs.GetValueOrDefault("type"));
     var nonEmptyName   = declaredName.Filter(name => name.Length > 0);

     _aggregateType = nonEmptyName.OrElse(_inspectedType.Name);
 }
Beispiel #5
0
 /// <summary>
 /// Stores the child entity and derives the routing key from the "routingKey" entry of the
 /// <c>AggregateMemberAttribute</c> attributes on <paramref name="property"/>; a null or empty
 /// value is filtered out and the child entity's own routing key is used via <c>OrElse</c>.
 /// </summary>
 /// <param name="property">Property inspected for <c>AggregateMemberAttribute</c>.</param>
 /// <param name="childEntity">Child entity model; also supplies the fallback routing key.</param>
 public void Initialize(PropertyInfo property, IEntityModel childEntity)
 {
     _childEntity = childEntity;

     var memberAttributes = AnnotationUtils.FindAnnotationAttributes(property, typeof(AggregateMemberAttribute));
     var declaredKey      = memberAttributes.Map(attrs => (string)attrs.GetValueOrDefault("routingKey"));
     var usableKey        = declaredKey.Filter(candidate => !string.IsNullOrEmpty(candidate));

     _routingKey = usableKey.OrElse(childEntity.RoutingKey);
 }
Beispiel #6
0
        /// <summary>
        /// Rebuilds <c>ProteinFormat</c> using a header that merges <paramref name="oldProteinHeader"/>
        /// with the annotation keys found on the result's proteins (tab-delimited).
        /// </summary>
        /// <param name="identifiedResult">Result whose proteins supply the annotation keys.</param>
        /// <param name="oldProteinHeader">Existing header to merge the annotation keys into.</param>
        private void InitializeProteinFormat(IIdentifiedResult identifiedResult, string oldProteinHeader)
        {
            var proteins = identifiedResult.GetProteins();

            List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(proteins);
            string mergedHeader = StringUtils.GetMergedHeader(oldProteinHeader, annotationKeys, '\t');

            var converterFactory = IdentifiedProteinPropertyConverterFactory.GetInstance();
            ProteinFormat = new LineFormat<IIdentifiedProtein>(converterFactory, mergedHeader, GetEngineName(), proteins);
        }
Beispiel #7
0
        /// <summary>
        /// Looks up the <c>TheTarget</c> attributes on the <c>DirectAnnotated</c> method and checks
        /// that the value is keyed by "TheTarget" rather than "value".
        /// </summary>
        public void TestFindAttributesOnDirectAnnotation()
        {
            var annotatedMethod = GetType().GetMethod(nameof(DirectAnnotated));
            var attributes = AnnotationUtils.FindAnnotationAttributes(annotatedMethod, typeof(TheTarget)).Get();

            attributes["Property"].Should().Be("value");
            attributes.ContainsKey("value").Should().BeFalse("value property should use annotation Simple class name as key");
            attributes["TheTarget"].Should().Be("value()");
        }
Beispiel #8
0
        /// <summary>
        /// Rebuilds <c>PeptideFormat</c> using a header that merges <paramref name="oldPeptideHeader"/>
        /// with the annotation keys found on the result's spectra (tab-delimited).
        /// </summary>
        /// <param name="identifiedResult">Result whose spectra supply the annotation keys.</param>
        /// <param name="oldPeptideHeader">Existing header to merge the annotation keys into.</param>
        private void InitializePeptideFormat(IIdentifiedResult identifiedResult, string oldPeptideHeader)
        {
            var spectra = identifiedResult.GetSpectra();

            List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(spectra);
            string mergedHeader = StringUtils.GetMergedHeader(oldPeptideHeader, annotationKeys, '\t');

            var converterFactory = IdentifiedSpectrumPropertyConverterFactory.GetInstance();
            PeptideFormat = new LineFormat<IIdentifiedSpectrum>(converterFactory, mergedHeader, GetEngineName(), spectra);
        }
Beispiel #9
0
        /// <summary>
        /// Checks that <c>AnnotationUtils.GetAnnotationKeys</c> yields exactly the five keys
        /// "A" through "E" for the <c>ann</c> fixture.
        /// </summary>
        public void TestGetAnnotationKeys()
        {
            var expectedKeys = new[] { "A", "B", "C", "D", "E" };

            var actualKeys = AnnotationUtils.GetAnnotationKeys(ann);

            Assert.AreEqual(5, actualKeys.Count);
            Assert.AreEqual(expectedKeys, actualKeys);
        }
Beispiel #10
0
        /// <summary>
        /// Rebuilds <c>PeptideFormat</c> for <paramref name="spectra"/>, merging their annotation keys
        /// into the current peptide header (or the default header when no format exists yet).
        /// </summary>
        /// <param name="spectra">Spectra that supply the annotation keys and back the new format.</param>
        public void Initialize(List<IIdentifiedSpectrum> spectra)
        {
            string previousHeader;
            if (PeptideFormat != null)
            {
                previousHeader = PeptideFormat.GetHeader();
            }
            else
            {
                previousHeader = GetDefaultPeptideHeader();
            }

            List<string> annotationKeys = AnnotationUtils.GetAnnotationKeys(spectra);
            string mergedHeader = StringUtils.GetMergedHeader(previousHeader, annotationKeys, '\t');

            PeptideFormat = new PeptideLineFormat(mergedHeader, GetEngineName(), spectra);
        }
Beispiel #11
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @Override @SuppressWarnings("rawtypes") public boolean matches(Method method, Class targetClass)
        /// <summary>
        /// Reports whether the configured annotation type is found on <paramref name="method"/> or,
        /// failing that, on the most specific method resolved for <paramref name="targetClass"/>.
        /// </summary>
        /// <param name="method">Method to inspect first.</param>
        /// <param name="targetClass">Class used to resolve the most specific method.</param>
        /// <returns>True when the annotation is found on either method.</returns>
        public override bool matches(System.Reflection.MethodInfo method, Type targetClass)
        {
            bool annotatedDirectly = AnnotationUtils.getAnnotation(method, this.annotationType) != null;
            if (!annotatedDirectly)
            {
                // The method may be on an interface, so let's check on the target class as well.
                System.Reflection.MethodInfo mostSpecific = AopUtils.getMostSpecificMethod(method, targetClass);
                if (mostSpecific == method)
                {
                    // Same method as before: it was already checked above.
                    return false;
                }

                return AnnotationUtils.getAnnotation(mostSpecific, this.annotationType) != null;
            }

            return true;
        }
Beispiel #12
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @SuppressWarnings("unchecked") public void doWith(Method method) throws IllegalArgumentException, IllegalAccessException
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
            /// <summary>
            /// Builds an <c>ActivitiStateHandlerRegistration</c> from the <c>State</c> annotation and the
            /// parameter annotations of <paramref name="method"/>, then registers it with the outer
            /// instance's registry.
            /// </summary>
            /// <param name="method">Handler method to inspect.</param>
            public void doWith(System.Reflection.MethodInfo method)
            {
                State stateAnnotation = AnnotationUtils.getAnnotation(method, typeof(State));

                // Process name: the annotation's process() when it has text, otherwise the component's processKey().
                string processName = component.processKey();
                if (StringUtils.hasText(stateAnnotation.process()))
                {
                    processName = stateAnnotation.process();
                }

                // State name: state() when it has text, otherwise value().
                string stateName = stateAnnotation.state();
                if (!StringUtils.hasText(stateName))
                {
                    stateName = stateAnnotation.value();
                }

                Assert.notNull(stateName, "You must provide a stateName!");

                IDictionary<int, string> variableNamesByPosition = new Dictionary<int, string>();
                int processVariablesPosition = -1;
                int processIdPosition = -1;

                // Positions are 1-based: the first parameter is recorded as position 1.
                Annotation[][] annotationsPerParameter = method.ParameterAnnotations;
                for (int position = 1; position <= annotationsPerParameter.Length; position++)
                {
                    foreach (Annotation parameterAnnotation in annotationsPerParameter[position - 1])
                    {
                        if (parameterAnnotation is ProcessVariable processVariable)
                        {
                            variableNamesByPosition[position] = processVariable.value();
                        }
                        else if (parameterAnnotation is ProcessVariables)
                        {
                            processVariablesPosition = position;
                        }
                        else if (parameterAnnotation is ProcessId)
                        {
                            processIdPosition = position;
                        }
                    }
                }

                ActivitiStateHandlerRegistration registration = new ActivitiStateHandlerRegistration(variableNamesByPosition, method, bean, stateName, beanName, processVariablesPosition, processIdPosition, processName);

                outerInstance.registry.registerActivitiStateHandler(registration);
            }
        /// <summary>
        /// Writes the sentences of <paramref name="dataset"/> as "word POS label" lines, starting a new
        /// <c>&lt;docid&gt;.conll</c> file in <paramref name="dir"/> whenever the sentence's document id
        /// differs from the previous sentence's. A blank line follows every sentence.
        /// </summary>
        /// <param name="dir">Directory the <c>.conll</c> files are created in.</param>
        /// <param name="dataset">Annotation whose <c>SentencesAnnotation</c> is written out.</param>
        /// <param name="useSubTypes">Forwarded to <c>AnnotationUtils.SentenceEntityMentionsToCoreLabels</c>.</param>
        /// <param name="alreadyBIO">
        /// Forwarded to the label conversion; when false, every label other than "O" is prefixed here
        /// with "I-" if it equals the previous token's label and "B-" otherwise.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception">A generated line does not split into exactly three tokens.</exception>
        public static void SaveCoNLLFiles(string dir, Annotation dataset, bool useSubTypes, bool alreadyBIO)
        {
            IList<ICoreMap> sentences = dataset.Get(typeof(CoreAnnotations.SentencesAnnotation));
            string docid = null;
            TextWriter os = null;

            // try/finally guarantees the currently open writer is closed even when a sentence fails
            // half-way through (previously the writer leaked on any exception).
            try
            {
                foreach (ICoreMap sentence in sentences)
                {
                    string myDocid = sentence.Get(typeof(CoreAnnotations.DocIDAnnotation));
                    if (docid == null || !myDocid.Equals(docid))
                    {
                        // Document changed: finish the previous file before opening the next one.
                        if (os != null)
                        {
                            os.Close();
                            // Cleared so the finally block does not close it again if opening the next file fails.
                            os = null;
                        }
                        docid = myDocid;
                        os = new TextWriter(new FileOutputStream(dir + File.separator + docid + ".conll"));
                    }
                    IList<CoreLabel> labeledSentence = AnnotationUtils.SentenceEntityMentionsToCoreLabels(sentence, true, null, null, useSubTypes, alreadyBIO);
                    System.Diagnostics.Debug.Assert((labeledSentence != null));
                    string prev = null;
                    foreach (CoreLabel word in labeledSentence)
                    {
                        // Whitespace inside a token would break the space-separated format.
                        string w = word.Word().ReplaceAll("[ \t\n]+", "_");
                        string t = word.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                        string l = word.Get(typeof(CoreAnnotations.AnswerAnnotation));
                        string nl = l;
                        if (!alreadyBIO && !l.Equals("O"))
                        {
                            if (prev != null && l.Equals(prev))
                            {
                                nl = "I-" + l;
                            }
                            else
                            {
                                nl = "B-" + l;
                            }
                        }
                        string line = w + " " + t + " " + nl;
                        // NOTE(review): the separator looks like a regex carried over from Java; confirm the
                        // Split overload bound here treats it as a pattern rather than a literal string.
                        string[] toks = line.Split("[ \t\n]+");
                        if (toks.Length != 3)
                        {
                            throw new Exception("INVALID LINE: \"" + line + "\"");
                        }
                        os.Printf("%s %s %s\n", w, t, nl);
                        // The raw (unprefixed) label is what the next token is compared against.
                        prev = l;
                    }
                    os.WriteLine();
                }
            }
            finally
            {
                if (os != null)
                {
                    os.Close();
                }
            }
        }
Beispiel #14
0
            /// <summary>
            /// Serves the <c>IsNormalized</c> annotation for columns whose <c>UseCounter</c> flag is false;
            /// every other request is delegated to the base implementation.
            /// </summary>
            /// <param name="kind">Annotation kind being requested.</param>
            /// <param name="iinfo">Column info index; asserted to lie in [0, InfoCount).</param>
            /// <param name="value">Receives the annotation value.</param>
            protected override void GetAnnotationCore<TValue>(string kind, int iinfo, ref TValue value)
            {
                Contracts.Assert(0 <= iinfo && iinfo < InfoCount);

                bool servedHere = kind == AnnotationUtils.Kinds.IsNormalized && !UseCounter[iinfo];
                if (!servedHere)
                {
                    base.GetAnnotationCore(kind, iinfo, ref value);
                    return;
                }

                AnnotationUtils.Marshal<bool, TValue>(IsNormalized, iinfo, ref value);
            }
        /*
         *  Model creation, training, saving, and loading
         */
        /// <summary>
        /// Converts the entity mentions of <paramref name="doc"/> into labeled sentences, creates a
        /// fresh classifier, and trains it when at least one sentence was produced.
        /// </summary>
        /// <param name="doc">Annotated document used as training data.</param>
        public virtual void Train(Annotation doc)
        {
            IList<IList<CoreLabel>> labeledSentences = AnnotationUtils.EntityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO);

            // To dump the data in CoNLL-2003 format for debugging:
            // saveCoNLLFiles("/tmp/ace/train/", doc, useSubTypes, useBIO);
            this.classifier = CreateClassifier();
            if (labeledSentences.Count <= 0)
            {
                // Nothing to learn from; the new, untrained classifier stays in place.
                return;
            }

            this.classifier.Train(Java.Util.Collections.UnmodifiableCollection(labeledSentences));
        }
        /// <summary>
        /// Smoke test: parses the Roth CoNLL04 corpus from its hard-coded path with a pipeline
        /// configured from <paramref name="args"/> and prints the dataset to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments converted to pipeline properties.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties pipelineProps = StringUtils.ArgsToProperties(args);

            var conllReader = new Edu.Stanford.Nlp.IE.Machinereading.Domains.Roth.RothCONLL04Reader();
            conllReader.SetLoggerLevel(Level.Info);
            conllReader.SetProcessor(new StanfordCoreNLP(pipelineProps));

            Annotation parsed = conllReader.Parse("/u/nlp/data/RothCONLL04/conll04.corp");
            var rendered = AnnotationUtils.DatasetToString(parsed);

            System.Console.Out.WriteLine(rendered);
        }
Beispiel #17
0
        /// <summary>Classifies every candidate relation of the sentence and returns the predictions.</summary>
        /// <remarks>
        /// Candidates are either all entity pairs of the sentence (when <c>createUnrelatedRelations</c>
        /// is set) or the relation mentions already attached to the sentence. Each candidate yields a
        /// newly constructed RelationMention carrying the predicted label and its score counter.
        /// </remarks>
        /// <param name="sentence">Sentence whose candidate relations are classified.</param>
        /// <returns>One new RelationMention per candidate, in candidate order.</returns>
        protected internal virtual IList<RelationMention> ExtractAllRelations(ICoreMap sentence)
        {
            IList<RelationMention> extractions = new List<RelationMention>();

            IList<RelationMention> candidates;
            if (!createUnrelatedRelations)
            {
                // just take the candidates produced by the reader (in KBP)
                candidates = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
                if (candidates == null)
                {
                    candidates = new List<RelationMention>();
                }
            }
            else
            {
                // creates all possible relations between all entities in the sentence
                candidates = AnnotationUtils.GetAllUnrelatedRelations(relationMentionFactory, sentence, false);
            }

            // the actual classification takes place here!
            foreach (RelationMention candidate in candidates)
            {
                IDatum<string, string> datum = CreateDatum(candidate);
                string predictedLabel = ClassOf(datum, candidate);
                ICounter<string> labelScores = ProbabilityOf(datum);
                double predictedScore = labelScores.GetCount(predictedLabel);

                // The justification text is only generated when INFO logging is enabled.
                StringWriter justificationBuffer = new StringWriter();
                PrintWriter justificationWriter = new PrintWriter(justificationBuffer);
                if (logger.IsLoggable(Level.Info))
                {
                    JustificationOf(datum, justificationWriter, predictedLabel);
                }

                logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(candidate.GetArg(0).GetSentence()) + "\n" + "Classifying relation: " + candidate + "\n" + "JUSTIFICATION for label GOLD:" + candidate.GetType() + " SYS:" + predictedLabel + " (prob:" + predictedScore +
                            "):\n" + justificationBuffer.ToString());
                logger.Info("Justification done.");

                RelationMention predicted = relationMentionFactory.ConstructRelationMention(candidate.GetObjectId(), sentence, candidate.GetExtent(), predictedLabel, null, candidate.GetArgs(), labelScores);
                extractions.Add(predicted);

                // Both outcomes log the same three messages; only the first message's wording differs.
                bool typeChanged = !predicted.GetType().Equals(candidate.GetType());
                string verdictPrefix = typeChanged ? "Classification: found different type " : "Classification: found similar type ";
                logger.Info(verdictPrefix + predicted.GetType() + " for relation: " + candidate);
                logger.Info("The predicted relation is: " + predicted);
                logger.Info("Current sentence: " + AnnotationUtils.TokensAndNELabelsToString(candidate.GetArg(0).GetSentence()));
            }

            return extractions;
        }
        /// <summary>
        /// Decides from the <c>AllowReplayAttribute</c> attributes whether <paramref name="original"/>
        /// is returned as-is or wrapped in a <c>ReplayBlockingMessageHandlingMember</c>.
        /// </summary>
        /// <param name="original">Handler to inspect and possibly wrap.</param>
        /// <returns>The original handler, or a replay-blocking wrapper around it.</returns>
        public IMessageHandlingMember <T> WrapHandler <T>(IMessageHandlingMember <T> original)
            where T : class
        {
            // Attributes reported by the handler itself; may be null.
            IDictionary <string, object?>?attributes = original.AnnotationAttributes <AllowReplayAttribute>();
            // Attributes looked up on the declaring type of the unwrapped member
            // (a null type is passed when Unwrap yields null).
            var annotationAttributes =
                AnnotationUtils.FindAnnotationAttributes <AllowReplayAttribute>(
                    original.Unwrap <MemberInfo>()?.DeclaringType
                    );
            // Handler-level "allowReplay" entry when handler attributes exist; otherwise the
            // declaring-type lookup result, falling back to DefaultSetting.
            // NOTE(review): the two branches appear to yield different kinds of values (a whole
            // attribute lookup result vs. a single "allowReplay" entry) - confirm this is intended.
            var isReplayAllowed =
                attributes == null ? annotationAttributes ?? DefaultSetting ! : attributes["allowReplay"];

            // NOTE(review): the blocking wrapper is returned only when isReplayAllowed is null; a
            // false "allowReplay" value leaves the handler unwrapped - verify against intended behavior.
            return(isReplayAllowed == null ? new ReplayBlockingMessageHandlingMember <T>(original) : original);
        }
        /// <summary>
        /// Reads <paramref name="path"/> sentence by sentence and collects the sentences into a new
        /// document annotation.
        /// </summary>
        /// <param name="path">File to read; also passed to <c>ReadSentence</c>.</param>
        /// <returns>An annotation holding every sentence that was read.</returns>
        /// <exception cref="System.IO.IOException"/>
        public override Annotation Read(string path)
        {
            Annotation doc = new Annotation(string.Empty);

            logger.Info("Reading file: " + path);
            // The enumerator is disposed when reading finishes or fails; it was previously never
            // disposed (presumably it holds the underlying file reader - TODO confirm in IOUtils.ReadLines).
            using (IEnumerator<string> lineIterator = IOUtils.ReadLines(path).GetEnumerator())
            {
                // Each iteration through this loop processes a single sentence along with any relations in it.
                // ReadSentence receives the same enumerator after the MoveNext call made here.
                while (lineIterator.MoveNext())
                {
                    Annotation sentence = ReadSentence(path, lineIterator);
                    AnnotationUtils.AddSentence(doc, sentence);
                }
            }
            return doc;
        }
Beispiel #20
0
        /// <summary>
        /// Builds a dataset with one datum per relation found in the sentences of
        /// <paramref name="corpus"/>, then applies the feature count threshold.
        /// </summary>
        /// <param name="corpus">Annotation whose <c>SentencesAnnotation</c> is scanned for relations.</param>
        /// <returns>The populated, thresholded dataset.</returns>
        protected internal virtual GeneralDataset<string, string> CreateDataset(Annotation corpus)
        {
            GeneralDataset<string, string> result = new RVFDataset<string, string>();

            var sentences = corpus.Get(typeof(CoreAnnotations.SentencesAnnotation));
            foreach (ICoreMap sentence in sentences)
            {
                var relations = AnnotationUtils.GetAllRelations(relationMentionFactory, sentence, createUnrelatedRelations);
                foreach (RelationMention relation in relations)
                {
                    result.Add(CreateDatum(relation));
                }
            }

            result.ApplyFeatureCountThreshold(featureCountThreshold);
            return result;
        }
Beispiel #21
0
        /// <summary>
        /// Creates a writer whose converter is built from <paramref name="proteinHeader"/> extended
        /// with one tab-separated column per annotation key found on <paramref name="proteins"/>.
        /// </summary>
        /// <param name="proteinHeader">Base tab-delimited header.</param>
        /// <param name="proteins">Proteins to write; also the source of the annotation keys.</param>
        public IdentifiedProteinTextWriter(string proteinHeader, IEnumerable<IIdentifiedProtein> proteins)
        {
            this._proteins = proteins;
            this._annotationKeys = AnnotationUtils.GetAnnotationKeys(this._proteins);

            // Full header = base header followed by "\t<key>" for every annotation key.
            var header = new StringBuilder(proteinHeader);
            foreach (string annotationKey in this._annotationKeys)
            {
                header.Append('\t').Append(annotationKey);
            }

            var converterFactory = IdentifiedProteinPropertyConverterFactory.GetInstance();
            this.converter = converterFactory.GetConverters(header.ToString(), '\t');
        }
        /// <summary>
        /// Describes the output columns: a vector Score column and a UInt32 key PredictedLabel column
        /// that carries the label column's KeyValues annotation plus the trainer output annotations.
        /// </summary>
        /// <param name="inputSchema">Input schema; asserted to contain the label column.</param>
        /// <returns>The Score and PredictedLabel column shapes, in that order.</returns>
        private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema)
        {
            bool found = inputSchema.TryFindColumn(LabelColumn.Name, out var labelCol);
            Contracts.Assert(found);

            var keyValueAnnotations = labelCol.Annotations.Where(x => x.Name == AnnotationUtils.Kinds.KeyValues);
            var predictedLabelAnnotations = new SchemaShape(keyValueAnnotations.Concat(AnnotationUtils.GetTrainerOutputAnnotation()));

            var scoreColumn = new SchemaShape.Column(
                DefaultColumnNames.Score,
                SchemaShape.Column.VectorKind.Vector,
                NumberDataViewType.Single,
                false,
                new SchemaShape(AnnotationUtils.AnnotationsForMulticlassScoreColumn(labelCol)));
            var predictedLabelColumn = new SchemaShape.Column(
                DefaultColumnNames.PredictedLabel,
                SchemaShape.Column.VectorKind.Scalar,
                NumberDataViewType.UInt32,
                true,
                predictedLabelAnnotations);

            return new[] { scoreColumn, predictedLabelColumn };
        }
Beispiel #23
0
        /// <summary>
        /// Returns <paramref name="original"/> unchanged when its "allowReplay" attribute value is true,
        /// and a <c>ReplayBlockingMessageHandlingMember</c> around it otherwise.
        /// </summary>
        /// <remarks>
        /// Attributes come from the handler itself; when absent, from the declaring type of the
        /// handler object's runtime type; when that is absent too, from <c>DefaultSetting</c>.
        /// </remarks>
        /// <param name="original">Handler to inspect and possibly wrap.</param>
        /// <returns>The original handler or a replay-blocking wrapper.</returns>
        public IMessageHandlingMember WrapHandler(IMessageHandlingMember original)
        {
            var replayAttributes = original
                                   .AnnotationAttributes(typeof(AllowReplayAttribute))
                                   .OrElseGet(() => Optional<Type>.OfNullable(original.GetType().DeclaringType)
                                              .Map(declaringType => AnnotationUtils.FindAnnotationAttributes(declaringType, typeof(AllowReplayAttribute))
                                                   .OrElse(DefaultSetting))
                                              .OrElse(DefaultSetting));

            bool replayAllowed = (bool)replayAttributes["allowReplay"];
            if (replayAllowed)
            {
                return original;
            }

            return new ReplayBlockingMessageHandlingMember(original);
        }
            /// <summary>
            /// Walks the properties of the inspected type, registering child entities and their command
            /// handlers and recording the identifier, routing key, and version properties.
            /// </summary>
            /// <exception cref="AxonConfigurationException">
            /// The identifier property's type is rejected by <c>IsValidIdentifier</c>.
            /// </exception>
            private void InspectFields()
            {
                foreach (var property in _inspectedType.GetProperties())
                {
                    // Let every child entity definition try to build a child model for this property.
                    ChildEntityDefinitions.ForEach(definition => definition.CreateChildDefinition(property, this).IfPresent(childEntity =>
                    {
                        _children.Add(childEntity);
                        childEntity.CommandHandlers.ForEach(handler => _commandHandlers.TryAdd(handler.Key, handler.Value));
                    }));

                    // EntityIdAttribute marks the identifier; its "routingKey" value is used unless it
                    // equals the empty string, in which case the property name is used.
                    AnnotationUtils.FindAnnotationAttributes<EntityIdAttribute>(property).IfPresent(idAttributes =>
                    {
                        _identifierField = property;
                        _routingKey = string.Empty.Equals(idAttributes.GetValueOrDefault("routingKey"))
                            ? property.Name
                            : (string)idAttributes.GetValueOrDefault("routingKey");
                    });

                    // KeyAttribute is only consulted while no identifier has been found.
                    if (_identifierField == null)
                    {
                        AnnotationUtils.FindAnnotationAttributes<KeyAttribute>(property).IfPresent(keyAttributes =>
                        {
                            _identifierField = property;
                            _routingKey      = property.Name;
                        });
                    }

                    if (_identifierField != null && !_identifierField.PropertyType.IsValidIdentifier())
                    {
                        throw new AxonConfigurationException($"Aggregate identifier type {_identifierField.PropertyType.Name} should override ToString()");
                    }

                    AnnotationUtils.FindAnnotationAttributes<AggregateVersionAttribute>(property).IfPresent(versionAttributes => _versionField = property);
                }
            }
Beispiel #25
0
        /// <summary>
        /// Builds a Mascot result containing two spectra with one annotation key each and checks that
        /// <c>AnnotationUtils.GetAnnotationKeys</c> reports exactly those two keys.
        /// </summary>
        public void TestGetAnnotationKeys()
        {
            const string firstKey = "TEST1";
            const string secondKey = "TEST2";

            // First spectrum; the IdentifiedPeptide is constructed only for its effect on the spectrum.
            var firstSpectrum = new IdentifiedSpectrum();
            firstSpectrum.Annotations.Add(firstKey, null);
            firstSpectrum.Query.FileScan.Experimental = "EXP1";
            new IdentifiedPeptide(firstSpectrum);

            // Second spectrum, set up the same way.
            var secondSpectrum = new IdentifiedSpectrum();
            secondSpectrum.Annotations.Add(secondKey, null);
            secondSpectrum.Query.FileScan.Experimental = "EXP2";
            new IdentifiedPeptide(secondSpectrum);

            firstSpectrum.Peptide.Sequence = "SEQ1";
            secondSpectrum.Peptide.Sequence = "SEQ2";

            // Assemble protein -> group -> result.
            var protein = new IdentifiedProtein();
            protein.Peptides.Add(firstSpectrum.Peptide);
            protein.Peptides.Add(secondSpectrum.Peptide);

            var proteinGroup = new IdentifiedProteinGroup();
            proteinGroup.Add(protein);

            var mascotResult = new MascotResult();
            mascotResult.Add(proteinGroup);

            List<string> foundKeys = AnnotationUtils.GetAnnotationKeys(mascotResult.GetSpectra());

            Assert.AreEqual(2, foundKeys.Count);
            Assert.IsTrue(foundKeys.Contains(firstKey));
            Assert.IsTrue(foundKeys.Contains(secondKey));
        }
        /// <summary>
        /// Creates a <c>MetaDataExtractor</c> for every parameter that is either assignable to
        /// <c>MetaData</c> (null key) or carries <c>MetaDataValueAttribute</c> attributes
        /// (keyed by the "metaDataValue" entry).
        /// </summary>
        /// <param name="parameters">Parameters to inspect, in declaration order.</param>
        /// <returns>Extractors for the matching parameters, tagged with the parameter index.</returns>
        private MetaDataExtractor[] ExtractMetaData(ParameterInfo[] parameters)
        {
            var extractors = new List<MetaDataExtractor>();

            for (int index = 0; index < parameters.Length; index++)
            {
                var parameter = parameters[index];

                if (typeof(MetaData).IsAssignableFrom(parameter.ParameterType))
                {
                    extractors.Add(new MetaDataExtractor(index, null));
                    continue;
                }

                var valueAttributes = AnnotationUtils.FindAnnotationAttributes(parameter, typeof(MetaDataValueAttribute));
                if (!valueAttributes.IsPresent)
                {
                    continue;
                }

                extractors.Add(new MetaDataExtractor(index, (string)valueAttributes.Get().Get("metaDataValue")));
            }

            return extractors.ToArray();
        }
        /// <summary>
        /// Describes the output columns: a vector Score column and a UInt32 key PredictedLabel column.
        /// When the label column is valid, its KeyValues annotation is appended to the PredictedLabel
        /// annotations and the label column is passed on to the Score annotations.
        /// </summary>
        /// <param name="inputSchema">Input schema searched for the label column.</param>
        /// <returns>The Score and PredictedLabel column shapes, in that order.</returns>
        private SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema)
        {
            SchemaShape.Column? labelCol = null;
            var predictedLabelAnnotations = AnnotationUtils.GetTrainerOutputAnnotation();

            if (LabelColumn.IsValid)
            {
                bool found = inputSchema.TryFindColumn(LabelColumn.Name, out var inputLabelCol);
                Contracts.Assert(found);

                labelCol = inputLabelCol;
                var keyValueAnnotations = inputLabelCol.Annotations.Where(x => x.Name == AnnotationUtils.Kinds.KeyValues);
                predictedLabelAnnotations = predictedLabelAnnotations.Concat(keyValueAnnotations);
            }

            var scoreAnnotations = AnnotationUtils.AnnotationsForMulticlassScoreColumn(labelCol);

            var scoreColumn = new SchemaShape.Column(
                DefaultColumnNames.Score,
                SchemaShape.Column.VectorKind.Vector,
                NumberDataViewType.Single,
                false,
                new SchemaShape(scoreAnnotations));
            var predictedLabelColumn = new SchemaShape.Column(
                DefaultColumnNames.PredictedLabel,
                SchemaShape.Column.VectorKind.Scalar,
                NumberDataViewType.UInt32,
                true,
                new SchemaShape(predictedLabelAnnotations));

            return new[] { scoreColumn, predictedLabelColumn };
        }
Beispiel #28
0
        /// <summary>Reads ACE <c>*.apf.xml</c> files into a single corpus annotation.</summary>
        /// <remarks>
        /// Callers should probably use parse() instead of calling this directly.
        /// Document boundaries are currently ignored: sentences from all documents end up in one list.
        /// Files whose name ends with ".UPC1.apf.xml" are skipped when a directory is read.
        /// </remarks>
        /// <param name="path">
        /// Directory containing the ACE files to read (e.g.
        /// "/home/mcclosky/scr/data/ACE2005/english_test"), searched recursively, or the path
        /// to a single file.
        /// </param>
        /// <returns>The corpus annotation with every sentence that was read</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Xml.Sax.SAXException"/>
        /// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
        public override Annotation Read(string path)
        {
            IList<ICoreMap> collected = new List<ICoreMap>();
            File root = new File(path);

            System.Diagnostics.Debug.Assert(root.Exists());
            Annotation corpus = new Annotation(string.Empty);

            if (!root.IsDirectory())
            {
                // A single file was given.
                Sharpen.Collections.AddAll(collected, ReadDocument(root, corpus));
            }
            else
            {
                foreach (File aceFile in IOUtils.IterFilesRecursive(root, ".apf.xml"))
                {
                    bool isUpc1File = aceFile.GetName().EndsWith(".UPC1.apf.xml");
                    if (!isUpc1File)
                    {
                        Sharpen.Collections.AddAll(collected, ReadDocument(aceFile, corpus));
                    }
                }
            }

            AnnotationUtils.AddSentences(corpus, collected);

            // Quick statistics over everything that was read.
            foreach (ICoreMap sentence in collected)
            {
                // adjacent entity mentions of the same type
                CountAdjacentMentions(sentence);
                // relations between two proper nouns
                CountNameRelations(sentence);
                // mention types
                CountMentionTypes(sentence);
            }

            return corpus;
        }
        /// <summary>
        /// Builds the categorical feature metadata for training and records the raw feature count.
        /// </summary>
        /// <remarks>
        /// Categorical splits follow <c>UseCategoricalSplit</c> when it is set; otherwise they are enabled
        /// only for more than 50000 rows, and that choice is reported on <paramref name="ch"/>.
        /// When categorical indices are found, the "categorical_feature" option is set as well.
        /// </remarks>
        /// <param name="ch">Channel that receives the auto-tuning message.</param>
        /// <param name="trainData">Training data whose schema describes the feature column.</param>
        /// <param name="numRow">Row count used for the auto-tuning decision.</param>
        /// <returns>The populated metadata.</returns>
        private CategoricalMetaData GetCategoricalMetaData(IChannel ch, RoleMappedData trainData, int numRow)
        {
            const int useCatThreshold = 50000;

            CategoricalMetaData result = new CategoricalMetaData();
            int[] catFeatureIndices = null;

            // Disable cat when data is too small, reduce the overfitting.
            bool useCategorical = LightGbmTrainerOptions.UseCategoricalSplit ?? numRow > useCatThreshold;
            bool autoTuned = !LightGbmTrainerOptions.UseCategoricalSplit.HasValue;
            if (autoTuned)
            {
                ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCategoricalSplit) + " = " + useCategorical);
            }

            if (useCategorical)
            {
                var featureCol = trainData.Schema.Schema[DefaultColumnNames.Features];
                AnnotationUtils.TryGetCategoricalFeatureIndices(trainData.Schema.Schema, featureCol.Index, out catFeatureIndices);
            }

            int featureVectorSize = trainData.Schema.Feature.Value.Type.GetVectorSize();
            FeatureCount = featureVectorSize;
            result.TotalCats = 0;

            if (catFeatureIndices != null)
            {
                var catIndices = ConstructCategoricalFeatureMetaData(catFeatureIndices, featureVectorSize, ref result);
                // Set categorical features
                Options["categorical_feature"] = string.Join(",", catIndices);
            }
            else
            {
                // No categorical features: every raw column is its own column.
                result.CategoricalBoudaries = null;
                result.NumCol = featureVectorSize;
            }

            return result;
        }
Beispiel #30
0
 /// <summary>
 /// Describes the output columns: scalar Score, Probability, and PredictedLabel columns,
 /// each carrying trainer output annotations.
 /// </summary>
 /// <param name="inputSchema">Input schema; not consulted here.</param>
 /// <returns>The Score, Probability, and PredictedLabel column shapes, in that order.</returns>
 private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema)
 {
     var scoreColumn = new SchemaShape.Column(
         DefaultColumnNames.Score,
         SchemaShape.Column.VectorKind.Scalar,
         NumberDataViewType.Single,
         false,
         new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation()));
     var probabilityColumn = new SchemaShape.Column(
         DefaultColumnNames.Probability,
         SchemaShape.Column.VectorKind.Scalar,
         NumberDataViewType.Single,
         false,
         new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation(true)));
     var predictedLabelColumn = new SchemaShape.Column(
         DefaultColumnNames.PredictedLabel,
         SchemaShape.Column.VectorKind.Scalar,
         BooleanDataViewType.Instance,
         false,
         new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation()));

     return new[] { scoreColumn, probabilityColumn, predictedLabelColumn };
 }