/// <summary>
/// Instructs the HTML-to-text extraction engine (EntityPlaneMethod) to produce text from an XPath selection.
/// </summary>
/// <param name="name">Instruction name: a human-readable descriptive name, or a special instruction name like ::BODYTEXT::</param>
/// <param name="flags">Instruction flags, controlling what is rendered and how.</param>
/// <param name="code">XPath associated with the instruction; selects the nodes to be rendered into text.</param>
/// <param name="weight">Weight factor of the instruction, i.e. number of times the content should be repeated (boosting TF).</param>
/// <param name="expansion">Graph selection expansion steps, used to reach nodes that are 1+ edges away.</param>
/// <param name="remove">If set to <c>true</c>, every existing instruction is removed from the set before the new one is added.</param>
/// <remarks>
/// Appends the described instruction to the rendering instruction set, optionally clearing the set first.
/// </remarks>
/// <seealso cref="aceOperationSetExecutorBase" />
public void aceOperation_setRenderInstruction(
    [Description("Instruction name, it is human-readable descriptive name or special instructin name like ::BODYTEXT::")] String name = "::BODYTEXT::",
    [Description("Instruction flags, controls what and how to render")] DocumentRenderInstructionFlags flags = DocumentRenderInstructionFlags.this_page_content,
    [Description("XPath associated with the instruction, selects nodes to be rendered into text")] String code = "",
    [Description("Weight factor of the instruction, i.e. number of times the content should be repeated (boosting TF)")] Double weight = 1.0,
    [Description("Graph selection expansion steps - to reach 1+ edges far nodes ")] Int32 expansion = 1,
    [Description("If true it will remove any existing instruction in the set")] Boolean remove = false)
{
    // Start from an empty instruction set when the caller asks for it.
    if (remove)
    {
        data.instructions.Clear();
    }

    var instruction = new DocumentRenderInstruction(name, code, weight)
    {
        instructionFlags = flags,
        graphExpansionSteps = expansion,
        // The weight is passed to the constructor and also assigned explicitly afterwards;
        // the explicit assignment is kept so the stored value does not depend on the constructor.
        weight = weight
    };

    data.instructions.Add(instruction);
}
/// <summary>
/// Builds the default configuration.
/// </summary>
/// <returns>A newly created <see cref="PlanesMethodSettings"/> instance populated with the default values assigned in this method.</returns>
public static PlanesMethodSettings GetDefaultSettings()
{
    var settings = new PlanesMethodSettings();

    // Entity method: register the description, title and body-text render instructions, in that order.
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetDescriptionInstruction());
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetTitleInstruction());
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetBodyTextInstruction());

    // Blender options are not assigned here (a binaryAggregation | pageLevel assignment is disabled).
    // The filter function name is left empty (nameof(DocumentEntropyFunction) was the noted alternative).
    settings.entityMethod.filterFunctionName = "";
    settings.entityMethod.filterLimit = 5;

    // Corpus method: stemmer and tokenizer are referenced by type name; no transliteration rule set.
    settings.corpusMethod.stemmer = nameof(EnglishStemmer);
    settings.corpusMethod.tokenizer = nameof(TokenizerBasic);
    settings.corpusMethod.transliterationRuleSetId = "";

    // Weighting model: TermFrequencyFunction as the local function,
    // plus a single global factor that names IDFElement.
    FeatureWeightModel termWeighting = new FeatureWeightModel
    {
        LocalFunction = new Weighting.Local.TermFrequencyFunction()
    };

    FeatureWeightFactor idfFactor = new FeatureWeightFactor();
    idfFactor.Settings.functionName = nameof(IDFElement);
    termWeighting.GlobalFactors.Add(idfFactor);

    settings.corpusMethod.WeightModel = termWeighting;

    // Feature filter: only the limit is set; no function settings are assigned
    // (a CollectionTDPElement-based function configuration is disabled).
    settings.corpusMethod.filter = new FeatureFilter { limit = 8000 };

    // The vector method constructor is not configured here
    // (similarity-function and direct-term-weight dimension setups are disabled).

    // Classifier: multi-class SVM trained with L2 loss
    // (a kNearestNeighbors variant with kNN_k = 4 is disabled).
    settings.featureMethod.classifierSettings.type = Classifiers.ClassifierType.multiClassSVM;
    settings.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;

    return settings;
}