// public Boolean DoNormalizeOnDomainLevel { get; set; } = true;

/// <summary>
/// Builds a document ranking preset whose score model holds a single term-weight factor:
/// a <see cref="TermFrequencyFunction"/> local function combined with one
/// <see cref="IDFElement"/> global factor.
/// </summary>
/// <param name="documentLimit">Value assigned to <c>SizeLimit</c> of the preset's select query.</param>
/// <param name="log">Log builder handed to <c>Deploy</c> of the global factor.</param>
/// <returns>A new <see cref="DocumentRankingMethod"/> with its <c>query</c> and <c>model</c> assigned.</returns>
public static DocumentRankingMethod GetPreset_TFIDF(Int32 documentLimit, ILogBuilder log)
{
    var ranking = new DocumentRankingMethod();
    var model = new ScoreModel();

    // Term-weight factor: term frequency as the local function, weight 1.0.
    var termWeight = new ScoreModelTermWeightFactor
    {
        TermWeightModel = new Weighting.FeatureWeightModel
        {
            LocalFunction = new TermFrequencyFunction()
        },
        weight = 1.0
    };

    // Global factor: IDFElement with weight 1.0; it is deployed before being attached.
    var idfFactor = new FeatureWeightFactor();
    idfFactor.Settings = new GlobalFunctionSettings();
    idfFactor.Settings.functionName = nameof(IDFElement);
    idfFactor.Settings.weight = 1.0;
    idfFactor.Deploy(log);

    termWeight.TermWeightModel.GlobalFactors.Add(idfFactor);
    model.Factors.Add(termWeight);

    ranking.query = new DocumentSelectQuery
    {
        SizeLimit = documentLimit
    };
    ranking.model = model;

    return ranking;
}
/// <summary>Adds an <see cref="IGMElement"/> global factor to the weighting model held in <c>data</c>.</summary>
/// <remarks>
/// <para>
/// A new <see cref="FeatureWeightFactor"/> is created, its settings receive the function name,
/// the lambda value and the weight, and it is appended to the global factors.
/// When <paramref name="removeExisting"/> is set, the global factors are cleared first.
/// </para>
/// </remarks>
/// <param name="l">Lambda factor of IGM</param>
/// <param name="weight">Weight associated with the function</param>
/// <param name="removeExisting">If any existing global factor should be removed</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_setGlobalIGMWeight(
    [Description("Lambda factor of IGM")] Double l = 7.0,
    [Description("Weigth associated with the function")] Double weight = 1.0,
    [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
{
    var globalFactors = data.GlobalFactors;

    if (removeExisting)
    {
        globalFactors.Clear();
    }

    var igmFactor = new FeatureWeightFactor();
    igmFactor.Settings.functionName = nameof(IGMElement);
    igmFactor.Settings.l = l;
    igmFactor.Settings.weight = weight;

    globalFactors.Add(igmFactor);
}
/// <summary>Adds a <see cref="CollectionTDPElement"/> global factor to the weighting model held in <c>data</c>.</summary>
/// <remarks>
/// <para>
/// The selected <paramref name="factor"/> is stored, by its name, as a flag in the settings of the new factor.
/// When <paramref name="removeExisting"/> is set, the global factors are cleared before the new one is added.
/// </para>
/// </remarks>
/// <param name="factor">What factor should be added</param>
/// <param name="weight">Weight associated with the function</param>
/// <param name="removeExisting">If any existing global factor should be removed</param>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_setGlobalTDPWeight(
    [Description("What factor should be added")] TDPFactor factor = TDPFactor.chi,
    [Description("Weigth associated with the function")] Double weight = 1.0,
    [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
{
    var globalFactors = data.GlobalFactors;

    if (removeExisting)
    {
        globalFactors.Clear();
    }

    var tdpFactor = new FeatureWeightFactor();
    tdpFactor.Settings.functionName = nameof(CollectionTDPElement);
    tdpFactor.Settings.flags.Add(factor.ToString());
    tdpFactor.Settings.weight = weight;

    globalFactors.Add(tdpFactor);
}
/// <summary>
/// Adds a global function, selected by name, to the feature weighting model held in <c>data</c> - supports: ICF, ICSd, IDF, IGM, mIDF
/// </summary>
/// <param name="function">Name of the function element</param>
/// <param name="weight">Weight associated with the function</param>
/// <param name="flags">Comma-separated flags added to the settings of the function (e.g. how IDF should be computed)</param>
/// <param name="removeExisting">if set to <c>true</c>, the existing global factors are cleared first.</param>
/// <remarks>
/// The <paramref name="flags"/> text is split with <c>SplitSmart(",")</c> and every resulting item is added to the
/// flags of the new factor's settings.
/// </remarks>
/// <seealso cref="aceOperationSetExecutorBase" />
public void aceOperation_setGlobalWeight(
    [Description("Name of function elemenet")] String function = "IDFElement",
    [Description("Weigth associated with the function")] Double weight = 1.0,
    [Description("How IDF should be computed")] String flags = "logPlus",
    [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
{
    var globalFactors = data.GlobalFactors;

    if (removeExisting)
    {
        globalFactors.Clear();
    }

    var namedFactor = new FeatureWeightFactor();
    namedFactor.Settings.functionName = function;
    namedFactor.Settings.flags.AddRange(flags.SplitSmart(","));
    namedFactor.Settings.weight = weight;

    globalFactors.Add(namedFactor);
}
/// <summary>
/// Creates a <see cref="PlanesMethodSettings"/> instance populated with the default configuration
/// of its entity, corpus and feature methods.
/// </summary>
/// <returns>The newly created settings object.</returns>
public static PlanesMethodSettings GetDefaultSettings()
{
    var settings = new PlanesMethodSettings();

    // Entity method: render instructions are added in the order description, title, body text.
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetDescriptionInstruction());
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetTitleInstruction());
    settings.entityMethod.instructions.Add(DocumentRenderInstruction.GetBodyTextInstruction());
    // settings.entityMethod.blenderOptions = DocumentBlenderFunctionOptions.binaryAggregation | DocumentBlenderFunctionOptions.pageLevel;
    settings.entityMethod.filterFunctionName = ""; // nameof(DocumentEntropyFunction);
    settings.entityMethod.filterLimit = 5;

    // Corpus method: stemmer and tokenizer are referenced by type name.
    settings.corpusMethod.stemmer = nameof(EnglishStemmer);
    settings.corpusMethod.tokenizer = nameof(TokenizerBasic);
    settings.corpusMethod.transliterationRuleSetId = "";

    // Weighting model: term frequency as the local function, IDFElement as the only global factor.
    var tfIdfModel = new FeatureWeightModel
    {
        LocalFunction = new Weighting.Local.TermFrequencyFunction()
    };

    var idfFactor = new FeatureWeightFactor();
    idfFactor.Settings.functionName = nameof(IDFElement);
    tfIdfModel.GlobalFactors.Add(idfFactor);

    settings.corpusMethod.WeightModel = tfIdfModel;

    // Feature filter with a limit of 8000; no filter function is configured.
    var filter = new FeatureFilter
    {
        limit = 8000
    };
    //filter.functionSettings = new GlobalFunctionSettings();
    //filter.functionSettings.functionName = nameof(CollectionTDPElement);
    //filter.functionSettings.weight = 1.0;
    //filter.functionSettings.flags.Add(Weighting.Metrics.TDPFactor.chi.ToString());
    settings.corpusMethod.filter = filter;

    /*
     * settings.vectorMethod.constructor = new Feature.Settings.FeatureVectorConstructorSettings();
     * dimensionSpecification dimSpec = new dimensionSpecification();
     * dimSpec.functionName = nameof(CosineSimilarityFunction);
     * dimSpec.type = FeatureVectorDimensionType.similarityFunction;
     * settings.vectorMethod.constructor.labelDimensions.Add(dimSpec);
     */

    //settings.vectorMethod.constructor = new Feature.Settings.FeatureVectorConstructorSettings();
    //dimensionSpecification dimSpec = new dimensionSpecification();
    ////dimSpec.functionName = nameof(CosineSimilarityFunction);
    //dimSpec.type = FeatureVectorDimensionType.directTermWeight;
    //settings.vectorMethod.constructor.featureDimensions.Add(dimSpec);

    // Feature method: multi-class SVM classifier trained with the L2 loss function.
    settings.featureMethod.classifierSettings.type = Classifiers.ClassifierType.multiClassSVM;
    settings.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;

    /*
     * settings.featureMethod.classifierSettings.type = Classifiers.ClassifierType.kNearestNeighbors;
     * settings.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;
     * settings.featureMethod.classifierSettings.kNN_k = 4;
     */

    return settings;
}