Exemplo n.º 1
0
        //        public Boolean DoNormalizeOnDomainLevel { get; set; } = true;


        /// <summary>
        /// Builds a document ranking preset whose score model holds a single term weight factor:
        /// <see cref="TermFrequencyFunction"/> as the local function and <see cref="IDFElement"/> as the only global factor.
        /// </summary>
        /// <param name="documentLimit">Value assigned to <c>SizeLimit</c> of the preset's query.</param>
        /// <param name="log">Log builder handed to the global factor's <c>Deploy</c> call.</param>
        /// <returns>A new <see cref="DocumentRankingMethod"/> with its <c>query</c> and <c>model</c> assigned.</returns>
        public static DocumentRankingMethod GetPreset_TFIDF(Int32 documentLimit, ILogBuilder log)
        {
            var preset     = new DocumentRankingMethod();
            var model      = new ScoreModel();
            var termWeight = new ScoreModelTermWeightFactor();

            // Local part of the weighting: plain term frequency.
            termWeight.TermWeightModel = new Weighting.FeatureWeightModel
            {
                LocalFunction = new TermFrequencyFunction()
            };

            // Global part of the weighting: one IDF factor, weight 1.0.
            var idfFactor = new FeatureWeightFactor
            {
                Settings = new GlobalFunctionSettings
                {
                    functionName = nameof(IDFElement),
                    weight       = 1.0
                }
            };

            // The factor is deployed before it is registered with the model, as in the original flow.
            idfFactor.Deploy(log);
            termWeight.TermWeightModel.GlobalFactors.Add(idfFactor);

            termWeight.weight = 1.0;
            model.Factors.Add(termWeight);

            preset.query = new DocumentSelectQuery();
            preset.query.SizeLimit = documentLimit;
            preset.model = model;

            return preset;
        }
Exemplo n.º 2
0
        /// <summary>Adds an <see cref="IGMElement"/> global factor to the weighting model.</summary>
        /// <remarks>
        /// <para>
        /// Creates a new <see cref="FeatureWeightFactor"/>, sets its function name, lambda and weight,
        /// and appends it to <c>data.GlobalFactors</c>. When <paramref name="removeExisting"/> is <c>true</c>
        /// the list is cleared first.
        /// </para>
        /// </remarks>
        /// <param name="l">Lambda factor of IGM, stored in the factor's <c>Settings.l</c>.</param>
        /// <param name="weight">Weight associated with the function.</param>
        /// <param name="removeExisting">If any existing global factor should be removed.</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_setGlobalIGMWeight(
            [Description("Lambda factor of IGM")] Double l = 7.0,
            [Description("Weigth associated with the function")] Double weight = 1.0,
            [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
        {
            if (removeExisting)
            {
                data.GlobalFactors.Clear();
            }

            // Nested initializer assigns onto the Settings instance the factor already exposes.
            var igmFactor = new FeatureWeightFactor
            {
                Settings =
                {
                    functionName = nameof(IGMElement),
                    l            = l,
                    weight       = weight
                }
            };

            data.GlobalFactors.Add(igmFactor);
        }
Exemplo n.º 3
0
        /// <summary>Adds a <see cref="CollectionTDPElement"/> global factor to the weighting model.</summary>
        /// <remarks>
        /// <para>
        /// The name of <paramref name="factor"/> is added to the new factor's <c>Settings.flags</c>.
        /// When <paramref name="removeExisting"/> is <c>true</c>, <c>data.GlobalFactors</c> is cleared
        /// before the new factor is appended.
        /// </para>
        /// </remarks>
        /// <param name="factor">What factor should be added.</param>
        /// <param name="weight">Weight associated with the function.</param>
        /// <param name="removeExisting">If any existing global factor should be removed.</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_setGlobalTDPWeight(
            [Description("What factor should be added")] TDPFactor factor = TDPFactor.chi,
            [Description("Weigth associated with the function")] Double weight = 1.0,
            [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
        {
            if (removeExisting)
            {
                data.GlobalFactors.Clear();
            }

            var tdpFactor = new FeatureWeightFactor();
            tdpFactor.Settings.functionName = nameof(CollectionTDPElement);

            // The selected TDP factor is passed on as a flag, by its enum member name.
            String factorFlag = factor.ToString();
            tdpFactor.Settings.flags.Add(factorFlag);

            tdpFactor.Settings.weight = weight;

            data.GlobalFactors.Add(tdpFactor);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Adds a global factor, selected by function name, to the feature weighting model.
        /// The original documentation lists ICF, ICSd, IDF, IGM and mIDF as supported functions.
        /// </summary>
        /// <param name="function">Name of the function element, stored as the factor's <c>Settings.functionName</c>.</param>
        /// <param name="weight">Weight associated with the function.</param>
        /// <param name="flags">
        /// Text split with <c>SplitSmart(",")</c>; the resulting items are appended to the factor's
        /// <c>Settings.flags</c>. Described by the original author as "How IDF should be computed".
        /// </param>
        /// <param name="removeExisting">If set to <c>true</c>, <c>data.GlobalFactors</c> is cleared before the new factor is added.</param>
        /// <remarks>
        /// It will add the specified global factor and optionally remove any existing global factors.
        /// </remarks>
        /// <seealso cref="aceOperationSetExecutorBase" />
        public void aceOperation_setGlobalWeight(
            [Description("Name of function elemenet")] String function = "IDFElement",
            [Description("Weigth associated with the function")] Double weight = 1.0,
            [Description("How IDF should be computed")] String flags = "logPlus",
            [Description("If any existing global factor should be removed")] Boolean removeExisting = false)
        {
            if (removeExisting)
            {
                data.GlobalFactors.Clear();
            }

            var globalFactor = new FeatureWeightFactor();
            globalFactor.Settings.functionName = function;

            // Split the flag text on "," and hand every piece to the factor settings.
            var parsedFlags = flags.SplitSmart(",");
            globalFactor.Settings.flags.AddRange(parsedFlags);

            globalFactor.Settings.weight = weight;

            data.GlobalFactors.Add(globalFactor);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates a <see cref="PlanesMethodSettings"/> instance populated with the default configuration:
        /// three render instructions, English stemmer with the basic tokenizer, a TF weighting model with one IDF
        /// global factor, a feature filter limited to 8000 and a multi-class SVM classifier with L2 loss.
        /// </summary>
        /// <returns>The newly created, populated settings object.</returns>
        public static PlanesMethodSettings GetDefaultSettings()
        {
            var settings = new PlanesMethodSettings();

            // Entity plane: render description, title and body text, in that order.
            var instructions = settings.entityMethod.instructions;
            instructions.Add(DocumentRenderInstruction.GetDescriptionInstruction());
            instructions.Add(DocumentRenderInstruction.GetTitleInstruction());
            instructions.Add(DocumentRenderInstruction.GetBodyTextInstruction());

            //  settings.entityMethod.blenderOptions = DocumentBlenderFunctionOptions.binaryAggregation | DocumentBlenderFunctionOptions.pageLevel;
            settings.entityMethod.filterFunctionName = ""; // nameof(DocumentEntropyFunction);
            settings.entityMethod.filterLimit        = 5;

            // Corpus plane: text processing components are referenced by type name.
            settings.corpusMethod.stemmer                  = nameof(EnglishStemmer);
            settings.corpusMethod.tokenizer                = nameof(TokenizerBasic);
            settings.corpusMethod.transliterationRuleSetId = "";

            // Weighting model: term frequency as local function, IDF as the single global factor.
            var tfIdfModel = new FeatureWeightModel
            {
                LocalFunction = new Weighting.Local.TermFrequencyFunction()
            };

            var idfFactor = new FeatureWeightFactor();
            idfFactor.Settings.functionName = nameof(IDFElement);
            tfIdfModel.GlobalFactors.Add(idfFactor);

            settings.corpusMethod.WeightModel = tfIdfModel;

            // Feature filter: only the limit is set; the function settings below are left disabled.
            //   filter.functionSettings = new GlobalFunctionSettings();
            //   filter.functionSettings.functionName = nameof(CollectionTDPElement);
            //   filter.functionSettings.weight = 1.0;
            //   filter.functionSettings.flags.Add(Weighting.Metrics.TDPFactor.chi.ToString());
            settings.corpusMethod.filter = new FeatureFilter { limit = 8000 };

            /*
             * Disabled alternative: similarity-function label dimension.
             * settings.vectorMethod.constructor = new Feature.Settings.FeatureVectorConstructorSettings();
             * dimensionSpecification dimSpec = new dimensionSpecification();
             * dimSpec.functionName = nameof(CosineSimilarityFunction);
             * dimSpec.type = FeatureVectorDimensionType.similarityFunction;
             * settings.vectorMethod.constructor.labelDimensions.Add(dimSpec);
             */

            /*
             * Disabled alternative: direct term weight feature dimension.
             * settings.vectorMethod.constructor = new Feature.Settings.FeatureVectorConstructorSettings();
             * dimensionSpecification dimSpec = new dimensionSpecification();
             * //dimSpec.functionName = nameof(CosineSimilarityFunction);
             * dimSpec.type = FeatureVectorDimensionType.directTermWeight;
             * settings.vectorMethod.constructor.featureDimensions.Add(dimSpec);
             */

            // Feature plane: multi-class SVM trained with L2 loss.
            settings.featureMethod.classifierSettings.type = Classifiers.ClassifierType.multiClassSVM;
            settings.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;

            /*
             * Disabled alternative: k-nearest-neighbours classifier.
             * settings.featureMethod.classifierSettings.type = Classifiers.ClassifierType.kNearestNeighbors;
             * settings.featureMethod.classifierSettings.lossFunctionForTraining = Accord.MachineLearning.VectorMachines.Learning.Loss.L2;
             * settings.featureMethod.classifierSettings.kNN_k = 4;
             */

            return settings;
        }