/// <summary>
/// Sets up the default value, value type and granularity for the <c>Domain</c> feature.
/// When <c>UseDomainAsFeature</c> is off, all three collections are left empty.
/// </summary>
/// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
public DomainNameAddOnFeaturesGenerator(FeaturesConfig featuresConfig) : base(featuresConfig)
        {
            // A single flag decides whether the collections below get populated at all.
            var domainEnabled = this.FeaturesConfig.UseDomainAsFeature;

            var defaults = new Features();
            if (domainEnabled)
            {
                if (this.FeaturesConfig.NullDefaultsAllowed)
                {
                    defaults.Add(CityFeatureType.Domain, (int?)null);
                }
                else
                {
                    // int.MaxValue is the default used when null defaults are not allowed.
                    defaults.Add(CityFeatureType.Domain, int.MaxValue);
                }
            }
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            if (domainEnabled)
            {
                valueTypes.Add(CityFeatureType.Domain, typeof(int?));
            }
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            if (domainEnabled)
            {
                granularities.Add(CityFeatureType.Domain, FeatureGranularity.Discrete);
            }
            FeatureGranularities = granularities;
        }
        /// <summary>
        /// Registers defaults, value types and granularities for the
        /// "first letters of admin1" features. The two slot-index features are
        /// only registered when <c>UseSlotIndex</c> is set.
        /// </summary>
        /// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
        public FirstLettersAdmin1AddOnFeaturesGenerator(FeaturesConfig featuresConfig) : base(featuresConfig)
        {
            var nullDefaults  = this.FeaturesConfig.NullDefaultsAllowed;
            var withSlotIndex = this.FeaturesConfig.UseSlotIndex;

            // Defaults are null when allowed; otherwise 0 for the counters and
            // byte.MaxValue for the slot indexes.
            byte?  lettersDefault   = nullDefaults ? (byte?)null : (byte?)0;
            float? ratioDefault     = nullDefaults ? (float?)null : (float?)0;
            byte?  slotIndexDefault = nullDefaults ? (byte?)null : (byte?)byte.MaxValue;

            var defaults = new Features();
            defaults.Add(CityFeatureType.FirstLettersAdmin1NameMatch, false);
            defaults.Add(CityFeatureType.FirstLettersAdmin1Letters, lettersDefault);
            defaults.Add(CityFeatureType.FirstLettersAdmin1LettersRatio, ratioDefault);
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            valueTypes.Add(CityFeatureType.FirstLettersAdmin1NameMatch, typeof(bool));
            valueTypes.Add(CityFeatureType.FirstLettersAdmin1Letters, typeof(byte?));
            valueTypes.Add(CityFeatureType.FirstLettersAdmin1LettersRatio, typeof(float?));
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            granularities.Add(CityFeatureType.FirstLettersAdmin1NameMatch, FeatureGranularity.Discrete);
            granularities.Add(CityFeatureType.FirstLettersAdmin1Letters, FeatureGranularity.Continuous);
            granularities.Add(CityFeatureType.FirstLettersAdmin1LettersRatio, FeatureGranularity.Continuous);
            FeatureGranularities = granularities;

            if (!withSlotIndex)
            {
                return;
            }

            // Slot-index features (right-to-left and left-to-right) are optional.
            FeatureDefaults[CityFeatureType.FirstLettersAdmin1RTLSlotIndex] = slotIndexDefault;
            FeatureDefaults[CityFeatureType.FirstLettersAdmin1LTRSlotIndex] = slotIndexDefault;

            FeatureDefaultsValueTypes[CityFeatureType.FirstLettersAdmin1RTLSlotIndex] = typeof(byte?);
            FeatureDefaultsValueTypes[CityFeatureType.FirstLettersAdmin1LTRSlotIndex] = typeof(byte?);

            FeatureGranularities[CityFeatureType.FirstLettersAdmin1RTLSlotIndex] = FeatureGranularity.Discrete;
            FeatureGranularities[CityFeatureType.FirstLettersAdmin1LTRSlotIndex] = FeatureGranularity.Discrete;
        }
        /// <summary>
        /// Registers defaults, value types and granularities for the exact-country
        /// features. The two slot-index features are only registered when
        /// <c>UseSlotIndex</c> is set.
        /// </summary>
        /// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
        public ExactCountryAddOnFeaturesGenerator(FeaturesConfig featuresConfig) : base(featuresConfig)
        {
            var nullDefaults  = this.FeaturesConfig.NullDefaultsAllowed;
            var withSlotIndex = this.FeaturesConfig.UseSlotIndex;

            // Defaults are null when allowed; otherwise 0 letters and
            // byte.MaxValue for the slot indexes.
            byte? lettersDefault   = nullDefaults ? (byte?)null : (byte?)0;
            byte? slotIndexDefault = nullDefaults ? (byte?)null : (byte?)byte.MaxValue;

            var defaults = new Features();
            defaults.Add(CityFeatureType.ExactCountryNameMatch, false);
            defaults.Add(CityFeatureType.ExactCountryLetters, lettersDefault);
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            valueTypes.Add(CityFeatureType.ExactCountryNameMatch, typeof(bool));
            valueTypes.Add(CityFeatureType.ExactCountryLetters, typeof(byte?));
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            granularities.Add(CityFeatureType.ExactCountryNameMatch, FeatureGranularity.Discrete);
            granularities.Add(CityFeatureType.ExactCountryLetters, FeatureGranularity.Continuous);
            FeatureGranularities = granularities;

            if (!withSlotIndex)
            {
                return;
            }

            // Slot-index features (right-to-left and left-to-right) are optional.
            FeatureDefaults[CityFeatureType.ExactCountryRTLSlotIndex] = slotIndexDefault;
            FeatureDefaults[CityFeatureType.ExactCountryLTRSlotIndex] = slotIndexDefault;

            FeatureDefaultsValueTypes[CityFeatureType.ExactCountryRTLSlotIndex] = typeof(byte?);
            FeatureDefaultsValueTypes[CityFeatureType.ExactCountryLTRSlotIndex] = typeof(byte?);

            FeatureGranularities[CityFeatureType.ExactCountryRTLSlotIndex] = FeatureGranularity.Discrete;
            FeatureGranularities[CityFeatureType.ExactCountryLTRSlotIndex] = FeatureGranularity.Discrete;
        }
Exemple #4
0
        /// <summary>
        /// Creates the SVM classifier: configures a Gaussian-kernel SMO teacher and,
        /// when a path is given, loads a previously serialized machine from it.
        /// </summary>
        /// <param name="featureDefaultsValueTypes">Not read by this constructor.</param>
        /// <param name="featureGranularities">Not read by this constructor.</param>
        /// <param name="serializedClassifierPath">Optional path of a serialized machine; nothing is loaded when null.</param>
        public SVMClassifier(FeatureValueTypes featureDefaultsValueTypes, FeatureGranularities featureGranularities, string serializedClassifierPath = null)
        {
            var smo = new SequentialMinimalOptimization<Gaussian>();
            smo.UseComplexityHeuristic = true;
            smo.UseKernelEstimation = true; // Estimate the kernel from the data
            Teacher = smo;

            if (serializedClassifierPath == null)
            {
                return;
            }

            this.Svm = Serializer.Load<SupportVectorMachine<Gaussian>>(serializedClassifierPath);
        }
        /// <summary>
        /// Creates the random forest classifier. Builds one decision variable per
        /// entry of <paramref name="featureDefaultsValueTypes"/>, configures a
        /// 10-tree learner with them and, when a path is given, loads a previously
        /// serialized forest from it.
        /// </summary>
        /// <param name="featureDefaultsValueTypes">Feature-to-value-type map; each entry becomes one decision variable.</param>
        /// <param name="featureGranularities">Feature-to-granularity map, looked up by the same keys.</param>
        /// <param name="serializedClassifierPath">Optional path of a serialized forest; nothing is loaded when null.</param>
        /// <exception cref="ArgumentException">A feature's granularity is neither continuous nor discrete.</exception>
        public RandomForestClassifier(FeatureValueTypes featureDefaultsValueTypes, FeatureGranularities featureGranularities, string serializedClassifierPath = null)
        {
            var variables = new List<DecisionVariable>();

            foreach (var pair in featureDefaultsValueTypes)
            {
                var name        = pair.Key.ToString();
                var valueType   = pair.Value;
                var granularity = featureGranularities[pair.Key];

                var isContinuous = granularity == FeatureGranularity.Continuous;

                if (!isContinuous && granularity != FeatureGranularity.Discrete)
                {
                    throw new ArgumentException("Unknown feature granularity");
                }

                if (isContinuous)
                {
                    variables.Add(new DecisionVariable(name, DecisionVariableKind.Continuous));
                    continue;
                }

                var discreteVariable = new DecisionVariable(name, DecisionVariableKind.Discrete);

                // TODO: Fix uint, there is no Accord.UIntRange
                var isIntLike = valueType == typeof(int)
                                || valueType == typeof(int?)
                                || valueType == typeof(uint)
                                || valueType == typeof(uint?);
                var isByteLike = valueType == typeof(byte) || valueType == typeof(byte?);

                // Other value types keep whatever range the DecisionVariable constructor set.
                if (isIntLike)
                {
                    discreteVariable.Range = new Accord.IntRange(min: int.MinValue, max: int.MaxValue);
                }
                else if (isByteLike)
                {
                    discreteVariable.Range = new Accord.IntRange(min: byte.MinValue, max: byte.MaxValue);
                }

                variables.Add(discreteVariable);
            }

            var learner = new RandomForestLearning(variables.ToArray());
            learner.NumberOfTrees = 10;
            this.RandomForestLearner = learner;

            if (serializedClassifierPath == null)
            {
                return;
            }

            this.RandomForest = Serializer.Load<RandomForest>(serializedClassifierPath);
        }
        /// <summary>
        /// Registers the single <c>TLDMatch</c> feature: default <c>false</c>,
        /// value type <c>bool</c>, discrete granularity.
        /// </summary>
        /// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
        public TLDAddOnFeaturesGenerator(FeaturesConfig featuresConfig) : base(featuresConfig)
        {
            var defaults = new Features();
            defaults.Add(CityFeatureType.TLDMatch, false);
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            valueTypes.Add(CityFeatureType.TLDMatch, typeof(bool));
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            granularities.Add(CityFeatureType.TLDMatch, FeatureGranularity.Discrete);
            FeatureGranularities = granularities;
        }
Exemple #7
0
        /// <summary>
        /// Stores the pattern miner and the mined rules, then registers the single
        /// <c>HostnamePatternMatch</c> feature: default <c>false</c>, value type
        /// <c>bool</c>, discrete granularity.
        /// </summary>
        /// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
        /// <param name="miner">Hostname pattern miner kept on this instance.</param>
        /// <param name="hostnamePatternRules">Pattern rules kept on this instance.</param>
        public HostnamePatternsAddOnFeaturesGenerator(FeaturesConfig featuresConfig,
                                                      HostnamePatternMiner miner,
                                                      Dictionary<string, Dictionary<PatternRule, PatternMiningCoordinates>> hostnamePatternRules) : base(featuresConfig)
        {
            this.miner = miner;
            this.hostnamePatternRules = hostnamePatternRules;

            var defaults = new Features();
            defaults.Add(CityFeatureType.HostnamePatternMatch, false);
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            valueTypes.Add(CityFeatureType.HostnamePatternMatch, typeof(bool));
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            granularities.Add(CityFeatureType.HostnamePatternMatch, FeatureGranularity.Discrete);
            FeatureGranularities = granularities;
        }
        /// <summary>
        /// Stores the pattern miner and the mined rules, then registers the
        /// <c>HostnamePatternMatch</c> and <c>HostnamePatternConfidence</c> features.
        /// </summary>
        /// <param name="featuresConfig">Configuration passed on to the base constructor.</param>
        /// <param name="miner">Hostname pattern miner kept on this instance.</param>
        /// <param name="hostnamePatternRules">Pattern rules kept on this instance.</param>
        public HostnamePatternsFeaturesGenerator(
            FeaturesConfig featuresConfig,
            HostnamePatternMiner miner,
            Dictionary<string, Dictionary<PatternRule, PatternMiningCoordinates>> hostnamePatternRules) : base(featuresConfig)
        {
            this.miner = miner;
            this.hostnamePatternRules = hostnamePatternRules;

            // The confidence default is null when allowed, otherwise 0.
            float? confidenceDefault = this.FeaturesConfig.NullDefaultsAllowed ? (float?)null : (float?)0;

            var defaults = new Features();
            defaults.Add(CityFeatureType.HostnamePatternMatch, false);
            defaults.Add(CityFeatureType.HostnamePatternConfidence, confidenceDefault);
            FeatureDefaults = defaults;

            var valueTypes = new FeatureValueTypes();
            valueTypes.Add(CityFeatureType.HostnamePatternMatch, typeof(bool));
            valueTypes.Add(CityFeatureType.HostnamePatternConfidence, typeof(float?));
            FeatureDefaultsValueTypes = valueTypes;

            var granularities = new FeatureGranularities();
            granularities.Add(CityFeatureType.HostnamePatternMatch, FeatureGranularity.Discrete);
            granularities.Add(CityFeatureType.HostnamePatternConfidence, FeatureGranularity.Continuous);
            FeatureGranularities = granularities;
        }
 /// <summary>
 /// Builds a new <see cref="RandomForestClassifier"/>, forwarding all arguments unchanged.
 /// </summary>
 /// <param name="featureDefaultsValueTypes">Passed straight to the classifier constructor.</param>
 /// <param name="featureGranularities">Passed straight to the classifier constructor.</param>
 /// <param name="serializedClassifierPath">Optional path, passed straight to the classifier constructor.</param>
 /// <returns>The newly created classifier.</returns>
 public IClassifier CreateInstance(FeatureValueTypes featureDefaultsValueTypes, FeatureGranularities featureGranularities, string serializedClassifierPath = null)
 {
     IClassifier classifier = new RandomForestClassifier(
         featureDefaultsValueTypes,
         featureGranularities,
         serializedClassifierPath);

     return classifier;
 }
Exemple #10
0
        /// <summary>
        /// Creates the logistic regression classifier: configures an iteratively
        /// reweighted least squares learner and, when a path is given, loads a
        /// previously serialized regression from it.
        /// </summary>
        /// <param name="featureDefaultsValueTypes">Not read by this constructor.</param>
        /// <param name="featureGranularities">Not read by this constructor.</param>
        /// <param name="serializedClassifierPath">Optional path of a serialized regression; nothing is loaded when null.</param>
        public LogisticRegressionClassifier(FeatureValueTypes featureDefaultsValueTypes, FeatureGranularities featureGranularities, string serializedClassifierPath = null)
        {
            var irls = new IterativeReweightedLeastSquares<LogisticRegression>();
            irls.Tolerance = 1e-4;   // Convergence tolerance
            irls.Iterations = 500;   // Maximum number of iterations to perform
            irls.Regularization = 0;
            Learner = irls;

            if (serializedClassifierPath == null)
            {
                return;
            }

            this.Regression = Serializer.Load<LogisticRegression>(serializedClassifierPath);
        }
Exemple #11
0
 /// <summary>
 /// Builds a new <see cref="LogisticRegressionClassifier"/>, forwarding all arguments unchanged.
 /// </summary>
 /// <param name="featureDefaultsValueTypes">Passed straight to the classifier constructor.</param>
 /// <param name="featureGranularities">Passed straight to the classifier constructor.</param>
 /// <param name="serializedClassifierPath">Optional path, passed straight to the classifier constructor.</param>
 /// <returns>The newly created classifier.</returns>
 public IClassifier CreateInstance(FeatureValueTypes featureDefaultsValueTypes, FeatureGranularities featureGranularities, string serializedClassifierPath = null)
 {
     IClassifier classifier = new LogisticRegressionClassifier(
         featureDefaultsValueTypes,
         featureGranularities,
         serializedClassifierPath);

     return classifier;
 }