/// <summary>
/// Creates a cluster from the given configuration, forwarding it to the base constructor.
/// </summary>
/// <param name="config">Clustering configuration passed to the base class.</param>
/// <param name="vocabulary">
/// Vocabulary stored on the instance in DEBUG builds only; may be null.
/// In non-DEBUG builds this constructor body does not use it.
/// </param>
public ZipLineCluster(ClusteringConfig config, ClusteringVocabulary vocabulary)
    : base(config)
{
#if DEBUG
    Vocabulary = vocabulary;

    // Register "*" for the wildcard token id, but only when a vocabulary was supplied.
    Vocabulary?.UseWildcard(TokenZipNode.WildcardId, "*");
#endif
}
/// <summary>
/// Builds a <see cref="ClusteringConfig"/> whose check interval, node record max age,
/// master lock duration and max partition size are read from <paramref name="configData"/>.
/// </summary>
/// <param name="configData">Source of the integer settings, queried by the CLUSTERING_* keys.</param>
/// <returns>The populated configuration.</returns>
/// <remarks>
/// Each lookup passes the corresponding value of a default-constructed
/// <see cref="ClusteringConfig"/> as the second argument of <c>GetInt</c>
/// (presumably the fallback used when the key is missing - confirm against IConfigData).
/// </remarks>
private static IClusteringConfig GetClusteringConfig(IConfigData configData)
{
    var defaults = new ClusteringConfig();
    var result = new ClusteringConfig();

    // Properties are assigned one by one, in the same order as they are read.
    result.CheckIntervalMsecs = configData.GetInt(CLUSTERING_CHECK_INTERVAL_KEY, defaults.CheckIntervalMsecs);
    result.NodeRecordMaxAgeMsecs = configData.GetInt(CLUSTERING_NODE_RECORD_MAX_AGE_KEY, defaults.NodeRecordMaxAgeMsecs);
    result.MasterLockDurationMsecs = configData.GetInt(CLUSTERING_MASTER_LOCK_MAX_AGE_KEY, defaults.MasterLockDurationMsecs);
    result.MaxPartitionSize = configData.GetInt(CLUSTERING_MAX_PARTITION_SIZE_KEY, defaults.MaxPartitionSize);

    return result;
}
/// <summary>
/// Sets the master lock duration in milliseconds and checks that it can be read back
/// both in milliseconds and in seconds.
/// </summary>
public void ItAllowsToSet_MasterLockDuration_InMsecsAndRetrieveInSecsAndMsecs()
{
    // Arrange
    const int DurationMsecs = 20100;
    const int DurationSecsRoundedUp = 21;
    var config = new ClusteringConfig();

    // Act
    config.MasterLockDurationMsecs = DurationMsecs;

    // Assert - the seconds value is expected to be rounded up (ceiling)
    Assert.Equal(DurationMsecs, config.MasterLockDurationMsecs);
    Assert.Equal(DurationSecsRoundedUp, config.MasterLockDurationSecs);
}
/// <summary>
/// Sets the node record max age in milliseconds and checks that it can be read back
/// both in milliseconds and in seconds.
/// </summary>
public void ItAllowsToSet_NodeRecordMaxAge_InMsecsAndRetrieveInSecsAndMsecs()
{
    // Arrange
    const int MaxAgeMsecs = 20100;
    const int MaxAgeSecsRoundedUp = 21;
    var config = new ClusteringConfig();

    // Act
    config.NodeRecordMaxAgeMsecs = MaxAgeMsecs;

    // Assert - the seconds value is expected to be rounded up (ceiling)
    Assert.Equal(MaxAgeMsecs, config.NodeRecordMaxAgeMsecs);
    Assert.Equal(MaxAgeSecsRoundedUp, config.NodeRecordMaxAgeSecs);
}
/// <summary>
/// Creates a <see cref="Clustering"/> engine configured with a minimum cluster affinity
/// of 0.85 and a single weighted token entry with weight 10.
/// </summary>
/// <returns>A new clustering engine built from that configuration.</returns>
private static Clustering CreateClusteringEngine()
{
    var settings = new ClusteringConfig();
    settings.MinClusterAffinity = 0.85f;

    // Single weighted entry; the key is kept verbatim from the original code.
    var weights = new Dictionary<string, float>();
    weights.Add("Important Fragment Regex Here", 10);
    settings.WeightedTokens = weights;

    return new Clustering(settings);
}
/// <summary>
/// Checks that <c>MaxPartitionSize</c> accepts the boundary values 1 and 10000 and
/// throws <c>InvalidConfigurationException</c> for values just outside that range.
/// </summary>
public void ItSupportsOnlyValid_MaxPartitionSize()
{
    // Arrange
    const int LowerBound = 1;
    const int UpperBound = 10000;
    var config = new ClusteringConfig();

    // Act - both boundary values must be accepted without throwing
    config.MaxPartitionSize = LowerBound;
    config.MaxPartitionSize = UpperBound;

    // Assert - one step outside either boundary is rejected
    Assert.Throws<InvalidConfigurationException>(() => config.MaxPartitionSize = LowerBound - 1);
    Assert.Throws<InvalidConfigurationException>(() => config.MaxPartitionSize = UpperBound + 1);
}
/// <summary>
/// Checks that <c>NodeRecordMaxAgeMsecs</c> accepts the boundary values 10000 and 600000 and
/// throws <c>InvalidConfigurationException</c> for values just outside that range.
/// </summary>
public void ItSupportsOnlyValid_NodeRecordMaxAge()
{
    // Arrange
    const int LowerBound = 10000;
    const int UpperBound = 600000;
    var config = new ClusteringConfig();

    // Act - both boundary values must be accepted without throwing
    config.NodeRecordMaxAgeMsecs = LowerBound;
    config.NodeRecordMaxAgeMsecs = UpperBound;

    // Assert - one step outside either boundary is rejected
    Assert.Throws<InvalidConfigurationException>(() => config.NodeRecordMaxAgeMsecs = LowerBound - 1);
    Assert.Throws<InvalidConfigurationException>(() => config.NodeRecordMaxAgeMsecs = UpperBound + 1);
}
/// <summary>
/// Checks that <c>CheckIntervalMsecs</c> accepts the boundary values 1000 and 300000 and
/// throws <c>InvalidConfigurationException</c> for values just outside that range.
/// </summary>
public void ItSupportsOnlyValid_CheckInterval()
{
    // Arrange
    const int LowerBound = 1000;
    const int UpperBound = 300000;
    var config = new ClusteringConfig();

    // Act - both boundary values must be accepted without throwing
    config.CheckIntervalMsecs = LowerBound;
    config.CheckIntervalMsecs = UpperBound;

    // Assert - one step outside either boundary is rejected
    Assert.Throws<InvalidConfigurationException>(() => config.CheckIntervalMsecs = LowerBound - 1);
    Assert.Throws<InvalidConfigurationException>(() => config.CheckIntervalMsecs = UpperBound + 1);
}
/// <summary>
/// Computes a pair of weights for <paramref name="item"/> against the zip tree rooted at
/// <paramref name="rootZipNode"/>, combining the result of the recursive overload with a
/// per-token adjustment.
/// </summary>
/// <param name="item">Item whose token places map and token index are evaluated.</param>
/// <param name="rootZipNode">Root node handed to the recursive overload.</param>
/// <param name="config">Supplies <c>MinClusterAffinity</c>, used to derive the encounter threshold.</param>
/// <param name="itemsCount">Item count used for the threshold and forwarded to the recursive overload.</param>
/// <returns>
/// A tuple whose first component is the recursive result's first component plus the matched
/// weight, and whose second component is the recursive result's second component plus the
/// matched and missed weights (named "matching" and "total" weight in the original code).
/// </returns>
protected Tuple<float, float> CalculateAffinity(ClusterItem item, TokenZipNode rootZipNode, ClusteringConfig config, int itemsCount)
{
    Dictionary<int, int[]> tokenPlaces = item.GetTokenPlacesMap();
    Debug.Assert(tokenPlaces != null);

    // Threshold is floor(itemsCount * MinClusterAffinity^2).
    int encounterThreshold = (int)Math.Floor(itemsCount * config.MinClusterAffinity * config.MinClusterAffinity);

    // Passed by ref to the recursive overload; read back below to classify each token.
    HashSet<int> matchedTokenIds = new HashSet<int>();
    Tuple<float, float> treeWeights = CalculateAffinity(0, tokenPlaces, rootZipNode, itemsCount, encounterThreshold, null, ref matchedTokenIds);

    float matchedWeight = 0.0f;
    float missedWeight = 0.0f;
    int missRun = 1;

    foreach (Token token in item.TokenIndex.Tokens)
    {
        if (!matchedTokenIds.Contains(token.Id))
        {
            // Consecutive misses weigh progressively more: run position raised to 1.25.
            missedWeight += (float)Math.Pow(missRun, 1.25);
            missRun++;
            continue;
        }

        // A matched token ends the current run of misses.
        missRun = 1;
        matchedWeight += 1;
    }

    float first = treeWeights.Item1 + matchedWeight;
    float second = treeWeights.Item2 + matchedWeight + missedWeight;
    return new Tuple<float, float>(first, second);
}
/// <summary>
/// Creates a cluster from the given configuration; all work is delegated to the base constructor.
/// </summary>
/// <param name="config">Clustering configuration passed to the base class.</param>
public ZipLineCluster(ClusteringConfig config)
    : base(config)
{
}