예제 #1
0
        /// <summary>
        /// Creates a cluster from the given configuration. In DEBUG builds the supplied
        /// vocabulary is also stored on the instance and, when it is not null,
        /// <c>UseWildcard</c> is called on it with <c>TokenZipNode.WildcardId</c> and "*".
        /// </summary>
        /// <param name="config">Clustering settings, forwarded to the base constructor.</param>
        /// <param name="vocabulary">Vocabulary used only in DEBUG builds; may be null.</param>
        public ZipLineCluster(ClusteringConfig config, ClusteringVocabulary vocabulary)
            : base(config)
        {
#if DEBUG
            // This section is compiled out of non-DEBUG builds, where the argument is ignored.
            Vocabulary = vocabulary;
            Vocabulary?.UseWildcard(TokenZipNode.WildcardId, "*");
#endif
        }
예제 #2
0
        /// <summary>
        /// Builds the clustering configuration by reading the check interval, node record
        /// max age, master lock duration and max partition size from the configuration data.
        /// </summary>
        /// <param name="configData">Source of the configured integer values.</param>
        /// <returns>A new <see cref="ClusteringConfig"/> populated from <paramref name="configData"/>.</returns>
        private static IClusteringConfig GetClusteringConfig(IConfigData configData)
        {
            // A pristine instance supplies the second argument of every GetInt call
            // (presumably the fallback used when a key is missing - confirm against IConfigData).
            var fallback = new ClusteringConfig();
            var result = new ClusteringConfig();

            result.CheckIntervalMsecs = configData.GetInt(
                CLUSTERING_CHECK_INTERVAL_KEY,
                fallback.CheckIntervalMsecs);

            result.NodeRecordMaxAgeMsecs = configData.GetInt(
                CLUSTERING_NODE_RECORD_MAX_AGE_KEY,
                fallback.NodeRecordMaxAgeMsecs);

            result.MasterLockDurationMsecs = configData.GetInt(
                CLUSTERING_MASTER_LOCK_MAX_AGE_KEY,
                fallback.MasterLockDurationMsecs);

            result.MaxPartitionSize = configData.GetInt(
                CLUSTERING_MAX_PARTITION_SIZE_KEY,
                fallback.MaxPartitionSize);

            return result;
        }
예제 #3
0
        // Checks that a master lock duration assigned in milliseconds is readable both
        // in milliseconds (unchanged) and in seconds (rounded up to a whole second).
        public void ItAllowsToSet_MasterLockDuration_InMsecsAndRetrieveInSecsAndMsecs()
        {
            // Arrange
            const int DurationMsecs = 20100;
            const int ExpectedSecs = 21; // 20.1 seconds, expected to be reported as the ceiling
            var config = new ClusteringConfig();

            // Act
            config.MasterLockDurationMsecs = DurationMsecs;

            // Assert
            Assert.Equal(DurationMsecs, config.MasterLockDurationMsecs);
            Assert.Equal(ExpectedSecs, config.MasterLockDurationSecs);
        }
예제 #4
0
        // Checks that a node record max age assigned in milliseconds is readable both
        // in milliseconds (unchanged) and in seconds (rounded up to a whole second).
        public void ItAllowsToSet_NodeRecordMaxAge_InMsecsAndRetrieveInSecsAndMsecs()
        {
            // Arrange
            const int MaxAgeMsecs = 20100;
            const int ExpectedSecs = 21; // 20.1 seconds, expected to be reported as the ceiling
            var config = new ClusteringConfig();

            // Act
            config.NodeRecordMaxAgeMsecs = MaxAgeMsecs;

            // Assert
            Assert.Equal(MaxAgeMsecs, config.NodeRecordMaxAgeMsecs);
            Assert.Equal(ExpectedSecs, config.NodeRecordMaxAgeSecs);
        }
예제 #5
0
        /// <summary>
        /// Creates a <see cref="Clustering"/> engine configured with a minimum cluster
        /// affinity of 0.85 and a single weighted token entry of weight 10.
        /// </summary>
        /// <returns>A new engine built from that configuration.</returns>
        private static Clustering CreateClusteringEngine()
        {
            // The key is a placeholder string; its weight is the float value 10.
            var tokenWeights = new Dictionary<string, float>();
            tokenWeights.Add("Important Fragment Regex Here", 10);

            var engineConfig = new ClusteringConfig
            {
                MinClusterAffinity = 0.85f,
                WeightedTokens = tokenWeights
            };

            return new Clustering(engineConfig);
        }
예제 #6
0
        // Checks that MaxPartitionSize accepts both boundary values (1 and 10000) and
        // throws InvalidConfigurationException for values one step outside that range.
        public void ItSupportsOnlyValid_MaxPartitionSize()
        {
            // Arrange
            const int LowestAllowed = 1;
            const int HighestAllowed = 10000;
            var config = new ClusteringConfig();

            // Act - assigning either boundary must not throw
            config.MaxPartitionSize = LowestAllowed;
            config.MaxPartitionSize = HighestAllowed;

            // Assert - just below and just above the range are rejected
            Assert.Throws<InvalidConfigurationException>(() => config.MaxPartitionSize = LowestAllowed - 1);
            Assert.Throws<InvalidConfigurationException>(() => config.MaxPartitionSize = HighestAllowed + 1);
        }
예제 #7
0
        // Checks that NodeRecordMaxAgeMsecs accepts both boundary values (10000 and 600000)
        // and throws InvalidConfigurationException for values one step outside that range.
        public void ItSupportsOnlyValid_NodeRecordMaxAge()
        {
            // Arrange
            const int LowestAllowed = 10000;
            const int HighestAllowed = 600000;
            var config = new ClusteringConfig();

            // Act - assigning either boundary must not throw
            config.NodeRecordMaxAgeMsecs = LowestAllowed;
            config.NodeRecordMaxAgeMsecs = HighestAllowed;

            // Assert - just below and just above the range are rejected
            Assert.Throws<InvalidConfigurationException>(() => config.NodeRecordMaxAgeMsecs = LowestAllowed - 1);
            Assert.Throws<InvalidConfigurationException>(() => config.NodeRecordMaxAgeMsecs = HighestAllowed + 1);
        }
예제 #8
0
        // Checks that CheckIntervalMsecs accepts both boundary values (1000 and 300000)
        // and throws InvalidConfigurationException for values one step outside that range.
        public void ItSupportsOnlyValid_CheckInterval()
        {
            // Arrange
            const int LowestAllowed = 1000;
            const int HighestAllowed = 300000;
            var config = new ClusteringConfig();

            // Act - assigning either boundary must not throw
            config.CheckIntervalMsecs = LowestAllowed;
            config.CheckIntervalMsecs = HighestAllowed;

            // Assert - just below and just above the range are rejected
            Assert.Throws<InvalidConfigurationException>(() => config.CheckIntervalMsecs = LowestAllowed - 1);
            Assert.Throws<InvalidConfigurationException>(() => config.CheckIntervalMsecs = HighestAllowed + 1);
        }
예제 #9
0
        /// <summary>
        /// Computes a (matching weight, total weight) pair for <paramref name="item"/> against the
        /// zip tree rooted at <paramref name="rootZipNode"/>, then adjusts both weights by walking
        /// the item's own tokens.
        /// </summary>
        /// <param name="item">Item whose token places map and token index are evaluated.</param>
        /// <param name="rootZipNode">Root node handed to the recursive overload.</param>
        /// <param name="config">Supplies <c>MinClusterAffinity</c>, used to derive the encounter threshold.</param>
        /// <param name="itemsCount">Item count used for the threshold and forwarded to the recursive overload.</param>
        /// <returns>
        /// Item1: the overload's first weight plus 1 for every item token present in the found set.
        /// Item2: the overload's second weight plus the found weight plus the penalty accumulated
        /// for tokens absent from the found set.
        /// </returns>
        protected Tuple<float, float> CalculateAffinity(ClusterItem item, TokenZipNode rootZipNode, ClusteringConfig config, int itemsCount)
        {
            Dictionary<int, int[]> tokenPlaces = item.GetTokenPlacesMap();
            Debug.Assert(tokenPlaces != null);

            // Threshold = floor(itemsCount * affinity * affinity), evaluated left to right.
            int minEncounters = (int)Math.Floor(itemsCount * config.MinClusterAffinity * config.MinClusterAffinity);

            // Passed by ref to the recursive overload (presumably filled with matched token ids
            // there - confirm against that overload).
            var matchedTokenIds = new HashSet<int>();
            Tuple<float, float> treeWeights = CalculateAffinity(0, tokenPlaces, rootZipNode, itemsCount, minEncounters, null, ref matchedTokenIds);

            float matchedWeight = 0.0f;
            float missedWeight = 0.0f;

            // 1-based position inside the current run of consecutive unmatched tokens.
            int missRun = 1;

            foreach (Token token in item.TokenIndex.Tokens)
            {
                if (!matchedTokenIds.Contains(token.Id))
                {
                    // Each further miss in a row costs more: position raised to the power 1.25.
                    missedWeight += (float)Math.Pow(missRun, 1.25);
                    missRun++;
                    continue;
                }

                // A matched token ends the run of misses and contributes a weight of 1.
                missRun = 1;
                matchedWeight += 1;
            }

            return new Tuple<float, float>(
                treeWeights.Item1 + matchedWeight,
                treeWeights.Item2 + matchedWeight + missedWeight);
        }
예제 #10
0
 /// <summary>
 /// Creates a cluster that only forwards <paramref name="config"/> to the base constructor.
 /// </summary>
 /// <param name="config">Clustering settings passed to the base class.</param>
 public ZipLineCluster(ClusteringConfig config)
     : base(config)
 {
 }