コード例 #1
0
        /// <summary>
        /// Builds one document set per first level category of this category. Each set carries the name of
        /// its category and is filled with everything that category's <c>GetAllSites()</c> returns.
        /// </summary>
        /// <returns>The document sets, in the order in which this category enumerates its sub-categories.</returns>
        public List<WebSiteDocumentsSet> GetFirstLevelCategories()
        {
            var result = new List<WebSiteDocumentsSet>();

            foreach (WebDocumentsCategory category in this)
            {
                var categorySet = new WebSiteDocumentsSet();

                // Content first, name second.
                categorySet.AddRange(category.GetAllSites());
                categorySet.name = category.name;

                result.Add(categorySet);
            }

            return result;
        }
コード例 #2
0
        /// <summary>
        /// Flattens the category tree below this category into a list of document sets, one set per category.
        /// The name of each set is its path in the hierarchy: parent path, <c>pathSeparator</c>, category name.
        /// </summary>
        /// <param name="parentCatName">
        /// Path of the parent category, used as name prefix. Leave blank if this category should be considered
        /// as root; the resulting names then start with <c>pathSeparator</c>.
        /// </param>
        /// <returns>The document sets, where each sub-category is immediately followed by its own descendants.</returns>
        public List<WebSiteDocumentsSet> GetAllCategories(String parentCatName = "")
        {
            var flattened = new List<WebSiteDocumentsSet>();

            foreach (WebDocumentsCategory child in this)
            {
                var childSet = new WebSiteDocumentsSet();
                childSet.AddRange(child.siteDocuments);
                childSet.name = parentCatName + pathSeparator + child.name;
                flattened.Add(childSet);

                // The name just assigned becomes the parent path for the next level of the recursion.
                List<WebSiteDocumentsSet> descendants = child.GetAllCategories(childSet.name);
                flattened.AddRange(descendants);
            }

            return flattened;
        }
コード例 #3
0
        /// <summary>
        /// Deploys the specified settings: builds the experiment folds from the given dataset and adds them to this collection.
        /// </summary>
        /// <remarks>
        /// When <c>SingleFold</c> is set, one fold named "SingleFold" holding the whole dataset is added and the method returns.
        /// Otherwise every category is sliced into <c>K</c> parts. For each fold, the distribution matrix of the settings
        /// decides slice by slice whether the slice goes into its own category (training) or into the shared
        /// <c>SpaceLabel.UNKNOWN</c> category (test). The number of folds created is <c>K</c>, or
        /// <c>LimitFoldsExecution</c> when that value is positive and smaller than <c>K</c>.
        /// </remarks>
        /// <param name="_settings">The settings.</param>
        /// <param name="_dataset">
        /// Un-folded dataset, without having the unknown class defined. It is iterated here and also handed to
        /// <c>CopyLabelNames</c> once per fold, so it should be safe to enumerate repeatedly.
        /// </param>
        /// <param name="logger">The logger. Not used by this method.</param>
        public void Deploy(CrossValidationModel _settings, IEnumerable<WebSiteDocumentsSet> _dataset, ILogBuilder logger)
        {
            settings = _settings;

            if (settings.SingleFold)
            {
                name = "1-fold -- single fold override";
                ExperimentDataSetFold fold = new ExperimentDataSetFold();
                fold.name = "SingleFold";
                fold.AddRange(_dataset);
                Add(fold);
                return;
            }

            // Only the multi-fold path remembers the source dataset, and only if it already is a fold instance.
            if (_dataset is ExperimentDataSetFold foldInstance)
            {
                dataset = foldInstance;
            }

            name = settings.K + "-fold Tr[" + _settings.TrainingFolds + "] Ts[" + _settings.TestFolds + "]";

            // Slice every category into K parts; the slices are recombined per fold below.
            Dictionary<WebSiteDocumentsSet, CategorySlicedFolds> slicedFolds = new Dictionary<WebSiteDocumentsSet, CategorySlicedFolds>();

            foreach (WebSiteDocumentsSet cat in _dataset)
            {
                CategorySlicedFolds fold = new CategorySlicedFolds();
                fold.Deploy(cat, settings.K, settings.randomFolds);
                slicedFolds.Add(cat, fold);
            }

            // --------------------------------------------------------- //

            var distributionMatrix = settings.GetDistributionMatrix();

            Int32 foldsToCreate = settings.K;

            if (settings.LimitFoldsExecution > 0)
            {
                // The limit may only reduce the number of folds. A value above K would otherwise index
                // distributionMatrix beyond the K rows used in the default case.
                foldsToCreate = Math.Min(settings.K, settings.LimitFoldsExecution);
            }

            for (int i = 0; i < foldsToCreate; i++)
            {
                ExperimentDataSetFold setFold = new ExperimentDataSetFold();
                setFold.name = settings.K + "-fold[" + i + "]";

                // NOTE(review): presumably creates one empty category per label of the source dataset - confirm.
                // The name lookup below relies on those categories being present.
                setFold.CopyLabelNames(_dataset);

                WebSiteDocumentsSet unknownCat = new WebSiteDocumentsSet(SpaceLabel.UNKNOWN, "Test category - " + setFold.name);

                setFold.Add(unknownCat);

                foreach (KeyValuePair<WebSiteDocumentsSet, CategorySlicedFolds> catPair in slicedFolds)
                {
                    // Throws InvalidOperationException when the fold has no category with the source category's name.
                    WebSiteDocumentsSet cat = setFold.First(x => x.name == catPair.Key.name);

                    for (int s = 0; s < settings.K; s++)
                    {
                        // Row i of the matrix describes fold i: true sends slice s to training, false to test.
                        bool toTraining = distributionMatrix[i][s];

                        if (toTraining)
                        {
                            cat.AddRange(catPair.Value[s].WeakClone());
                        }
                        else
                        {
                            unknownCat.AddRange(catPair.Value[s].WeakClone());
                        }
                    }
                }

                Add(setFold);
            }
        }