/// <summary>
/// Builds a list with one document set per direct child category of this instance.
/// Each set is filled with whatever the child's <c>GetAllSites()</c> returns and is
/// given the child's name.
/// </summary>
/// <returns>The list of document sets, in the enumeration order of the child categories.</returns>
public List <WebSiteDocumentsSet> GetFirstLevelCategories()
{
    var result = new List <WebSiteDocumentsSet>();

    foreach (WebDocumentsCategory child in this)
    {
        var categorySet = new WebSiteDocumentsSet();

        // Content first, name second: same order as the sets were always populated in.
        categorySet.AddRange(child.GetAllSites());
        categorySet.name = child.name;

        result.Add(categorySet);
    }

    return result;
}
/// <summary>
/// Produces a flat list of document sets for every category below this one, walking the
/// hierarchy depth-first. The name of each set is the parent path, followed by
/// <c>pathSeparator</c>, followed by the category's own name.
/// </summary>
/// <param name="parentCatName">Path of the parent category; leave blank when this category should be treated as the root.</param>
/// <returns>Flat list in which each category's set is immediately followed by the sets of its descendants.</returns>
public List <WebSiteDocumentsSet> GetAllCategories(String parentCatName = "")
{
    var flattened = new List <WebSiteDocumentsSet>();

    foreach (WebDocumentsCategory child in this)
    {
        var categorySet = new WebSiteDocumentsSet();
        categorySet.AddRange(child.siteDocuments);
        categorySet.name = parentCatName + pathSeparator + child.name;
        flattened.Add(categorySet);

        // Descend using the name as stored on the set, so it becomes the parent path one level down.
        List <WebSiteDocumentsSet> descendants = child.GetAllCategories(categorySet.name);
        flattened.AddRange(descendants);
    }

    return flattened;
}
/// <summary>
/// Deploys the specified cross-validation settings and populates this collection with folds
/// built from the given dataset.
/// </summary>
/// <remarks>
/// <para>
/// When <c>SingleFold</c> is set on the settings, a single fold named "SingleFold" that holds the
/// whole dataset is added and nothing else is done.
/// </para>
/// <para>
/// Otherwise every category of the dataset is sliced via <c>CategorySlicedFolds.Deploy</c> using
/// <c>K</c> and <c>randomFolds</c>. Then <c>K</c> folds are created, or <c>LimitFoldsExecution</c>
/// folds when that value is greater than zero. For fold <c>i</c> and slice <c>s</c>, a <c>true</c>
/// entry at <c>[i][s]</c> of the distribution matrix sends a weak clone of the slice to the
/// same-named category of the fold; a <c>false</c> entry sends it to the fold's
/// <c>SpaceLabel.UNKNOWN</c> set.
/// </para>
/// </remarks>
/// <param name="_settings">The settings.</param>
/// <param name="_dataset">Un-folded dataset, without having the unknown class defined. It is enumerated more than once.</param>
/// <param name="logger">The logger. Not used by the current implementation.</param>
/// <exception cref="ArgumentNullException"><paramref name="_settings"/> or <paramref name="_dataset"/> is null.</exception>
public void Deploy(CrossValidationModel _settings, IEnumerable <WebSiteDocumentsSet> _dataset, ILogBuilder logger)
{
    // Validate before touching any state, so a bad call does not leave a null `settings` behind.
    if (_settings == null)
    {
        throw new ArgumentNullException(nameof(_settings));
    }

    if (_dataset == null)
    {
        throw new ArgumentNullException(nameof(_dataset));
    }

    settings = _settings;

    if (settings.SingleFold)
    {
        name = "1-fold -- single fold override";

        ExperimentDataSetFold fold = new ExperimentDataSetFold();
        fold.name = "SingleFold";
        fold.AddRange(_dataset);
        Add(fold);
        return;
    }

    // Keep a reference to the source only when it is itself a fold instance.
    if (_dataset is ExperimentDataSetFold foldInstance)
    {
        dataset = foldInstance;
    }

    name = settings.K + "-fold Tr[" + _settings.TrainingFolds + "] Ts[" + _settings.TestFolds + "]";

    // Slice every category into K parts, keyed by the source category.
    Dictionary <WebSiteDocumentsSet, CategorySlicedFolds> slicedFolds = new Dictionary <WebSiteDocumentsSet, CategorySlicedFolds>();

    foreach (WebSiteDocumentsSet cat in _dataset)
    {
        CategorySlicedFolds fold = new CategorySlicedFolds();
        fold.Deploy(cat, settings.K, settings.randomFolds);
        slicedFolds.Add(cat, fold);
    }

    // --------------------------------------------------------- //
    var distributionMatrix = settings.GetDistributionMatrix();

    Int32 foldsToCreate = settings.K;
    if (settings.LimitFoldsExecution > 0)
    {
        // NOTE(review): presumably LimitFoldsExecution is expected not to exceed K, since it is
        // used as the row count into the distribution matrix - confirm.
        foldsToCreate = settings.LimitFoldsExecution;
    }

    for (int i = 0; i < foldsToCreate; i++)
    {
        ExperimentDataSetFold setFold = new ExperimentDataSetFold();
        setFold.name = settings.K + "-fold[" + i + "]";

        // NOTE(review): presumably creates one empty category per dataset category in the fold;
        // the lookup by name below throws if no category with the same name exists - confirm.
        setFold.CopyLabelNames(_dataset);

        WebSiteDocumentsSet unknownCat = new WebSiteDocumentsSet(SpaceLabel.UNKNOWN, "Test category - " + setFold.name);
        setFold.Add(unknownCat);

        foreach (KeyValuePair <WebSiteDocumentsSet, CategorySlicedFolds> catPair in slicedFolds)
        {
            WebSiteDocumentsSet cat = setFold.First(x => x.name == catPair.Key.name);

            for (int s = 0; s < settings.K; s++)
            {
                bool toTraining = distributionMatrix[i][s];

                if (toTraining)
                {
                    cat.AddRange(catPair.Value[s].WeakClone());
                }
                else
                {
                    unknownCat.AddRange(catPair.Value[s].WeakClone());
                }
            }
        }

        Add(setFold);
    }
}