/// <summary>
/// Replaces, in place, the integer identifiers held in the entity list of
/// <paramref name="cluster"/> with the documents found at those positions in
/// <paramref name="documents"/>, then applies the same replacement recursively
/// to each of the cluster's supergroups and subgroups.
/// </summary>
/// <param name="cluster">
/// Group to process. A null group, or a group whose entity list is null or
/// contains no integers, is left untouched (its related groups are still visited).
/// </param>
/// <param name="documents">
/// Documents addressed positionally: identifier <c>n</c> resolves to <c>documents[n]</c>.
/// </param>
/// <returns>
/// A result whose <c>theresError</c> flag is set (with <c>error</c> filled in) when the
/// entity list mixes identifiers with other objects, when an identifier does not
/// resolve to a document, or when any exception is raised while processing.
/// The entity list of a group is only modified when all of its identifiers resolve.
/// </returns>
private Return<object> ChangeIdsByDocuments(BlatellaGroup cluster, List<Document> documents)
{
    Return<object> _answer = new Return<object>();
    try
    {
        if (cluster == null)
            return _answer;

        if (cluster.entities != null && cluster.entities.Any(e => e is int))
        {
            if (cluster.entities.Any(e => !(e is int)))
            {
                // Identifiers and already-resolved objects must not be mixed in one group.
                string _message = "There can be no identifiers and documents simultaneously";
                _answer.theresError = true;
                _answer.error = Utility.GetError(new Exception(_message), this.GetType());
            }
            else
            {
                // Every entity is an identifier: resolve all of them into a scratch list
                // first so the group is left unchanged if any lookup fails.
                List<object> _newEntities = new List<object>();
                foreach (object _e in cluster.entities)
                {
                    int _id = (int)_e;

                    // An identifier outside the list bounds is reported exactly like an
                    // empty slot, instead of surfacing the indexer's generic
                    // ArgumentOutOfRangeException through the catch-all below.
                    Document _document = (_id >= 0 && _id < documents.Count) ? documents[_id] : null;
                    if (_document == null)
                    {
                        string _message = string.Format("It was not possible to find any document with id {0}", Convert.ToString(_id));
                        _answer.theresError = true;
                        _answer.error = Utility.GetError(new Exception(_message), this.GetType());
                        break;
                    }

                    _newEntities.Add(_document);
                }

                if (!_answer.theresError)
                {
                    cluster.entities.Clear();
                    cluster.entities.AddRange(_newEntities);
                }
            }
        }

        // NOTE(review): the recursion walks both supergroups and subgroups; this assumes
        // the groups do not reference each other cyclically - confirm against BlatellaGroup.
        if (!_answer.theresError && cluster.supergroups != null)
        {
            foreach (BlatellaGroup _c in cluster.supergroups)
            {
                _answer = ChangeIdsByDocuments(_c, documents);
                if (_answer.theresError)
                    break;
            }
        }

        if (!_answer.theresError && cluster.subgroups != null)
        {
            foreach (BlatellaGroup _c in cluster.subgroups)
            {
                _answer = ChangeIdsByDocuments(_c, documents);
                if (_answer.theresError)
                    break;
            }
        }
    }
    catch (Exception _ex)
    {
        // Anything unexpected (for example a null documents list) is reported, not thrown.
        _answer.theresError = true;
        _answer.error = Utility.GetError(_ex, this.GetType());
    }
    return _answer;
}
/// <summary>
/// Performs a k-means grouping of the given entities.
/// </summary>
/// <param name="entities">
/// Entities to group: the outer key is the object id, the inner key is the variable id
/// and the inner value is the variable description (its <c>weight</c> is used as the
/// coordinate for that variable).
/// </param>
/// <returns>
/// A result whose <c>data</c> is a list of groupings. On success it contains one grouping:
/// a single group holding everything when there are two entities or fewer, otherwise the
/// groups of the candidate cluster count selected by <c>GetBestK</c>. When validation,
/// hole filling, clustering or best-k selection fails, or an exception is raised,
/// <c>theresError</c> is set and <c>error</c> describes the failure.
/// </returns>
public Return<List<List<BlatellaGroup>>> GroupingKMeans(SortedList<int, SortedList<int, ICharacteristic>> entities)
{
    Return<List<List<BlatellaGroup>>> _answer = new Return<List<List<BlatellaGroup>>>() { data = new List<List<BlatellaGroup>>() };

    if (entities == null)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(new ArgumentNullException("entities"), this.GetType());
        return _answer;
    }

    try
    {
        // Validation.
        Return<object> _answerValidateCharacteristics = ValidateCharacteristics(entities);
        if (_answerValidateCharacteristics.theresError)
        {
            _answer.theresError = true;
            _answer.error = _answerValidateCharacteristics.error;
            return _answer;
        }

        // Fill holes.
        Return<object> _answerFillHoles = FillHoles(entities);
        if (_answerFillHoles.theresError)
        {
            _answer.theresError = true;
            _answer.error = _answerFillHoles.error;
            return _answer;
        }

        if (entities.Count <= 2)
        {
            // Too few entities to cluster: everything goes into one group.
            // NOTE(review): this path stores the characteristic lists themselves as group
            // entities, whereas the clustering path below stores integer entity ids -
            // confirm that callers expect this difference.
            BlatellaGroup _group = new BlatellaGroup();
            foreach (int _id in entities.Keys)
                _group.entities.Add(entities[_id]);
            _answer.data.Add(new List<BlatellaGroup>() { _group });
            return _answer;
        }

        // One vector per entity, built from the characteristic weights ordered by variable id.
        SortedList<int, Vector<decimal>> _entities = new SortedList<int, Vector<decimal>>();
        foreach (KeyValuePair<int, SortedList<int, ICharacteristic>> _kv in entities)
            _entities.Add(_kv.Key, new Vector<decimal>(_kv.Key, _kv.Value.OrderBy(ii => ii.Key).Select(c => c.Value.weight).ToArray()));

        // Candidate cluster counts are 2 <= k < sqrt(n). For n = 3 or 4 that range is empty,
        // which would leave nothing for GetBestK to choose from, so k = 2 is always tried.
        // For n >= 5 the integer bound below selects exactly the same k values as k < sqrt(n).
        int _upperBound = Math.Max(3, (int)Math.Ceiling(Math.Sqrt(entities.Count)));

        SortedList<int, SortedList<int, List<int>>> _groupsByk = new SortedList<int, SortedList<int, List<int>>>();
        for (int _numberOfClusters = 2; _numberOfClusters < _upperBound; _numberOfClusters++)
        {
            Return<SortedList<int, List<int>>> _answerGroupingKMeans = GroupingKMeans(_entities, _numberOfClusters);
            if (_answerGroupingKMeans.theresError)
            {
                _answer.theresError = true;
                _answer.error = _answerGroupingKMeans.error;
                return _answer;
            }
            _groupsByk.Add(_numberOfClusters, _answerGroupingKMeans.data);
        }

        Return<int> _answerGetBestK = GetBestK(_groupsByk, _entities);
        if (_answerGetBestK.theresError)
        {
            _answer.theresError = true;
            _answer.error = _answerGetBestK.error;
            return _answer;
        }

        // Turn the winning clustering (centroid id -> entity ids) into one group per centroid.
        List<BlatellaGroup> _listGroups = new List<BlatellaGroup>();
        SortedList<int, List<int>> _groups = _groupsByk[_answerGetBestK.data];
        foreach (int _idCentroid in _groups.Keys)
        {
            BlatellaGroup _group = new BlatellaGroup();
            foreach (int _idEntity in _groups[_idCentroid])
                _group.entities.Add(_idEntity);
            _listGroups.Add(_group);
        }
        _answer.data.Add(_listGroups);
    }
    catch (Exception _ex)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(_ex, this.GetType());
    }

    return _answer;
}