/// <summary>
/// Convenience overload: builds the vector space model without partitioning the documents.
/// Delegates to the four-argument overload with a partition size of zero (zero means
/// "process all documents in a single pass").
/// </summary>
/// <param name="infos">Source items to model.</param>
/// <param name="taskName">Task whose dynamic tables will be used.</param>
/// <param name="training">Training the model belongs to.</param>
/// <returns>The wrapped result of the four-argument overload.</returns>
public Return<object> CreateVectorSpaceModel(List<InfoGrp_Class> infos, string taskName, Training training)
{
    const int noPartitioning = 0;
    return CreateVectorSpaceModel(infos, taskName, training, noPartitioning);
}
/// <summary>
/// Builds and persists the vector space model for a training: validates the arguments, builds a
/// sorted vocabulary, creates the dynamic document/word/VSM tables, inserts the documents (only
/// those with at least one vocabulary word) and the words, then computes the model partition by
/// partition — all inside one SQL transaction, committed on success and rolled back on any step error.
/// </summary>
/// <param name="infos">Source items; must not be null. Items with an empty wordsFromVocabulary are skipped.</param>
/// <param name="taskName">Task name used to derive the dynamic table names; must be non-blank.</param>
/// <param name="training">Training the model belongs to; must not be null.</param>
/// <param name="sizePartition">Documents per partition; 0 processes everything in one pass. Must be >= 0.</param>
/// <returns>Error wrapper; data is not populated on success.</returns>
// NOTE(review): if an exception is thrown after CreateTableForTaskTraining hands back the
// transaction, the catch block neither rolls it back nor disposes it — presumably it dies with
// the connection; confirm, and consider a try/finally around the transactional section.
public Return<object> CreateVectorSpaceModel(List<InfoGrp_Class> infos, string taskName, Training training, int sizePartition) { Return<object> _answer = new Return<object>(); if (infos == null) { _answer.theresError = true; _answer.error = Utility.GetError(new ArgumentNullException("infos"), this.GetType()); } else if (string.IsNullOrWhiteSpace(taskName)) { _answer.theresError = true; _answer.error = Utility.GetError(new ArgumentNullException("taskName"), this.GetType()); } else if (training == null) { _answer.theresError = true; _answer.error = Utility.GetError(new ArgumentNullException("training"), this.GetType()); } else if (sizePartition < 0) { _answer.theresError = true; _answer.error = Utility.GetError(new ArgumentException("can not be lower than zero", "sizePartition"), this.GetType()); } else { try { Return<List<string>> _answerCreateVocabulary = CreateVocabulary(infos); if (_answerCreateVocabulary.theresError) { _answer.theresError = true; _answer.error = _answerCreateVocabulary.error; } else { List<string> _vocabulary = _answerCreateVocabulary.data.OrderBy(p => p).ToList(); List<InfoGrp_Class> _usefulInfos = infos.Where(i => i.wordsFromVocabulary.Any()).ToList(); Return<Tuple<SqlTransaction, string, string, string>> _answerCreateTableForTaskTraining = this.grpClassDataAccess.CreateTableForTaskTraining(taskName, training.name); if (_answerCreateTableForTaskTraining.theresError) { _answer.theresError = true; _answer.error = _answerCreateTableForTaskTraining.error; } else { SqlTransaction _transaction = _answerCreateTableForTaskTraining.data.Item1; string _nameForDocumentTable = _answerCreateTableForTaskTraining.data.Item2 , _nameForWordTable = _answerCreateTableForTaskTraining.data.Item3 , _nameForVectorSpaceModelTable = _answerCreateTableForTaskTraining.data.Item4; int _nextDocumentId = 0, _nextWordId = 0; List<Document> _documents = null; List<Word> _words = null; Stopwatch _stopwatch = new Stopwatch(); _stopwatch.Start(); if (!_answer.theresError) { 
// fetch the next free id for each dynamic table before inserting anything
Return<int> _answerGetNextDocumentId = this.grpClassDataAccess.GetNextDocumentId(_transaction, _nameForDocumentTable); if (_answerGetNextDocumentId.theresError) { _answer.theresError = true; _answer.error = _answerGetNextDocumentId.error; } else _nextDocumentId = _answerGetNextDocumentId.data; } if (!_answer.theresError) { Return<int> _answerGetNextWordId = this.grpClassDataAccess.GetNextWordId(_transaction, _nameForWordTable); if (_answerGetNextWordId.theresError) { _answer.theresError = true; _answer.error = _answerGetNextWordId.error; } else _nextWordId = _answerGetNextWordId.data; } if (!_answer.theresError) {//let's create the documents _documents = _usefulInfos.Select(i => new Document() { id = _nextDocumentId++ , groupId = i.groupId , language = !i.language.HasValue ? string.Empty : Enum.GetName(typeof(EnumLanguage), i.language.Value) , information = i.information , wordsFromVocabulary = i.wordsFromVocabulary } ).ToList(); Return<int> _answerAddDocuments = this.grpClassDataAccess.AddDocuments(_documents, _transaction, _nameForDocumentTable); if (_answerAddDocuments.theresError) { _answer.theresError = true; _answer.error = _answerAddDocuments.error; } }//let's create the documents if (!_answer.theresError) {//let's create the words _words = _vocabulary.Select(_w => new Word() { id = _nextWordId++ , trainingId = training.id , word = _w } ) .ToList(); Return<int> _answerAddWords = this.grpClassDataAccess.AddWords(_words, _transaction, _nameForWordTable); if (_answerAddWords.theresError) { _answer.theresError = true; _answer.error = _answerAddWords.error; } }//let's create the words if (!_answer.theresError) {//let's create partitions into the documents in order to save memory IEnumerable<IGrouping<int, Document>> _groupedUsefulDocuments = _documents.GroupBy(i => i.groupId); List<List<Document>> _usefulDocumentsPartitioned = new List<List<Document>>(); if (sizePartition > 0) _usefulDocumentsPartitioned.AddRange(_groupedUsefulDocuments.SelectMany(g => 
// partitions never straddle a groupId: documents are grouped first, then split by size
g.ToList().PartitionAccordingSizeOfPartitions(sizePartition))); else _usefulDocumentsPartitioned.Add(_documents); foreach (List<Document> _documentsPartition in _usefulDocumentsPartitioned) {//partition to partition if (!_answer.theresError) { _answer = CreateVectorSpaceModel(_documentsPartition, training.id, _words, _transaction, _nameForVectorSpaceModelTable); } if (_answer.theresError) break; }//partition to partition }//let's create partitions into the documents in order to save memory _stopwatch.Stop(); if (_answer.theresError) _transaction.Rollback(); else _transaction.Commit(); } } } catch (Exception _ex) { _answer.theresError = true; _answer.error = Utility.GetError(_ex, this.GetType()); } } return _answer; }
/// <summary>
/// Loads the persisted documents, words and vector space model rows for a training, attaches the
/// matching VSM rows to each document and each word, computes their binary characteristics and runs
/// the clustering utility; finally the ids inside each resulting cluster are replaced by the actual
/// documents. Returns an empty result (no error) when any of the three data sets is empty.
/// </summary>
/// <param name="taskName">Task whose dynamic tables are read.</param>
/// <param name="entity">Training to group; must not be null.</param>
/// <returns>A list of clusterings (each a list of <c>BlatellaGroup</c>), or an error wrapper.</returns>
public Return<List<List<BlatellaGroup>>> Group(string taskName, Training entity)
{
    Return<List<List<BlatellaGroup>>> _answer = new Return<List<List<BlatellaGroup>>>() { data = new List<List<BlatellaGroup>>() };
    if (entity == null)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
    }
    else
    {
        try
        {
            List<Document> _documents = new List<Document>();
            List<Word> _words = new List<Word>();
            List<VectorSpaceModel> _vectorSpaceModels = new List<VectorSpaceModel>();
            if (!_answer.theresError)
            {
                Return<List<Document>> _answerGetDocumentsForTraining = this.grpClassDataAccess.GetDocumentsForTraining(taskName, entity);
                if (_answerGetDocumentsForTraining.theresError) { _answer.theresError = true; _answer.error = _answerGetDocumentsForTraining.error; }
                else _documents = _answerGetDocumentsForTraining.data;
            }
            if (!_answer.theresError)
            {
                Return<List<Word>> _answerGetWordsForTraining = this.grpClassDataAccess.GetWordsForTraining(taskName, entity);
                if (_answerGetWordsForTraining.theresError) { _answer.theresError = true; _answer.error = _answerGetWordsForTraining.error; }
                else _words = _answerGetWordsForTraining.data;
            }
            if (!_answer.theresError)
            {
                Return<List<VectorSpaceModel>> _answerGetVectorSpaceModelsForTraining = this.grpClassDataAccess.GetVectorSpaceModelsForTraining(taskName, entity);
                if (_answerGetVectorSpaceModelsForTraining.theresError) { _answer.theresError = true; _answer.error = _answerGetVectorSpaceModelsForTraining.error; }
                else _vectorSpaceModels = _answerGetVectorSpaceModelsForTraining.data;
            }
            if (!_answer.theresError && _documents.Any() && _words.Any() && _vectorSpaceModels.Any())
            {
                _documents.ForEach(e => e.vectorSpaceModel = _vectorSpaceModels.Where(v => v.documentId == e.id).ToList());
                // BUG FIX: words were matched on v.documentId, attaching a word to the VSM rows of an
                // unrelated document that happened to share its id. A word's rows are keyed by wordId.
                _words.ForEach(e => e.vectorSpaceModel = _vectorSpaceModels.Where(v => v.wordId == e.id).ToList());
                Return<SortedList<int, SortedList<int, ICharacteristic>>> _answerGetBinaryCharacteristics =
                    GetBinaryCharacteristics(_documents, _words);
                if (_answerGetBinaryCharacteristics.theresError)
                {
                    _answer.theresError = true;
                    _answer.error = _answerGetBinaryCharacteristics.error;
                }
                else
                {
                    Return<List<List<BlatellaGroup>>> _answerGrouping = this.groupingUtility.Grouping(_answerGetBinaryCharacteristics.data);
                    if (_answerGrouping.theresError)
                    {
                        _answer.theresError = true;
                        _answer.error = _answerGrouping.error;
                    }
                    else
                    {
                        foreach (List<BlatellaGroup> _clustering in _answerGrouping.data)
                        {//clustering to clustering
                            foreach (BlatellaGroup _cluster in _clustering)
                            {//cluster to cluster
                                // swaps the document ids inside the cluster for the documents themselves
                                Return<object> _answerChangeIdsByDocuments = ChangeIdsByDocuments(_cluster, _documents);
                                if (_answerChangeIdsByDocuments.theresError) { _answer.theresError = true; _answer.error = _answerChangeIdsByDocuments.error; }
                                if (_answer.theresError) break;
                            }//cluster to cluster
                            if (_answer.theresError) break;
                        }//clustering to clustering
                        if (!_answer.theresError) _answer.data = _answerGrouping.data;
                    }
                }
            }
        }
        catch (Exception _ex)
        {
            _answer.theresError = true;
            _answer.error = Utility.GetError(_ex, this.GetType());
        }
    }
    return _answer;
}
/// <summary>
/// Stamps the training with the current modification time and persists it through the data
/// access layer. Returns the data-access result, or an error wrapper when the argument is null
/// or the persistence call throws.
/// </summary>
/// <param name="entity">Training to update; must not be null.</param>
/// <returns>Wrapped count of affected rows (0 on error).</returns>
public Return<int> UpdateTraining(Training entity)
{
    Return<int> _answer = new Return<int>() { data = 0 };
    if (entity == null)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
        return _answer;
    }
    try
    {
        // NOTE(review): local time, not UTC — confirm callers don't expect DateTime.UtcNow here.
        entity.lastModificationDate = DateTime.Now;
        _answer = this.grpClassDataAccess.UpdateTraining(entity);
    }
    catch (Exception _ex)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(_ex, this.GetType());
    }
    return _answer;
}
/// <summary>
/// Reads every vector space model row from the training's dynamic VSM table and materializes
/// them through <c>DataRowCollectionToVectorSpaceModel</c>.
/// </summary>
/// <param name="taskName">Task used to derive the dynamic table name.</param>
/// <param name="entity">Training whose table is read.</param>
/// <returns>Wrapped list of rows (empty on error), or the connection/read error.</returns>
public Return<List<VectorSpaceModel>> GetVectorSpaceModelsForTraining(string taskName, Training entity)
{
    Return<List<VectorSpaceModel>> _answer = new Return<List<VectorSpaceModel>>() { data = new List<VectorSpaceModel>() };
    Return<SqlConnection> _answerConnection = this.connection;
    if (_answerConnection.theresError)
    {
        _answer.theresError = true;
        _answer.error = _answerConnection.error;
        return _answer;
    }
    try
    {
        string _nameForVectorSpaceModelTable = GenerateDynamicNameForVectorSpaceModelTable(taskName, entity.name);
        // Column order must stay in sync with DataRowCollectionToVectorSpaceModel.
        string _fields = string.Join(",", new[]
        {
            SQLGrp_Class.FLD_VSM_DOCUMENTID, SQLGrp_Class.FLD_VSM_WORDID, SQLGrp_Class.FLD_VSM_WEIGHT_BINARY, SQLGrp_Class.FLD_VSM_WEIGHT_TERMFREQUENCY,
            SQLGrp_Class.FLD_VSM_WEIGHT_WEIGHTEDTERMFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_AUGMENTEDNORMALIZEDTERMFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_LOGARITHMIC,
            SQLGrp_Class.FLD_VSM_WEIGHT_INVERSEDOCUMENTFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_TF_IDF, SQLGrp_Class.FLD_VSM_WEIGHT_PROBABILISTICINVERSEFREQUENCY,
            SQLGrp_Class.FLD_VSM_WEIGHT_NORMALFUNCTION, SQLGrp_Class.FLD_VSM_WEIGHT_GF_IDF, SQLGrp_Class.FLD_VSM_WEIGHT_ENTROPY, SQLGrp_Class.FLD_VSM_WEIGHT_WEIGHTEDINVERSEFREQUENCY
        });
        using (SqlConnection _connection = _answerConnection.data)
        {
            if (_connection.State != ConnectionState.Open) _connection.Open();
            using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
            {
                // table name is generated internally, not user-supplied SQL; parameters don't apply to identifiers
                _command.CommandText = string.Format("select {0} from {1}", _fields, _nameForVectorSpaceModelTable);
                using (SqlDataReader _dataReader = _command.ExecuteReader())
                using (DataTable _table = new DataTable())
                {
                    _table.Load(_dataReader);
                    _answer = DataRowCollectionToVectorSpaceModel(_table.Rows);
                }
            }
        }
    }
    catch (Exception _ex)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(_ex, this.GetType());
    }
    return _answer;
}
/// <summary>
/// Reads every word row from the training's dynamic word table and materializes them through
/// <c>DataRowCollectionToWord</c>.
/// </summary>
/// <param name="taskName">Task used to derive the dynamic table name.</param>
/// <param name="entity">Training whose table is read.</param>
/// <returns>Wrapped list of words (empty on error), or the connection/read error.</returns>
public Return<List<Word>> GetWordsForTraining(string taskName, Training entity)
{
    Return<List<Word>> _answer = new Return<List<Word>>() { data = new List<Word>() };
    Return<SqlConnection> _answerConnection = this.connection;
    if (_answerConnection.theresError)
    {
        _answer.theresError = true;
        _answer.error = _answerConnection.error;
        return _answer;
    }
    try
    {
        string _nameForWordTable = GenerateDynamicNameForWordTable(taskName, entity.name);
        // Column order must stay in sync with DataRowCollectionToWord.
        string _fields = string.Join(",", new[] { SQLGrp_Class.FLD_WORD_ID, SQLGrp_Class.FLD_WORD_TRAININGID, SQLGrp_Class.FLD_WORD_WORD });
        using (SqlConnection _connection = _answerConnection.data)
        {
            if (_connection.State != ConnectionState.Open) _connection.Open();
            using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
            {
                _command.CommandText = string.Format("select {0} from {1}", _fields, _nameForWordTable);
                using (SqlDataReader _dataReader = _command.ExecuteReader())
                using (DataTable _table = new DataTable())
                {
                    _table.Load(_dataReader);
                    _answer = DataRowCollectionToWord(_table.Rows);
                }
            }
        }
    }
    catch (Exception _ex)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(_ex, this.GetType());
    }
    return _answer;
}
/// <summary>
/// Persists the mutable fields of a training row (name, type, description, state, creation and
/// last-modification dates) via a parameterized UPDATE keyed by id.
/// </summary>
/// <param name="entity">Training to persist; must not be null.</param>
/// <returns>Wrapped count of affected rows (0 on error or when no row matches the id).</returns>
// NOTE(review): creationDate is rewritten on every update — presumably it is carried unchanged on
// the entity; confirm it cannot drift from the value originally inserted.
public Return<int> UpdateTraining(Training entity)
{
    Return<int> _answer = new Return<int>() { data = 0 };
    if (entity == null)
    {
        _answer.theresError = true;
        _answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
    }
    else
    {
        Return<SqlConnection> _answerConnection = this.connection;
        if (_answerConnection.theresError)
        {
            _answer.theresError = true;
            _answer.error = _answerConnection.error;
        }
        else
        {
            try
            {
                using (SqlConnection _connection = _answerConnection.data)
                {
                    if (_connection.State != ConnectionState.Open) _connection.Open();
                    using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
                    {//using SqlCommand
                        // FIX: removed dead local `_fields` — an 8-column select list was built here
                        // via string.Format and never used by the UPDATE below.
                        _command.CommandText = string.Format("update {0} set {2}=@name,{3}=@type,{4}=@description,{5}=@stateTraining,{6}=@creationDate,{7}=@lastModificationDate"
                            + " where {1}=@id"
                            , SQLGrp_Class.TAB_TRAINING, SQLGrp_Class.FLD_TRAI_ID, SQLGrp_Class.FLD_TRAI_NAME, SQLGrp_Class.FLD_TRAI_GCTYPE
                            , SQLGrp_Class.FLD_TRAI_GCDESCRIPTION, SQLGrp_Class.FLD_TRAI_STATETRAINING, SQLGrp_Class.FLD_TRAI_CREATIONDATE, SQLGrp_Class.FLD_TRAI_LASTMODIFICATIONDATE
                            );
                        _command.Parameters.AddWithValue("@id", entity.id);
                        _command.Parameters.AddWithValue("@name", entity.name);
                        _command.Parameters.AddWithValue("@type", entity.type);
                        // nullable columns must be sent as DBNull, not C# null
                        if (entity.description != null) _command.Parameters.AddWithValue("@description", entity.description);
                        else _command.Parameters.AddWithValue("@description", DBNull.Value);
                        if (entity.stateTraining != null) _command.Parameters.AddWithValue("@stateTraining", entity.stateTraining);
                        else _command.Parameters.AddWithValue("@stateTraining", DBNull.Value);
                        _command.Parameters.AddWithValue("@creationDate", entity.creationDate);
                        _command.Parameters.AddWithValue("@lastModificationDate", entity.lastModificationDate);
                        _answer.data = _command.ExecuteNonQuery();
                    }//using SqlCommand
                }
            }
            catch (Exception _ex)
            {
                _answer.theresError = true;
                _answer.error = Utility.GetError(_ex, this.GetType());
            }
        }
    }
    return _answer;
}