コード例 #1
0
		/// <summary>
		/// Convenience overload: forwards to the four-argument CreateVectorSpaceModel
		/// with a partition size of 0, which that overload treats as "keep all
		/// documents in a single partition".
		/// </summary>
		/// <param name="infos">Items the model is built from.</param>
		/// <param name="taskName">Name of the task the training belongs to.</param>
		/// <param name="training">Training the model is created for.</param>
		/// <returns>Whatever the four-argument overload returns.</returns>
		public Return<object> CreateVectorSpaceModel(List<InfoGrp_Class> infos, string taskName, Training training)
		{
			return CreateVectorSpaceModel(infos, taskName, training, 0);
		}
コード例 #2
0
		/// <summary>
		/// Builds the vector space model for a training and stores it through the data access layer.
		/// </summary>
		/// <remarks>
		/// Steps: create the vocabulary, create the task/training tables (which also hands back
		/// the transaction every later step runs on), store one Document per info that has
		/// vocabulary words, store one Word per vocabulary entry, then build the model partition
		/// by partition. The transaction is committed when every step succeeds and rolled back
		/// otherwise, including when an exception escapes one of the steps.
		/// Failures are never thrown to the caller; they are reported through the returned object.
		/// </remarks>
		/// <param name="infos">Source items; must not be null.</param>
		/// <param name="taskName">Task name; must not be null, empty or white space.</param>
		/// <param name="training">Training whose name and id are used; must not be null.</param>
		/// <param name="sizePartition">
		/// 0 processes all documents as one partition. A positive value groups the documents by
		/// groupId and splits each group with PartitionAccordingSizeOfPartitions. Negative values
		/// are rejected.
		/// </param>
		/// <returns>A result whose theresError/error fields describe the first failure, if any.</returns>
		public Return<object> CreateVectorSpaceModel(List<InfoGrp_Class> infos, string taskName, Training training, int sizePartition)
		{
			Return<object> _answer = new Return<object>();

			// Argument validation, in the same order and with the same errors as before.
			if (infos == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("infos"), this.GetType());
				return _answer;
			}
			if (string.IsNullOrWhiteSpace(taskName))
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("taskName"), this.GetType());
				return _answer;
			}
			if (training == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("training"), this.GetType());
				return _answer;
			}
			if (sizePartition < 0)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentException("can not be lower than zero", "sizePartition"), this.GetType());
				return _answer;
			}

			SqlTransaction _transaction = null;
			// True from the moment we hold the transaction until Commit/Rollback has completed.
			bool _transactionPending = false;
			try
			{
				Return<List<string>> _answerCreateVocabulary = CreateVocabulary(infos);
				if (_answerCreateVocabulary.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerCreateVocabulary.error;
					return _answer;
				}

				List<string> _vocabulary = _answerCreateVocabulary.data.OrderBy(p => p).ToList();
				// Only infos that contributed at least one vocabulary word become documents.
				List<InfoGrp_Class> _usefulInfos = infos.Where(i => i.wordsFromVocabulary.Any()).ToList();

				Return<Tuple<SqlTransaction, string, string, string>> _answerCreateTableForTaskTraining = this.grpClassDataAccess.CreateTableForTaskTraining(taskName, training.name);
				if (_answerCreateTableForTaskTraining.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerCreateTableForTaskTraining.error;
					return _answer;
				}

				_transaction = _answerCreateTableForTaskTraining.data.Item1;
				_transactionPending = true;
				string _nameForDocumentTable = _answerCreateTableForTaskTraining.data.Item2;
				string _nameForWordTable = _answerCreateTableForTaskTraining.data.Item3;
				string _nameForVectorSpaceModelTable = _answerCreateTableForTaskTraining.data.Item4;
				// NOTE(review): nothing here closes the connection behind the transaction;
				// presumably the data access layer owns it — confirm.

				int _nextDocumentId = 0, _nextWordId = 0;
				List<Document> _documents = null;
				List<Word> _words = null;

				Return<int> _answerGetNextDocumentId = this.grpClassDataAccess.GetNextDocumentId(_transaction, _nameForDocumentTable);
				if (_answerGetNextDocumentId.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGetNextDocumentId.error;
				}
				else
				{
					_nextDocumentId = _answerGetNextDocumentId.data;
				}

				if (!_answer.theresError)
				{
					Return<int> _answerGetNextWordId = this.grpClassDataAccess.GetNextWordId(_transaction, _nameForWordTable);
					if (_answerGetNextWordId.theresError)
					{
						_answer.theresError = true;
						_answer.error = _answerGetNextWordId.error;
					}
					else
					{
						_nextWordId = _answerGetNextWordId.data;
					}
				}

				if (!_answer.theresError)
				{//let's create the documents
					// Ids are handed out sequentially starting at the value read above.
					_documents = _usefulInfos.Select(i => new Document()
					{
						id = _nextDocumentId++,
						groupId = i.groupId,
						language = !i.language.HasValue ? string.Empty : Enum.GetName(typeof(EnumLanguage), i.language.Value),
						information = i.information,
						wordsFromVocabulary = i.wordsFromVocabulary
					}).ToList();

					Return<int> _answerAddDocuments = this.grpClassDataAccess.AddDocuments(_documents, _transaction, _nameForDocumentTable);
					if (_answerAddDocuments.theresError)
					{
						_answer.theresError = true;
						_answer.error = _answerAddDocuments.error;
					}
				}//let's create the documents

				if (!_answer.theresError)
				{//let's create the words
					_words = _vocabulary.Select(_w => new Word()
					{
						id = _nextWordId++,
						trainingId = training.id,
						word = _w
					}).ToList();

					Return<int> _answerAddWords = this.grpClassDataAccess.AddWords(_words, _transaction, _nameForWordTable);
					if (_answerAddWords.theresError)
					{
						_answer.theresError = true;
						_answer.error = _answerAddWords.error;
					}
				}//let's create the words

				if (!_answer.theresError)
				{//let's create partitions into the documents in order to save memory
					List<List<Document>> _usefulDocumentsPartitioned = new List<List<Document>>();
					if (sizePartition > 0)
					{
						IEnumerable<IGrouping<int, Document>> _groupedUsefulDocuments = _documents.GroupBy(i => i.groupId);
						_usefulDocumentsPartitioned.AddRange(_groupedUsefulDocuments.SelectMany(g => g.ToList().PartitionAccordingSizeOfPartitions(sizePartition)));
					}
					else
					{
						_usefulDocumentsPartitioned.Add(_documents);
					}

					foreach (List<Document> _documentsPartition in _usefulDocumentsPartitioned)
					{//partition to partition
						// The per-partition result replaces _answer; stop at the first failure.
						_answer = CreateVectorSpaceModel(_documentsPartition, training.id, _words, _transaction, _nameForVectorSpaceModelTable);
						if (_answer.theresError)
							break;
					}//partition to partition
				}//let's create partitions into the documents in order to save memory

				if (_answer.theresError)
					_transaction.Rollback();
				else
					_transaction.Commit();
				_transactionPending = false;
			}
			catch (Exception _ex)
			{
				// Do not leave the transaction open when a step threw after it was created.
				if (_transactionPending && _transaction != null)
				{
					try
					{
						_transaction.Rollback();
					}
					catch (Exception)
					{
						// Deliberately ignored: the rollback can itself fail (for example when the
						// connection is already gone); the original exception is the one reported.
					}
				}

				_answer.theresError = true;
				_answer.error = Utility.GetError(_ex, this.GetType());
			}
			return _answer;
		}
コード例 #3
0
		/// <summary>
		/// Loads the documents, words and vector space model rows stored for a training,
		/// derives their binary characteristics and clusters them with groupingUtility.Grouping.
		/// </summary>
		/// <param name="taskName">Task name, passed through to the data access layer.</param>
		/// <param name="entity">Training to group; a null value is reported as an error in the result.</param>
		/// <returns>
		/// The clusterings returned by groupingUtility.Grouping after ChangeIdsByDocuments has been
		/// applied to every cluster. The data stays an empty list when any of the three loaded
		/// collections is empty or when a step reports an error (theresError/error are then set).
		/// </returns>
		public Return<List<List<BlatellaGroup>>> Group(string taskName, Training entity)
		{
			Return<List<List<BlatellaGroup>>> _answer = new Return<List<List<BlatellaGroup>>>() { data = new List<List<BlatellaGroup>>() };
			if (entity == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
				return _answer;
			}

			try
			{
				Return<List<Document>> _answerGetDocumentsForTraining = this.grpClassDataAccess.GetDocumentsForTraining(taskName, entity);
				if (_answerGetDocumentsForTraining.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGetDocumentsForTraining.error;
					return _answer;
				}
				List<Document> _documents = _answerGetDocumentsForTraining.data;

				Return<List<Word>> _answerGetWordsForTraining = this.grpClassDataAccess.GetWordsForTraining(taskName, entity);
				if (_answerGetWordsForTraining.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGetWordsForTraining.error;
					return _answer;
				}
				List<Word> _words = _answerGetWordsForTraining.data;

				Return<List<VectorSpaceModel>> _answerGetVectorSpaceModelsForTraining = this.grpClassDataAccess.GetVectorSpaceModelsForTraining(taskName, entity);
				if (_answerGetVectorSpaceModelsForTraining.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGetVectorSpaceModelsForTraining.error;
					return _answer;
				}
				List<VectorSpaceModel> _vectorSpaceModels = _answerGetVectorSpaceModelsForTraining.data;

				// Nothing to cluster unless all three collections have content.
				if (!_documents.Any() || !_words.Any() || !_vectorSpaceModels.Any())
					return _answer;

				_documents.ForEach(e => e.vectorSpaceModel = _vectorSpaceModels.Where(v => v.documentId == e.id).ToList());
				// Words are matched on the word id; the previous code compared against documentId,
				// a copy of the line above, which attached the rows of an unrelated document.
				// NOTE(review): assumes VectorSpaceModel exposes wordId next to documentId — confirm.
				_words.ForEach(e => e.vectorSpaceModel = _vectorSpaceModels.Where(v => v.wordId == e.id).ToList());

				Return<SortedList<int, SortedList<int, ICharacteristic>>> _answerGetBinaryCharacteristics = GetBinaryCharacteristics(_documents, _words);
				if (_answerGetBinaryCharacteristics.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGetBinaryCharacteristics.error;
					return _answer;
				}

				Return<List<List<BlatellaGroup>>> _answerGrouping = this.groupingUtility.Grouping(_answerGetBinaryCharacteristics.data);
				if (_answerGrouping.theresError)
				{
					_answer.theresError = true;
					_answer.error = _answerGrouping.error;
					return _answer;
				}

				foreach (List<BlatellaGroup> _clustering in _answerGrouping.data)
				{//clustering to clustering
					foreach (BlatellaGroup _cluster in _clustering)
					{//cluster to cluster
						// Stop at the first failing cluster; data then stays the initial empty list.
						Return<object> _answerChangeIdsByDocuments = ChangeIdsByDocuments(_cluster, _documents);
						if (_answerChangeIdsByDocuments.theresError)
						{
							_answer.theresError = true;
							_answer.error = _answerChangeIdsByDocuments.error;
							return _answer;
						}
					}//cluster to cluster
				}//clustering to clustering

				_answer.data = _answerGrouping.data;
			}
			catch (Exception _ex)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(_ex, this.GetType());
			}
			return _answer;
		}
コード例 #4
0
		/// <summary>
		/// Stamps the training with the current local time as its last modification date
		/// and hands it to the data access layer to be updated.
		/// </summary>
		/// <param name="entity">Training to update; a null value is reported as an error in the result.</param>
		/// <returns>
		/// The result of grpClassDataAccess.UpdateTraining, or a result with data 0 and
		/// theresError set when the argument is null or an exception is caught.
		/// </returns>
		public Return<int> UpdateTraining(Training entity)
		{
			Return<int> _result = new Return<int>();
			_result.data = 0;

			if (entity != null)
			{
				try
				{
					entity.lastModificationDate = DateTime.Now;
					_result = this.grpClassDataAccess.UpdateTraining(entity);
				}
				catch (Exception _failure)
				{
					_result.theresError = true;
					_result.error = Utility.GetError(_failure, this.GetType());
				}
				return _result;
			}

			// Null argument: report it through the result instead of throwing.
			_result.theresError = true;
			_result.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
			return _result;
		}
コード例 #5
0
		/// <summary>
		/// Reads every row of the vector space model table that belongs to the given
		/// task/training pair and converts the rows with DataRowCollectionToVectorSpaceModel.
		/// </summary>
		/// <param name="taskName">Task name used to derive the table name.</param>
		/// <param name="entity">Training whose name is used to derive the table name; null is reported as an error.</param>
		/// <returns>
		/// The converted rows, or a result with theresError set when the argument is null,
		/// the connection could not be obtained, or an exception is caught.
		/// </returns>
		public Return<List<VectorSpaceModel>> GetVectorSpaceModelsForTraining(string taskName, Training entity)
		{
			Return<List<VectorSpaceModel>> _answer = new Return<List<VectorSpaceModel>>() { data = new List<VectorSpaceModel>() };

			// Reject a null training before a connection is requested, so none is left undisposed.
			if (entity == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
				return _answer;
			}

			Return<SqlConnection> _answerConnection = this.connection;
			if (_answerConnection.theresError)
			{
				_answer.theresError = true;
				_answer.error = _answerConnection.error;
				return _answer;
			}

			try
			{
				using (SqlConnection _connection = _answerConnection.data)
				{
					// Built inside the using block so the connection is disposed even if this throws.
					// NOTE(review): the table name is placed directly in the SQL text; presumably
					// GenerateDynamicNameForVectorSpaceModelTable yields a safe identifier — confirm.
					string _nameForVectorSpaceModelTable = GenerateDynamicNameForVectorSpaceModelTable(taskName, entity.name);

					if (_connection.State != ConnectionState.Open)
						_connection.Open();
					using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
					{//using SqlCommand
						string _fields = string.Format("{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13}"
													, SQLGrp_Class.FLD_VSM_DOCUMENTID, SQLGrp_Class.FLD_VSM_WORDID, SQLGrp_Class.FLD_VSM_WEIGHT_BINARY, SQLGrp_Class.FLD_VSM_WEIGHT_TERMFREQUENCY
													, SQLGrp_Class.FLD_VSM_WEIGHT_WEIGHTEDTERMFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_AUGMENTEDNORMALIZEDTERMFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_LOGARITHMIC
													, SQLGrp_Class.FLD_VSM_WEIGHT_INVERSEDOCUMENTFREQUENCY, SQLGrp_Class.FLD_VSM_WEIGHT_TF_IDF, SQLGrp_Class.FLD_VSM_WEIGHT_PROBABILISTICINVERSEFREQUENCY
													, SQLGrp_Class.FLD_VSM_WEIGHT_NORMALFUNCTION, SQLGrp_Class.FLD_VSM_WEIGHT_GF_IDF, SQLGrp_Class.FLD_VSM_WEIGHT_ENTROPY, SQLGrp_Class.FLD_VSM_WEIGHT_WEIGHTEDINVERSEFREQUENCY);

						_command.CommandText = string.Format("select {0} from {1}", _fields, _nameForVectorSpaceModelTable);
						using (SqlDataReader _dataReader = _command.ExecuteReader())
						using (DataTable _table = new DataTable())
						{
							_table.Load(_dataReader);
							_answer = DataRowCollectionToVectorSpaceModel(_table.Rows);
						}
					}//using SqlCommand
				}
			}
			catch (Exception _ex)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(_ex, this.GetType());
			}
			return _answer;
		}
コード例 #6
0
		/// <summary>
		/// Reads every row of the word table that belongs to the given task/training pair
		/// and converts the rows with DataRowCollectionToWord.
		/// </summary>
		/// <param name="taskName">Task name used to derive the table name.</param>
		/// <param name="entity">Training whose name is used to derive the table name; null is reported as an error.</param>
		/// <returns>
		/// The converted rows, or a result with theresError set when the argument is null,
		/// the connection could not be obtained, or an exception is caught.
		/// </returns>
		public Return<List<Word>> GetWordsForTraining(string taskName, Training entity)
		{
			Return<List<Word>> _answer = new Return<List<Word>>() { data = new List<Word>() };

			// Reject a null training before a connection is requested, so none is left undisposed.
			if (entity == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
				return _answer;
			}

			Return<SqlConnection> _answerConnection = this.connection;
			if (_answerConnection.theresError)
			{
				_answer.theresError = true;
				_answer.error = _answerConnection.error;
				return _answer;
			}

			try
			{
				using (SqlConnection _connection = _answerConnection.data)
				{
					// Built inside the using block so the connection is disposed even if this throws.
					// NOTE(review): the table name is placed directly in the SQL text; presumably
					// GenerateDynamicNameForWordTable yields a safe identifier — confirm.
					string _nameForWordTable = GenerateDynamicNameForWordTable(taskName, entity.name);

					if (_connection.State != ConnectionState.Open)
						_connection.Open();
					using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
					{//using SqlCommand
						string _fields = string.Format("{0},{1},{2}"
													, SQLGrp_Class.FLD_WORD_ID, SQLGrp_Class.FLD_WORD_TRAININGID, SQLGrp_Class.FLD_WORD_WORD);

						_command.CommandText = string.Format("select {0} from {1}", _fields, _nameForWordTable);
						using (SqlDataReader _dataReader = _command.ExecuteReader())
						using (DataTable _table = new DataTable())
						{
							_table.Load(_dataReader);
							_answer = DataRowCollectionToWord(_table.Rows);
						}
					}//using SqlCommand
				}
			}
			catch (Exception _ex)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(_ex, this.GetType());
			}
			return _answer;
		}
コード例 #7
0
		/// <summary>
		/// Updates the row of the training table whose id matches the given training,
		/// writing its name, type, description, training state, creation date and
		/// last modification date.
		/// </summary>
		/// <param name="entity">Training to persist; a null value is reported as an error in the result.</param>
		/// <returns>
		/// A result whose data is the value returned by ExecuteNonQuery, or a result with
		/// data 0 and theresError set when the argument is null, the connection could not
		/// be obtained, or an exception is caught.
		/// </returns>
		public Return<int> UpdateTraining(Training entity)
		{
			Return<int> _answer = new Return<int>() { data = 0 };
			if (entity == null)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(new ArgumentNullException("entity"), this.GetType());
				return _answer;
			}

			Return<SqlConnection> _answerConnection = this.connection;
			if (_answerConnection.theresError)
			{
				_answer.theresError = true;
				_answer.error = _answerConnection.error;
				return _answer;
			}

			try
			{
				using (SqlConnection _connection = _answerConnection.data)
				{
					if (_connection.State != ConnectionState.Open)
						_connection.Open();
					using (SqlCommand _command = new SqlCommand() { CommandType = CommandType.Text, Connection = _connection, CommandTimeout = timeoutInSecondsBD })
					{//using SqlCommand
						// {0} is the table, {1} the key column used in the where clause,
						// {2}..{7} the columns being assigned.
						_command.CommandText = string.Format("update {0} set {2}=@name,{3}=@type,{4}=@description,{5}=@stateTraining,{6}=@creationDate,{7}=@lastModificationDate"
														+ " where {1}=@id"
														, SQLGrp_Class.TAB_TRAINING, SQLGrp_Class.FLD_TRAI_ID, SQLGrp_Class.FLD_TRAI_NAME, SQLGrp_Class.FLD_TRAI_GCTYPE
														, SQLGrp_Class.FLD_TRAI_GCDESCRIPTION, SQLGrp_Class.FLD_TRAI_STATETRAINING, SQLGrp_Class.FLD_TRAI_CREATIONDATE, SQLGrp_Class.FLD_TRAI_LASTMODIFICATIONDATE
														);

						_command.Parameters.AddWithValue("@id", entity.id);
						_command.Parameters.AddWithValue("@name", entity.name);
						_command.Parameters.AddWithValue("@type", entity.type);
						// A null value must be sent as DBNull.Value to be stored as SQL NULL.
						_command.Parameters.AddWithValue("@description", (object)entity.description ?? DBNull.Value);
						_command.Parameters.AddWithValue("@stateTraining", (object)entity.stateTraining ?? DBNull.Value);
						_command.Parameters.AddWithValue("@creationDate", entity.creationDate);
						_command.Parameters.AddWithValue("@lastModificationDate", entity.lastModificationDate);

						_answer.data = _command.ExecuteNonQuery();
					}//using SqlCommand
				}
			}
			catch (Exception _ex)
			{
				_answer.theresError = true;
				_answer.error = Utility.GetError(_ex, this.GetType());
			}
			return _answer;
		}