/// <summary>
/// Builds (or seeds) the query-side and doc-side DNN models from the global
/// ParameterSetting configuration, then logs the resulting structures.
/// When IS_SHAREMODEL is set, DNN_Doc aliases the SAME instance as DNN_Query,
/// so only one Init() is needed and both sides always stay in sync.
/// </summary>
public override void ModelInit_FromConfig()
{
    if (!ParameterSetting.ISSEED)
    {
        // Fresh model: build the query network from the SOURCE_* settings.
        DNN_Query = new DNN(ParameterSetting.FEATURE_DIMENSION_QUERY, ParameterSetting.SOURCE_LAYER_DIM, ParameterSetting.SOURCE_ACTIVATION, ParameterSetting.SOURCE_LAYERWEIGHT_SIGMA, ParameterSetting.SOURCE_ARCH, ParameterSetting.SOURCE_ARCH_WIND, false);
        if (ParameterSetting.IS_SHAREMODEL)
        {
            // Shared mode: doc side reuses the query model instance (alias, not a copy).
            DNN_Doc = DNN_Query;
        }
        else
        {
            DNN_Doc = new DNN(ParameterSetting.FEATURE_DIMENSION_DOC, ParameterSetting.TARGET_LAYER_DIM, ParameterSetting.TARGET_ACTIVATION, ParameterSetting.TARGET_LAYERWEIGHT_SIGMA, ParameterSetting.TARGET_ARCH, ParameterSetting.TARGET_ARCH_WIND, false);
        }
        DNN_Query.Init();
        if (!ParameterSetting.IS_SHAREMODEL)
        {
            if (ParameterSetting.MIRROR_INIT)
            {
                // Mirror init: seed the doc model's parameters from the query model.
                DNN_Doc.Init(DNN_Query);
            }
            else
            {
                DNN_Doc.Init();
            }
        }
        // Write the actual input-layer sizes back into the global config so
        // downstream code sees the dimensions the models were built with.
        ParameterSetting.FEATURE_DIMENSION_QUERY = DNN_Query.neurallayers[0].Number;
        ParameterSetting.FEATURE_DIMENSION_DOC = DNN_Doc.neurallayers[0].Number;
    }
    else
    {
        // Seeded mode: load both models from the configured seed files;
        // LoadModel also updates the FEATURE_DIMENSION_* settings.
        LoadModel(ParameterSetting.SEEDMODEL1, ref DNN_Query, ParameterSetting.SEEDMODEL2, ref DNN_Doc, true);
    }
    Program.Print("Source Neural Network Structure " + DNN_Query.DNN_Descr());
    Program.Print("Target Neural Network Structure " + DNN_Doc.DNN_Descr());
    Program.Print("Feature Num Query " + ParameterSetting.FEATURE_DIMENSION_QUERY.ToString());
    Program.Print("Feature Num Doc " + ParameterSetting.FEATURE_DIMENSION_DOC.ToString());
    Program.Print("Sharing Model " + ParameterSetting.IS_SHAREMODEL.ToString());
    Program.Print("Mirror Init Model " + ParameterSetting.MIRROR_INIT.ToString());
    Program.Print("Math Lib " + ParameterSetting.MATH_LIB.ToString());
    if (ParameterSetting.MATH_LIB == MathLibType.cpu)
    {
        Program.Print("CPU Math thread num " + ParameterSetting.BasicMathLibThreadNum.ToString());
    }
}
/// <summary>
/// Wraps two pre-built models and initializes the paired training runs.
/// Theoretically supports two runs (feed-forward and backward prop) on the
/// same model instance, since the query and doc models may alias each other.
/// </summary>
/// <param name="dnn_query">Query-side model.</param>
/// <param name="dnn_doc">Doc-side model (may be the same instance as the query model).</param>
public DSSM_Train(DNN dnn_query, DNN dnn_doc)
{
    DNN_Query = dnn_query;
    DNN_Doc = dnn_doc;
    Init(dnn_query, dnn_doc);
}
/// <summary>
/// Loads the query and doc models from disk, either allocating brand-new
/// DNN instances (allocateStructureFromEmpty) or refreshing the parameters
/// of the existing ones in place. With IS_SHAREMODEL the doc model becomes
/// an alias of the query model and the doc file is ignored for loading.
/// Also publishes the loaded input-layer sizes into ParameterSetting.
/// </summary>
void LoadModel(string queryModelFile, ref DNN queryModel, string docModelFile, ref DNN docModel, bool allocateStructureFromEmpty)
{
    if (allocateStructureFromEmpty)
    {
        queryModel = new DNN(queryModelFile);
        docModel = ParameterSetting.IS_SHAREMODEL ? queryModel : new DNN(docModelFile);
    }
    else
    {
        queryModel.Model_Load(queryModelFile, false);
        if (ParameterSetting.IS_SHAREMODEL)
        {
            docModel = queryModel;
        }
        else
        {
            docModel.Model_Load(docModelFile, false);
        }
    }
    // Keep the globally visible feature dimensions consistent with what was loaded.
    ParameterSetting.FEATURE_DIMENSION_QUERY = queryModel.neurallayers[0].Number;
    ParameterSetting.FEATURE_DIMENSION_DOC = docModel.neurallayers[0].Number;
}
/// <summary>
/// Copies per-link parameters from another model of the same architecture,
/// link by link. Assumes <paramref name="model"/> has at least as many links.
/// </summary>
public void Init(DNN model)
{
    int linkTotal = neurallinks.Count;
    for (int linkIdx = 0; linkIdx < linkTotal; ++linkIdx)
    {
        neurallinks[linkIdx].Init(model.neurallinks[linkIdx]);
    }
}
/// <summary>
/// Conjugate-gradient (Hestenes–Stiefel) batch trainer: allocates the flat
/// parameter vector, the search direction, and a history of gradient buffers,
/// all sized by the model's total parameter count (Num, set by the base ctor).
/// </summary>
public DNN_BatchTrain_CG_HS(DNN dnn) : base(dnn)
{
    parameters = new CudaPieceFloat(Num, true, true);
    direction = new CudaPieceFloat(Num, true, true);
    int remaining = GradHistory;
    while (remaining-- > 0)
    {
        grad_list.Add(new CudaPieceFloat(Num, true, true));
    }
}
/// <summary>
/// Builds the per-run data wrappers for every layer and link of a model.
/// The input layer (index 0) is flagged differently from the hidden/output
/// layers via the boolean passed to NeuralLayerData.
/// </summary>
public DNNRun(DNN model)
{
    DnnModel = model;
    int layerIdx = 0;
    foreach (var layer in DnnModel.neurallayers)
    {
        // layerIdx != 0: only non-input layers get the flag set.
        neurallayers.Add(new NeuralLayerData(layer, layerIdx != 0));
        ++layerIdx;
    }
    foreach (var link in DnnModel.neurallinks)
    {
        neurallinks.Add(new NeuralLinkData(link));
    }
}
/// <summary>
/// Before calling this, you must call CopyOutFromCuda().
/// The returned clone is only used for backup purposes, so it does not
/// allocate any GPU memory (final ctor flag is true, unlike the live models).
/// </summary>
/// <returns>A structural clone initialized with this model's parameters.</returns>
public DNN CreateBackupClone()
{
    // Extract the architecture descriptors link-by-link before constructing.
    int inputDim = this.neurallayers[0].Number;
    var outputDims = this.neurallinks.Select(o => o.Neural_Out.Number).ToArray();
    var activations = this.neurallinks.Select(o => (int)o.Af).ToArray();
    var weightSigmas = this.neurallinks.Select(o => o.initWeightSigma).ToArray();
    var archTypes = this.neurallinks.Select(o => (int)o.Nt).ToArray();
    var windowSizes = this.neurallinks.Select(o => o.N_Winsize).ToArray();

    DNN backupClone = new DNN(inputDim, outputDims, activations, weightSigmas, archTypes, windowSizes, true);
    backupClone.Init(this);
    return backupClone;
}
/// <summary>
/// Allocates all per-run state: the two DNNRun wrappers, the positive/negative
/// top-layer derivative buffers, the alpha/distance scratch vectors, and the
/// negative-sampling index arrays. Optionally fixes the negative-sampling RNG
/// seed when PSEUDO_RANDOM is enabled (for reproducible runs).
/// </summary>
void Init(DNN dnn_query, DNN dnn_doc)
{
    dnn_model_query = new DNNRun(dnn_query);
    dnn_model_doc = new DNNRun(dnn_doc);

    Pos_QD_Pair_TOP = new Layer_Output_Deriv_QD_PairTOP(dnn_query, dnn_doc);
    Neg_QD_Pair_TOP = new Layer_Output_Deriv_QD_PairTOP_Full(dnn_query, dnn_doc, ParameterSetting.NTRIAL);

    // One slot per (positive + NTRIAL negatives) per batch entry.
    int pairSlots = ParameterSetting.BATCH_SIZE * (ParameterSetting.NTRIAL + 1);
    alphaCudaPiece = new CudaPieceFloat(pairSlots, true, true);
    distCudaPiece = new CudaPieceFloat(pairSlots, true, true);

    // Index buffers cover only the NTRIAL negatives per batch entry.
    int negSlots = ParameterSetting.NTRIAL * ParameterSetting.BATCH_SIZE;
    GPU_negative_index_Array = new CudaPieceInt(negSlots, true, true);
    GPU_Inver_negative_index_Array = new CudaPieceInt(negSlots, true, true);
    GPU_Inver_negative_value_Array = new CudaPieceInt(negSlots, true, true);

    if (ParameterSetting.PSEUDO_RANDOM)
    {
        neg_random = new Random(ParameterSetting.RANDOM_SEED);
    }
}
/// <summary>
/// Scatters the optimizer's flat parameter vector back into the model:
/// each link's weight (and optionally bias) block is zeroed and then
/// overwritten from the matching offset in <c>parameters</c>.
/// Offsets must match the packing order used by Init(DNN).
/// </summary>
public override void Update(DNN dnn)
{
    int offset = 0;
    foreach (var link in dnn.neurallinks)
    {
        int weightCount = link.Neural_In.Number * link.N_Winsize * link.Neural_Out.Number;
        // Zero then add == copy the slice out of the flat vector.
        MathOperatorManager.GlobalInstance.Zero(link.weight, weightCount);
        MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(link.weight, 0, parameters, offset, weightCount, 1.0f);
        offset += weightCount;
        if (ParameterSetting.UpdateBias)
        {
            int biasCount = link.Neural_Out.Number;
            MathOperatorManager.GlobalInstance.Zero(link.bias, biasCount);
            MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(link.bias, 0, parameters, offset, biasCount, 1.0f);
            offset += biasCount;
        }
    }
    GradIdx += 1;
}
/// <summary>
/// Init Model Parameters: gathers every link's weight (and optionally bias)
/// into the optimizer's flat parameter vector, using the same packing order
/// that Update(DNN) later uses to scatter them back. Resets GradIdx to 0.
/// </summary>
/// <param name="dnn">Model whose parameters are packed into <c>parameters</c>.</param>
public override void Init(DNN dnn)
{
    MathOperatorManager.GlobalInstance.Zero(parameters, Num);
    int offset = 0;
    foreach (var link in dnn.neurallinks)
    {
        link.weight.CopyOutFromCuda();
        int weightCount = link.Neural_In.Number * link.N_Winsize * link.Neural_Out.Number;
        // Adding into a zeroed vector == copying the weights in at this offset.
        MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(parameters, offset, link.weight, 0, weightCount, 1.0f);
        offset += weightCount;
        if (ParameterSetting.UpdateBias)
        {
            int biasCount = link.Neural_Out.Number;
            MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(parameters, offset, link.bias, 0, biasCount, 1.0f);
            offset += biasCount;
        }
    }
    parameters.CopyOutFromCuda();
    GradIdx = 0;
}
/// <summary>
/// Applies the trainer's accumulated state back onto the model.
/// Base implementation is intentionally a no-op; concrete batch trainers
/// (e.g. the CG variant) override this.
/// </summary>
public virtual void Update(DNN dnn) { }
/// <summary>
/// Initializes the trainer's internal state from the model.
/// Base implementation is intentionally a no-op; concrete batch trainers
/// override this to snapshot model parameters.
/// </summary>
public virtual void Init(DNN dnn) { }
/// <summary>
/// Base batch trainer: records the model's total trainable parameter count,
/// which subclasses use to size their flat parameter/gradient buffers.
/// </summary>
public DNN_BatchTrain(DNN dnn) { Num = dnn.ModelParameterNumber; }
/// <summary>
/// Main training loop. Detects and resumes from a previous run (by probing
/// for saved per-iteration model files), optionally validates each iteration,
/// and uses an accept/reject scheme on the validation score: accepted iters
/// snapshot the model into a backup clone; rejected iters restore it from
/// the backup and shrink the learning rate.
/// FIX: the saved learning rate is written to files named
/// MODEL_PATH + "_LEARNING_RATE_ITER" + iter, but was previously read back
/// from MODEL_PATH + "_LEARNING_RATE" + iter (missing "_ITER"), so resumed
/// runs silently kept the default learning rate. Both read sites now use
/// the same "_LEARNING_RATE_ITER" suffix as the writes.
/// </summary>
public override void Training()
{
    Init(DNN_Query, DNN_Doc);
    DNN dnn_query_backup = null, dnn_doc_backup = null;
    Program.Print("Starting DNN Learning!");
    float trainingLoss = 0;
    float previous_devEval = 0;
    float VALIDATION_Eval = 0;
    //// determine the last stopped iteration by probing saved model files
    int lastRunStopIter = -1;
    for (int iter = 0; iter <= ParameterSetting.MAX_ITER; ++iter)
    {
        if (!File.Exists(ParameterSetting.MODEL_PATH + "_QUERY_ITER" + iter.ToString()))
        {
            break;
        }
        lastRunStopIter = iter;
    }
    if (lastRunStopIter == -1)
    {
        // Fresh run: save the initial (iteration 0) model and optionally validate it.
        Program.Print("Initialization (Iter 0)");
        Program.Print("Saving models ...");
        DNN_Query.CopyOutFromCuda();
        Tuple<string, string> dssmModelPaths = ComposeDSSMModelPaths(0);
        DNN_Query.Model_Save(dssmModelPaths.Item1);
        if (!ParameterSetting.IS_SHAREMODEL)
        {
            DNN_Doc.CopyOutFromCuda();
            DNN_Doc.Model_Save(dssmModelPaths.Item2);
        }
        if (ParameterSetting.ISVALIDATE)
        {
            Program.Print("Start validation process ...");
            if (!ParameterSetting.VALIDATE_MODEL_ONLY)
            {
                VALIDATION_Eval = Evaluate();
            }
            else
            {
                VALIDATION_Eval = EvaluateModelOnly(dssmModelPaths.Item1, dssmModelPaths.Item2);
            }
            Program.Print("Dataset VALIDATION :\n/*******************************/ \n" + VALIDATION_Eval.ToString() + " \n/*******************************/ \n");
        }
        File.WriteAllText(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + 0.ToString(), LearningParameters.lr_mid.ToString());
        lastRunStopIter = 0;
    }
    else
    {
        if (ParameterSetting.ISVALIDATE)
        {
            //// go through all previous saved runs and print validation
            for (int iter = 0; iter <= lastRunStopIter; ++iter)
            {
                Program.Print("Loading from previously trained Iter " + iter.ToString());
                Tuple<string, string> dssmModelPaths = ComposeDSSMModelPaths(iter);
                LoadModel(dssmModelPaths.Item1, ref DNN_Query, dssmModelPaths.Item2, ref DNN_Doc, false);
                Program.Print("Start validation process ...");
                if (!ParameterSetting.VALIDATE_MODEL_ONLY)
                {
                    VALIDATION_Eval = Evaluate();
                }
                else
                {
                    VALIDATION_Eval = EvaluateModelOnly(dssmModelPaths.Item1, dssmModelPaths.Item2);
                }
                Program.Print("Dataset VALIDATION :\n/*******************************/ \n" + VALIDATION_Eval.ToString() + " \n/*******************************/ \n");
                // Restore the learning rate saved after this iteration (filename must
                // match the "_LEARNING_RATE_ITER" name used at write time).
                if (File.Exists(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + iter.ToString()))
                {
                    LearningParameters.lr_mid = float.Parse(File.ReadAllText(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + iter.ToString()));
                }
            }
        }
        else
        {
            //// just load the last iteration
            int iter = lastRunStopIter;
            Program.Print("Loading from previously trained Iter " + iter.ToString());
            LoadModel(ParameterSetting.MODEL_PATH + "_QUERY_ITER" + iter.ToString(), ref DNN_Query, ParameterSetting.MODEL_PATH + "_DOC_ITER" + iter.ToString(), ref DNN_Doc, false);
            // Same filename fix as above: read what the training loop actually writes.
            if (File.Exists(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + iter.ToString()))
            {
                LearningParameters.lr_mid = float.Parse(File.ReadAllText(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + iter.ToString()));
            }
        }
    }
    //// Clone to backup models (only needed for the accept/reject validation scheme)
    if (ParameterSetting.ISVALIDATE)
    {
        dnn_query_backup = (DNN)DNN_Query.CreateBackupClone();
        if (!ParameterSetting.IS_SHAREMODEL)
        {
            dnn_doc_backup = (DNN)DNN_Doc.CreateBackupClone();
        }
    }
    if (ParameterSetting.NOTrain)
    {
        return;
    }
    Program.Print("total query sample number : " + PairStream.qstream.total_Batch_Size.ToString());
    Program.Print("total doc sample number : " + PairStream.dstream.total_Batch_Size.ToString());
    Program.Print("Training batches: " + PairStream.qstream.BATCH_NUM.ToString());
    //Program.Print("Learning Objective : " + ParameterSetting.OBJECTIVE.ToString());
    LearningParameters.total_doc_num = PairStream.dstream.total_Batch_Size;
    previous_devEval = VALIDATION_Eval;
    Program.Print("Start Training");
    Program.Print("-----------------------------------------------------------");
    int mmindex = 0;
    for (int iter = lastRunStopIter + 1; iter <= ParameterSetting.MAX_ITER; iter++)
    {
        Program.Print("ITER : " + iter.ToString());
        LearningParameters.learning_rate = LearningParameters.lr_mid;
        LearningParameters.momentum = 0.0f;
        Program.timer.Reset();
        Program.timer.Start();
        //// load the training file and all associated streams; the "open action" is cheap
        if (iter != lastRunStopIter + 1)
        {
            //// we don't need to load if "iter == lastRunStopIter + 1", because it has been already opened.
            //// we only open a new pair from the second iteration
            LoadPairDataAtIdx();
        }
        /// adjust learning rate here.
        PairStream.Init_Batch();
        trainingLoss = 0;
        LearningParameters.neg_static_sample = false;
        mmindex = 0;
        while (PairStream.Next_Batch(SrcNorm, TgtNorm))
        {
            trainingLoss += feedstream_batch(PairStream.GPU_qbatch, PairStream.GPU_dbatch, true, PairStream.srNCEProbDist);
            mmindex += 1;
            if (mmindex % 50 == 0)
            {
                Console.Write("Training :{0}\r", mmindex.ToString());
            }
        }
        Program.Print("Training Loss : " + trainingLoss.ToString());
        Program.Print("Learning Rate : " + (LearningParameters.learning_rate.ToString()));
        Tuple<string, string> dssmModelPaths = ComposeDSSMModelPaths(iter);
        Program.Print("Saving models ...");
        DNN_Query.CopyOutFromCuda();
        DNN_Query.Model_Save(dssmModelPaths.Item1);
        if (!ParameterSetting.IS_SHAREMODEL)
        {
            DNN_Doc.CopyOutFromCuda();
            DNN_Doc.Model_Save(dssmModelPaths.Item2);
        }
        if (ParameterSetting.ISVALIDATE)
        {
            Program.Print("Start validation process ...");
            if (!ParameterSetting.VALIDATE_MODEL_ONLY)
            {
                VALIDATION_Eval = Evaluate();
            }
            else
            {
                VALIDATION_Eval = EvaluateModelOnly(dssmModelPaths.Item1, dssmModelPaths.Item2);
            }
            Program.Print("Dataset VALIDATION :\n/*******************************/ \n" + VALIDATION_Eval.ToString() + " \n/*******************************/ \n");
            if (VALIDATION_Eval >= previous_devEval - LearningParameters.accept_range)
            {
                Console.WriteLine("Accepted it");
                previous_devEval = VALIDATION_Eval;
                if (LearningParameters.IsrateDown)
                {
                    LearningParameters.lr_mid = LearningParameters.lr_mid * LearningParameters.down_rate;
                }
                //// save model to backups
                dnn_query_backup.Init(DNN_Query);
                if (!ParameterSetting.IS_SHAREMODEL)
                {
                    dnn_doc_backup.Init(DNN_Doc);
                }
            }
            else
            {
                Console.WriteLine("Reject it");
                LearningParameters.IsrateDown = true;
                LearningParameters.lr_mid = LearningParameters.lr_mid * LearningParameters.reject_rate;
                //// recover model from the last saved backup
                DNN_Query.Init(dnn_query_backup);
                if (!ParameterSetting.IS_SHAREMODEL)
                {
                    DNN_Doc.Init(dnn_doc_backup);
                }
            }
        }
        //// write the learning rate after this iter
        File.WriteAllText(ParameterSetting.MODEL_PATH + "_LEARNING_RATE_ITER" + iter.ToString(), LearningParameters.lr_mid.ToString());
        Program.timer.Stop();
        Program.Print("Training Runing Time : " + Program.timer.Elapsed.ToString());
        Program.Print("-----------------------------------------------------------");
    }
    //// Final save
    DNN_Query.CopyOutFromCuda();
    DNN_Query.Model_Save(ParameterSetting.MODEL_PATH + "_QUERY_DONE");
    if (!ParameterSetting.IS_SHAREMODEL)
    {
        DNN_Doc.CopyOutFromCuda();
        DNN_Doc.Model_Save(ParameterSetting.MODEL_PATH + "_DOC_DONE");
    }
    //pstream.General_Train_Test(ParameterSetting.TRAIN_TEST_RATE);
    //dnn_train
}
/// <summary>
/// Allocates output-layer derivative buffers for query and doc sides,
/// sized for all nTrail negative trials across a full batch.
/// NOTE(review): the (false, true) CudaPieceFloat flags differ from the
/// (true, true) allocations used elsewhere — presumably skipping the host
/// copy; confirm against the CudaPieceFloat constructor.
/// </summary>
public Layer_Output_Deriv_QD_PairTOP_Full(DNN query_dnn_model, DNN doc_dnn_model, int nTrail)
{
    int perTrialBatch = ParameterSetting.BATCH_SIZE * nTrail;
    cuda_layer_Deriv_Q = new CudaPieceFloat(perTrialBatch * query_dnn_model.OutputLayerSize, false, true);
    cuda_layer_Deriv_D = new CudaPieceFloat(perTrialBatch * doc_dnn_model.OutputLayerSize, false, true);
}