/// <summary>
/// Tests JMLNeurophClassifier: builds a 4-16-3 MLP over the given Java-ML
/// data set, then classifies one normalized iris test item and prints both
/// the predicted class and the class distribution.
/// </summary>
/// <param name="jmlDataset"> Dataset Java-ML data set </param>
private static void testJMLNeurophClassifier(Dataset jmlDataset)
{
    MultiLayerPerceptron neuralNet = new MultiLayerPerceptron(4, 16, 3);

    // set labels for output neurons (one per iris species)
    neuralNet.OutputNeurons[0].Label = "Setosa";
    neuralNet.OutputNeurons[1].Label = "Versicolor";
    neuralNet.OutputNeurons[2].Label = "Virginica";

    // initialize NeurophJMLClassifier and train it on the Java-ML data set
    JMLNeurophClassifier jmlnClassifier = new JMLNeurophClassifier(neuralNet);
    jmlnClassifier.buildClassifier(jmlDataset);

    // test item
    //double[] item = {5.1, 3.5, 1.4, 0.2}; // normalized item is below
    double[] item = new double[] { -0.27777777777777773, 0.1249999999999999, -0.4322033898305085, -0.45833333333333337 };

    // Java-ML instance out of test item
    Instance instance = new DenseInstance(item);

    // BUGFIX: the log messages previously echoed a different vector than the
    // one actually classified; they now print the real (normalized) test item.
    Console.WriteLine("NeurophJMLClassifier - classify of {-0.27777777777777773, 0.1249999999999999, -0.4322033898305085, -0.45833333333333337}");
    Console.WriteLine(jmlnClassifier.classify(instance));
    Console.WriteLine("NeurophJMLClassifier - classDistribution of {-0.27777777777777773, 0.1249999999999999, -0.4322033898305085, -0.45833333333333337}");
    Console.WriteLine(jmlnClassifier.classDistribution(instance));
}
/// <summary>
/// Verifies that dense and sparse instance representations agree: index-based
/// lookups return the same values, and the L2 norms of the full value vectors
/// match within <c>Epsilon</c>.
/// </summary>
public void get_value_instance()
{
    var denseValues = new[] { 1f, 0f, 3f, 4f, 1f, 0f, 1f, 0f };
    var ordinalValues = new[] { 1f, 0f, 3f, 4f };
    var binaryOffset = ordinalValues.Length;

    var sparseValues = new[] { 1f, 3f, 4f };
    var sparseIndices = new[] { 0, 2, 3 };
    var binaryValues = new[] { 1f, 0f, 1f, 0f };
    var binaryIndices = new[] { 0, 2 };

    var dense = new DenseInstance(denseValues, binaryOffset);
    var sparse = new SparseInstance(sparseValues, sparseIndices, binaryIndices, ordinalValues.Length, binaryValues.Length);

    var sparseOutput = sparse.GetValues();
    var denseOutput = dense.GetValues();

    // Ordinal section: both representations must agree at index 2.
    Assert.Equal(3f, sparse.GetValue(2));
    Assert.Equal(3f, dense.GetValue(2));

    // Binary section: both representations must agree at index 7.
    Assert.Equal(0f, sparse.GetValue(7));
    Assert.Equal(0f, dense.GetValue(7));

    // The full value vectors must be numerically equivalent.
    Assert.True(Math.Abs(denseOutput.L2Norm() - sparseOutput.L2Norm()) < Epsilon);
}
/// <summary>
/// Checks that the L2 distance between two dense instances matches the
/// distance computed from the equivalent rows of a 2-D array, whichever of
/// the three available overloads is used.
/// </summary>
public void dist_dense_instance()
{
    // Features 0..4 are ordinal; features from index 5 onward are binary.
    const int binaryOffset = 5;

    var rowA = new[] { 1f, 0f, 3f, 4f, 1f, 0f, 1f, 0f };
    var rowB = new[] { 5f, 6f, 0f, 8f, 0f, 1f, 1f, 0f };
    var matrix = new[, ] { { 1f, 0f, 3f, 4f, 1f, 0f, 1f, 0f }, { 5f, 6f, 0f, 8f, 0f, 1f, 1f, 0f } };

    var first = new DenseInstance(rowA, binaryOffset);
    var second = new DenseInstance(rowB, binaryOffset);

    var instanceToInstance = first.L2Dist(second);
    var instanceToRow = first.L2Dist(matrix, 1, 8);
    var rowToRow = matrix.L2Dist(0, 1, 8);

    // All three formulations must agree within tolerance.
    Assert.True(Math.Abs(instanceToInstance - rowToRow) < Epsilon);
    Assert.True(Math.Abs(instanceToRow - rowToRow) < Epsilon);
}
// Builds a single Weka instance describing a "who" candidate token.
// Attribute layout: 6 fixed attributes (value, word count, sentence index,
// position, sentence-start proximity, frequency), followed by whoWordsBefore
// preceding-word attributes, whoWordsAfter following-word attributes, their
// matching part-of-speech attributes, and finally the class attribute.
private Instance createSingleWhoInstance(FastVector fvWho, Token candidate)
{
    //first word-n attribute number
    int wordsBeforeFirstAttributeNumber = 6;
    //first pos-n attribute number
    int posBeforeFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whoWordsBefore + whoWordsAfter;
    //word+1 attribute number
    int wordsAfterFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whoWordsBefore;
    //pos+1 attribute number
    int posAfterFirstAttributeNumber = posBeforeFirstAttributeNumber + whoWordsBefore;
    // fixed attributes + word/pos context windows + class attribute
    int totalAttributeCount = wordsBeforeFirstAttributeNumber + whoWordsBefore * 2 + whoWordsAfter * 2 + 1;
    Instance whoCandidate = new DenseInstance(totalAttributeCount);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(0), candidate.Value);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(1), candidate.Value.Split(' ').Count());
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(2), candidate.Sentence);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(3), candidate.Position);
    // Relative offset of the candidate within its sentence, normalized by
    // sentence length; stays -1 when the sentence cannot be located, in which
    // case attribute 4 is left unset (missing).
    double sentenceStartProximity = -1;
    foreach (List <Token> tokenList in segregatedArticleCurrent)
    {
        if (tokenList.Count > 0 && tokenList[0].Sentence == candidate.Sentence)
        {
            sentenceStartProximity = (double)(candidate.Position - tokenList[0].Position) / (double)tokenList.Count;
            break;
        }
    }
    if (sentenceStartProximity > -1)
    {
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(4), sentenceStartProximity);
    }
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(5), candidate.Frequency);
    // Context window BEFORE the candidate: i counts down so attributes are
    // filled left-to-right (word-n ... word-1, plus POS when available).
    // NOTE(review): the guard checks Position - i - 1 >= 0 and indexes the
    // same offset; confirm the extra -1 against how Position is assigned.
    for (int i = whoWordsBefore; i > 0; i--)
    {
        if (candidate.Position - i - 1 >= 0)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(whoWordsBefore - i + wordsBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].Value);
            if (articleCurrent[candidate.Position - i - 1].PartOfSpeech != null)
            {
                whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(whoWordsBefore - i + posBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].PartOfSpeech);
            }
        }
    }
    // Context window AFTER the candidate (word+1 ... word+N, plus POS).
    for (int i = 0; i < whoWordsAfter; i++)
    {
        if (candidate.Position + i < articleCurrent.Count)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(wordsAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].Value);
            if (articleCurrent[candidate.Position + i].PartOfSpeech != null)
            {
                whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(posAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].PartOfSpeech);
            }
        }
    }
    return(whoCandidate);
}
/// <summary>
/// Classifies a single data row with a previously saved Weka
/// MultilayerPerceptron model and returns the predicted class value followed
/// by the per-class probability distribution (in percent).
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes</param>
/// <param name="classNames">Nominal labels of the class attribute</param>
/// <param name="dataValues">Values for the input attributes, in the same order as <paramref name="attributeArray"/></param>
/// <param name="classHeader">Name of the class attribute</param>
/// <param name="defaultclass">Placeholder class label assigned to the test row</param>
/// <param name="modelName">File name of the saved model under /models/</param>
/// <param name="hiddelLayers">Hidden-layer specification passed to the perceptron</param>
/// <param name="learningRate">Back-propagation learning rate</param>
/// <param name="momentum">Back-propagation momentum</param>
/// <param name="decimalPlaces">Number of decimal places used by the model</param>
/// <param name="trainingTime">Number of training epochs</param>
/// <returns>List whose first element is the predicted class index, followed by one probability (%) per class</returns>
public List <double> testMLPUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Build the nominal class attribute from the provided labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Build the numeric input attributes; the class attribute goes last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Set instance's values for the attributes (direct indexing instead of
    // LINQ ElementAt on an array).
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues[i]);
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the saved model and apply the requested hyper-parameters.
    java.io.File path = new java.io.File("/models/");
    weka.classifiers.functions.MultilayerPerceptron clRead = loadModel(modelName, path);
    clRead.setHiddenLayers(hiddelLayers.ToString());
    clRead.setLearningRate(learningRate);
    clRead.setMomentum(momentum);
    clRead.setNumDecimalPlaces(decimalPlaces);
    clRead.setTrainingTime(trainingTime);

    // NOTE(review): randomizing a single-instance data set is effectively a
    // no-op; kept for compatibility with the original pipeline.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    double[] predictionDistribution = clRead.distributionForInstance(data.get(0));

    // Convert the raw distribution into percentages.
    List <double> predictionDistributions = new List <double>();
    for (int predictionDistributionIndex = 0; predictionDistributionIndex < predictionDistribution.Length; predictionDistributionIndex++)
    {
        double prob = predictionDistribution[predictionDistributionIndex] * 100;
        predictionDistributions.Add(prob);
    }

    List <double> prediction = new List <double>();
    prediction.Add(classValue);
    prediction.AddRange(predictionDistributions);
    return(prediction);
}
// Classifies a file's quality with a previously trained and saved Weka
// FilteredClassifier, storing the predicted class label on the file object.
// Throws ClassificationException when Weka fails to classify the instance.
private Arquivo AvaliarWekaClassificacao(Arquivo _arquivoAvaliado)
{
    // Import the classifier that was saved during training.
    FilteredClassifier classifier = (FilteredClassifier)ImportaClassificadorSalvo();
    // Import the training data saved during training.
    Instances dadosTreinamento = ImportaDadosTreinamentoSalvo();
    dadosTreinamento.setClassIndex(dadosTreinamento.numAttributes() - 1); // class attribute is last
    int numAttributes = dadosTreinamento.numAttributes();
    // Create the test instance, bound to the training data's schema.
    Instance instance = new DenseInstance(numAttributes);
    instance.setDataset(dadosTreinamento);
    // Initialize every attribute with the value zero.
    for (int i = 0; i < numAttributes; i++)
    {
        instance.setValue(i, 0);
    }
    // Insert the text to be evaluated.
    // NOTE(review): the original comment said "into the first attribute",
    // but this loop writes the text into every non-class attribute —
    // confirm the intended behavior against the trained filter.
    for (int i = 0; i < numAttributes - 1; i++)
    {
        instance.setValue(i, _arquivoAvaliado.TextoFormatado);
    }
    // Mark the class as missing so it can be predicted.
    instance.setClassMissing();
    // Classify the test instance.
    var resultado = "";
    try
    {
        // Run the classification, obtaining the predicted numeric class.
        var predicao = classifier.classifyInstance(instance);
        instance.setClassValue(predicao);
        // Translate the numeric result into the expected class label.
        resultado = dadosTreinamento.classAttribute().value((int)predicao);
        var distribuicao = classifier.distributionForInstance(instance);
    }
    catch (Exception)
    {
        throw (new ClassificationException("O texto não pode ser classificado quanto à sua qualidade."));
    }
    // Store the result on the evaluated file.
    _arquivoAvaliado.Classificacao = resultado;
    return(_arquivoAvaliado);
}
// Builds a single Weka instance describing a "why" candidate token.
// Attribute layout: 7 fixed attributes (value, word count, sentence index,
// score, who/when/where counts), followed by whyWordsBefore preceding-word
// attributes, whyWordsAfter following-word attributes, their matching
// part-of-speech attributes, and finally the class attribute.
private Instance createSingleWhyInstance(FastVector fvWhy, Token candidate)
{
    //first word-n attribute number
    int wordsBeforeFirstAttributeNumber = 7;
    //first pos-n attribute number
    int posBeforeFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whyWordsBefore + whyWordsAfter;
    //word+1 attribute number
    int wordsAfterFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whyWordsBefore;
    //pos+1 attribute number
    int posAfterFirstAttributeNumber = posBeforeFirstAttributeNumber + whyWordsBefore;
    // fixed attributes + word/pos context windows + class attribute
    int totalAttributeCount = wordsBeforeFirstAttributeNumber + whyWordsBefore * 2 + whyWordsAfter * 2 + 1;
    Instance whyCandidate = new DenseInstance(totalAttributeCount);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(0), candidate.Value);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(1), candidate.Value.Split(' ').Count());
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(2), candidate.Sentence);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(3), candidate.Score);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(4), candidate.NumWho);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(5), candidate.NumWhen);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(6), candidate.NumWhere);
    // Context window BEFORE the candidate: i counts down so attributes are
    // filled left-to-right (word-n ... word-1, plus POS when available).
    // NOTE(review): the guard checks Position - i - 1 >= 0 and indexes the
    // same offset; confirm the extra -1 against how Position is assigned.
    for (int i = whyWordsBefore; i > 0; i--)
    {
        if (candidate.Position - i - 1 >= 0)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(whyWordsBefore - i + wordsBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].Value);
            if (articleCurrent[candidate.Position - i - 1].PartOfSpeech != null)
            {
                whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(whyWordsBefore - i + posBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].PartOfSpeech);
            }
        }
    }
    // Context window AFTER the candidate (word+1 ... word+N, plus POS).
    for (int i = 0; i < whyWordsAfter; i++)
    {
        if (candidate.Position + i < articleCurrent.Count)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(wordsAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].Value);
            if (articleCurrent[candidate.Position + i].PartOfSpeech != null)
            {
                whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(posAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].PartOfSpeech);
            }
        }
    }
    return(whyCandidate);
}
/// <summary>
/// Predicts a salary from the scored features using the trained classifier.
/// </summary>
/// <param name="jobScore">job score</param>
/// <param name="schoolScore">school score</param>
/// <param name="degreeScore">education-level score</param>
/// <param name="addrScore">region score</param>
/// <param name="year">years of work experience</param>
/// <returns>estimated salary</returns>
public double predicate(double jobScore, double schoolScore, double degreeScore, double addrScore, double year)
{
    Instance sample = new DenseInstance(header.numAttributes());
    sample.setDataset(header);

    // Fill the attributes in schema order; index 5 is the salary to be
    // predicted and is left at a placeholder value of 0.
    double[] features = { jobScore, schoolScore, degreeScore, addrScore, year, 0 };
    for (int i = 0; i < features.Length; i++)
    {
        sample.setValue(i, features[i]);
    }

    return cl.classifyInstance(sample);
}
/// <summary>
/// Test NeurophWekaClassifier: builds a 4-16-3 MLP over the given Weka data
/// set, then classifies one normalized iris test item and prints the
/// predicted class index and per-class distribution.
/// </summary>
/// <param name="wekaDataset"> Instances Weka data set </param>
private static void testNeurophWekaClassifier(Instances wekaDataset)
{
    try
    {
        // 4 inputs (iris features), 16 hidden neurons, 3 output classes.
        MultiLayerPerceptron neuralNet = new MultiLayerPerceptron(4, 16, 3);
        // set labels manualy
        neuralNet.OutputNeurons[0].Label = "Setosa";
        neuralNet.OutputNeurons[1].Label = "Versicolor";
        neuralNet.OutputNeurons[2].Label = "Virginica";
        // initialize NeurophWekaClassifier
        WekaNeurophClassifier neurophWekaClassifier = new WekaNeurophClassifier(neuralNet);
        // set class index on data set (attribute 4 = class)
        wekaDataset.setClassIndex(4);
        // process data set
        neurophWekaClassifier.buildClassifier(wekaDataset);
        // test item
        //double[] item = {5.1, 3.5, 1.4, 0.2, 0.0}; // normalized item is below
        double[] item = new double[] { 0.22222222222222213, 0.6249999999999999, 0.06779661016949151, 0.04166666666666667, 0 };
        // create weka instance for test item (first argument is the weight)
        Instance instance = new DenseInstance(1, item);
        // test classification
        Console.WriteLine("NeurophWekaClassifier - classifyInstance for {5.1, 3.5, 1.4, 0.2}");
        Console.WriteLine("Class idx: " + neurophWekaClassifier.classifyInstance(instance));
        Console.WriteLine("NeurophWekaClassifier - distributionForInstance for {5.1, 3.5, 1.4, 0.2}");
        double[] dist = neurophWekaClassifier.distributionForInstance(instance);
        for (int i = 0; i < dist.Length; i++)
        {
            Console.WriteLine("Class " + i + ": " + dist[i]);
        }
    }
    catch (Exception ex)
    {
        // Sample code: failures are logged rather than propagated.
        //JAVA TO C# CONVERTER WARNING: The .NET Type.FullName property will not always yield results identical to the Java Class.getName method:
        Logger.getLogger(typeof(WekaNeurophSample).FullName).log(Level.SEVERE, null, ex);
    }
}
/// <summary>
/// Preprocesses a specific domain into a Weka instance: the attribute values
/// are produced by InputLayerWrapperMulti and the last attribute holds the
/// class label (parsed as a double).
/// </summary>
/// <param name="domain">Domain name to extract attributes from</param>
/// <param name="labelClass">Class label, convertible to a double</param>
/// <param name="numAttributes">Total attribute count including the class attribute</param>
/// <returns>the populated DenseInstance</returns>
public static Instance GetInstanceForWeka(string domain, string labelClass, int numAttributes)
{
    DenseInstance instance = new DenseInstance(numAttributes);

    // BUGFIX: removed the non-compiling template placeholder line
    // "instance.setValue(<attribute index>, <value>);" left over from scaffolding.
    //instance.setValue(0, domain.Length);

    // Extract the attribute values for the domain.
    InputLayerWrapperMulti wrapper = new InputLayerWrapperMulti();
    wrapper.ConfigureDomainAttribute(domain);
    double[] values = wrapper.valuesAttributes;
    for (int i = 0; i < values.Length; i++)
    {
        instance.setValue(i, values[i]);
    }

    // The class label goes into the last attribute.
    instance.setValue(numAttributes - 1, Convert.ToDouble(labelClass));
    return(instance);
}
/// <summary>
/// Classifies a single data row with a previously saved Weka Bagging model
/// and returns the predicted class label.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes</param>
/// <param name="classNames">Nominal labels of the class attribute</param>
/// <param name="dataValues">Values for the input attributes, in the same order as <paramref name="attributeArray"/></param>
/// <param name="classHeader">Name of the class attribute</param>
/// <param name="defaultclass">Placeholder class label assigned to the test row</param>
/// <param name="modelName">File name of the saved model under /models/</param>
/// <returns>the predicted class label</returns>
public string testHybridEmotionUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName)
{
    // Build the nominal class attribute from the provided labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Build the numeric input attributes; the class attribute goes last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Set instance's values for the attributes (direct indexing instead of
    // LINQ ElementAt on an array).
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues[i]);
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the saved Bagging model.
    java.io.File path = new java.io.File("/models/");
    weka.classifiers.meta.Bagging clRead = loadBaggingModel(modelName, path);

    // NOTE(review): randomizing a single-instance data set is effectively a
    // no-op; kept for compatibility with the original pipeline.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    // BUGFIX: map the numeric prediction to its label with a direct cast
    // instead of round-tripping through ToString/Int32.Parse, which is
    // culture- and format-fragile (e.g. "2.0" would throw).
    string classValueString = classLabel.get((int)classValue).ToString();
    return(classValueString);
}
/// <summary>
/// Converts a Neuroph data set to a Java-ML data set. For supervised rows
/// the inputs and desired outputs are concatenated into one value vector;
/// unsupervised rows contribute their inputs only. The row label becomes
/// the instance's class value either way.
/// </summary>
/// <param name="neurophDataset"> Dataset Neuroph data set </param>
/// <returns> Dataset Java-ML data set </returns>
public static Dataset convertNeurophToJMLDataset(DataSet neurophDataset)
{
    Dataset jmlDataset = new DefaultDataset();
    int inputCount = neurophDataset.InputSize;
    int outputCount = neurophDataset.OutputSize;

    foreach (DataSetRow row in neurophDataset.Rows)
    {
        double[] vector;
        if (outputCount > 0)
        {
            // Supervised row: concatenate inputs and desired outputs.
            vector = new double[inputCount + outputCount];
            for (int i = 0; i < inputCount; i++)
            {
                vector[i] = row.Input[i];
            }
            for (int j = 0; j < outputCount; j++)
            {
                vector[inputCount + j] = row.DesiredOutput[j];
            }
        }
        else
        {
            // Unsupervised row: inputs only.
            vector = row.Input;
        }

        Instance instance = new DenseInstance(vector);
        instance.ClassValue = row.Label;
        jmlDataset.add(instance);
    }
    return(jmlDataset);
}
/// <summary>
/// Adds a training example to the data set: attribute 0 holds the text and
/// attributes 1..tagsNb are binary indicators, one per tag.
/// </summary>
/// <param name="text">raw text of the example</param>
/// <param name="tags">indices of the tags that apply to the text</param>
public void Train(string text, IList<int> tags)
{
    Instance example = new DenseInstance(tagsNb + 1);
    example.setDataset(dataSet);
    example.setValue(0, text);

    // One binary attribute per tag: 1 when the tag applies, 0 otherwise.
    for (int tag = 0; tag < tagsNb; tag++)
    {
        example.setValue(tag + 1, tags.Contains(tag) ? 1 : 0);
    }

    dataSet.add(example);
}
/// <summary>
/// Predicts the tag bipartition for the given text and returns it as a
/// sequence of 1/0 indicator values, one per tag.
/// </summary>
/// <param name="text">raw text to classify</param>
/// <returns>one indicator value (1 or 0) per tag</returns>
public IEnumerable<double> Tag(string text)
{
    Instance query = new DenseInstance(tagsNb + 1);
    query.setDataset(dataSet);
    query.setValue(0, text);

    MultiLabelOutput prediction = cl.makePrediction(query);

    var indicators = new List<double>();
    foreach (bool isSet in prediction.getBipartition())
    {
        indicators.Add(isSet ? 1 : 0);
    }
    return indicators;
}
// Trains (or loads) the classifier and money-management model for a
// candidate, then optionally runs it over the test instances, recording
// predictions and opening simulated deals for positive signals.
// Class values encode trade outcomes 0/1/2; a prediction of 2 is a signal.
public void TrainandTest(CandidateClassifier classifierInfo, CandidateParameter cp)
{
    //string dealType = classifierInfo.DealType;
    Classifier cls = null;
    if (TestParameters.UseTrain)
    {
        // Try to reuse a previously saved model before training a new one.
        string modelFileName = GetModelFileName(classifierInfo.Name);
        if (TestParameters.SaveModel)
        {
            cls = WekaUtils.TryLoadClassifier(modelFileName);
        }
        Instances trainInstancesNew, trainInstances;
        trainInstances = m_trainInstances;
        trainInstancesNew = m_trainInstancesNew;
        if (cls == null)
        {
            // No saved model: create (or reuse) the candidate's classifier.
            if (classifierInfo.Classifier == null)
            {
                classifierInfo.Classifier = WekaUtils.CreateClassifier(cp.ClassifierType, m_currentTp, m_currentSl);
            }
            cls = classifierInfo.Classifier;
        }
        else
        {
            if (TestParameters.EnableDetailLog)
            {
                System.Console.WriteLine("Model is loaded.");
            }
        }
        if (m_enableTrainSplitTest)
        {
            // Split the training data into a train part and a held-out part.
            // Three strategies: fixed count, percentage, or time-batch based.
            Instances trainTrainInst, trainTestInst;
            DateTime splitTrainTimeEnd;
            if (m_trainSplitTestNums != -1)
            {
                // Fixed number of held-out instances taken from the tail.
                int trainTrainSize = trainInstancesNew.numInstances() - m_trainSplitTestNums;
                int trainTestSize = m_trainSplitTestNums;
                trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                trainTestInst = new Instances(trainInstancesNew, trainTrainSize, trainTestSize);
                splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
            }
            else if (m_trainSplitPercent != -1)
            {
                if (m_trainSplitPercent == 100.0)
                {
                    // 100%: train and "test" on the full set.
                    int trainTrainSize = trainInstancesNew.numInstances();
                    trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    trainTestInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
                }
                else
                {
                    // Percentage split: head for training, tail for testing.
                    int trainTrainSize = (int)Math.Round(trainInstancesNew.numInstances() * m_trainSplitPercent / 100);
                    int trainTestSize = trainInstancesNew.numInstances() - trainTrainSize;
                    trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    trainTestInst = new Instances(trainInstancesNew, trainTrainSize, trainTestSize);
                    splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
                }
            }
            else
            {
                // Time-batch split: walk the cutoff back in BatchTestMinutes
                // steps until it is not after the last instance's timestamp,
                // then partition instances by that cutoff.
                trainTrainInst = new Instances(trainInstancesNew, 0);
                trainTestInst = new Instances(trainInstancesNew, 0);
                DateTime dt = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainInstances.numInstances() - 1);
                splitTrainTimeEnd = m_trainTimeEnd.AddMinutes(-TestParameters.BatchTestMinutes);
                while (splitTrainTimeEnd > dt)
                {
                    splitTrainTimeEnd = splitTrainTimeEnd.AddMinutes(-TestParameters.BatchTestMinutes);
                }
                for (int i = 0; i < trainInstances.numInstances(); ++i)
                {
                    dt = WekaUtils.GetDateValueFromInstances(trainInstances, 0, i);
                    if (dt <= splitTrainTimeEnd)
                    {
                        var ins = new DenseInstance(trainInstancesNew.instance(i));
                        trainTrainInst.add(ins);
                    }
                    else
                    {
                        var ins = new DenseInstance(trainInstancesNew.instance(i));
                        trainTestInst.add(ins);
                    }
                }
            }
            // Train on the training part; save the model only when requested.
            cls = WekaUtils.TrainInstances(trainTrainInst, TestParameters.SaveModel ? modelFileName : null, cls);
            //m_classifierQueue[dealType].Enqueue(new ClassifierInfo(cls, splitTrainTimeEnd));
            //foreach (var i in m_classifierQueue[dealType])
            //{
            //    var e = WekaUtils.TestInstances(trainTestInst, i.Cls);
            //    i.TotalCost = i.TotalCost * m_classifierQueueFactor + e.totalCost();
            //    i.TotalNum = (int)(i.TotalNum * m_classifierQueueFactor) + (int)e.numInstances();
            //}
            //WriteEvalSummary(eval1, string.Format("Test Data from {0} to {1}", m_testTimeStart.ToString(Parameters.DateTimeFormat), m_testTimeEnd.ToString(Parameters.DateTimeFormat)));
        }
        else
        {
            // No split test: train on the full (new) training set.
            cls = WekaUtils.TrainInstances(trainInstancesNew, TestParameters.SaveModel ? modelFileName : null, cls);
            //m_classifierQueue[dealType].Enqueue(new ClassifierInfo(cls, m_trainTimeEnd));
            //foreach (var i in m_classifierQueue[dealType])
            //{
            //    var e = WekaUtils.TestInstances(trainInstancesNew, i.Cls);
            //    i.TotalCost = i.TotalCost * m_classifierQueueFactor + e.totalCost();
            //    i.TotalNum = (int)(i.TotalNum * m_classifierQueueFactor) + (int)e.numInstances();
            //}
        }
        if (TestParameters.EnableDetailLog)
        {
            System.Console.WriteLine("Model is trained.");
        }
        classifierInfo.Classifier = cls;
        //if (classifierInfo.CurrentTrainInstances1 != null)
        //{
        //    classifierInfo.CurrentTrainInstances1.clear();
        //}
        //if (classifierInfo.CurrentTrainInstancesNew1 != null)
        //{
        //    classifierInfo.CurrentTrainInstancesNew1.clear();
        //}
        //classifierInfo.CurrentTrainInstances = new Instances(trainInstances, 0, trainInstances.numInstances());
        //classifierInfo.CurrentTrainInstancesNew = new Instances(trainInstancesNew, 0, trainInstancesNew.numInstances());
        // Train (or create then train) the money-management model alongside.
        if (classifierInfo.MoneyManagement == null)
        {
            classifierInfo.MoneyManagement = WekaUtils.CreateMoneyManagement(cp.MoneyManagementType, m_currentTp, m_currentSl);
        }
        IMoneyManagement mm = WekaUtils.TrainInstances4MM(trainInstancesNew, TestParameters.SaveModel ? modelFileName : null, classifierInfo.MoneyManagement);
        classifierInfo.MoneyManagement = mm;
    }
    else
    {
        // Training disabled: just make sure classifier and MM objects exist.
        if (classifierInfo.Classifier == null)
        {
            classifierInfo.Classifier = WekaUtils.CreateClassifier(cp.ClassifierType, m_currentTp, m_currentSl);
        }
        cls = classifierInfo.Classifier;
        if (classifierInfo.MoneyManagement == null)
        {
            classifierInfo.MoneyManagement = WekaUtils.CreateMoneyManagement(cp.MoneyManagementType, m_currentTp, m_currentSl);
        }
    }
    if (m_enableTest)
    {
        Instances testInstancesNew, testInstances;
        testInstances = m_testInstances;
        testInstancesNew = m_testInstancesNew;
        // Predicted class per test instance; 2 means a positive trade signal.
        double[] cv = WekaUtils.ClassifyInstances(testInstancesNew, cls);
        if (TestParameters.EnableExcludeClassifier)
        {
            // Only bother loading the exclude classifier when there is at
            // least one positive prediction to veto.
            bool hasPositive = false;
            for (int i = 0; i < cv.Length; ++i)
            {
                if (cv[i] == 2)
                {
                    hasPositive = true;
                    break;
                }
            }
            if (hasPositive)
            {
                // Exclude
                if (classifierInfo.ExcludeClassifier == null)
                {
                    string modelFileName4Exclude = GetExcludeModelFileName(classifierInfo.Name);
                    classifierInfo.ExcludeClassifier = WekaUtils.TryLoadClassifier(modelFileName4Exclude);
                }
                if (classifierInfo.ExcludeClassifier != null)
                {
                    double[] cv2 = WekaUtils.ClassifyInstances(testInstancesNew, classifierInfo.ExcludeClassifier);
                    // cv2 == 0 -> is class, otherwise = double.NaN;
                    // Keep a signal only when both classifiers agree on 2.
                    for (int i = 0; i < cv.Length; ++i)
                    {
                        cv[i] = cv[i] == 2 && cv2[i] == 2 ? 2 : 0;
                    }
                }
            }
        }
        classifierInfo.CurrentTestRet = cv;
        // Record the true class values alongside the predictions.
        classifierInfo.CurrentClassValue = new double[testInstances.numInstances()];
        for (int i = 0; i < testInstances.numInstances(); ++i)
        {
            classifierInfo.CurrentClassValue[i] = testInstances.instance(i).classValue();
        }
        // Open a simulated deal for every positive prediction.
        for (int i = 0; i < testInstances.numInstances(); i++)
        {
            if (cv[i] == 2)
            {
                double openPrice = testInstances.instance(i).value(testInstances.attribute("mainClose"));
                DateTime openTime = WekaUtils.GetDateValueFromInstances(testInstances, 0, i);
                if (testInstances.instance(i).classValue() == 2 || testInstances.instance(i).classValue() == 0)
                {
                    // Outcome already known (hit TP or neither): close at the
                    // corresponding level, using the recorded close time.
                    classifierInfo.Deals.AddDeal(openTime, openPrice, classifierInfo.DealType, classifierInfo.MoneyManagement.GetVolume(testInstances.instance(i)), testInstances.instance(i).classValue() == 2 ? -classifierInfo.Tp : classifierInfo.Sl, WekaUtils.GetDateValueFromInstances(testInstances, 1, i));
                }
                else if (testInstances.instance(i).classValue() == 1)
                {
                    // Open-ended outcome: derive TP/SL price levels from the
                    // open price, direction-dependent ('B' = buy).
                    double closePriceTp, closePriceSl;
                    if (classifierInfo.DealType == 'B')
                    {
                        closePriceTp = openPrice + classifierInfo.Tp * DealsInfo.GetPoint(0);
                        closePriceSl = openPrice - classifierInfo.Sl * DealsInfo.GetPoint(0);
                    }
                    else
                    {
                        closePriceTp = openPrice - classifierInfo.Tp * DealsInfo.GetPoint(0);
                        closePriceSl = openPrice + classifierInfo.Sl * DealsInfo.GetPoint(0);
                    }
                    classifierInfo.Deals.AddDeal(openTime, openPrice, classifierInfo.DealType, classifierInfo.MoneyManagement.GetVolume(testInstances.instance(i)), closePriceTp, closePriceSl);
                }
                else
                {
                    throw new AssertException("classValue should be 0,1,2.");
                }
            }
        }
    }
}
/// <summary>
/// Demonstrates programmatic construction of a Weka Instances object with
/// numeric, nominal, string, date, and relational attributes, fills it with
/// two instances, and prints the result.
/// </summary>
public static void CreateArffFiles()
{
    // 1. set up attributes
    java.util.ArrayList atts = new java.util.ArrayList();
    // - numeric
    atts.Add(new weka.core.Attribute("att1"));
    // - nominal
    java.util.ArrayList attVals = new java.util.ArrayList();
    for (int i = 0; i < 5; i++)
    {
        attVals.add("val" + (i + 1));
    }
    weka.core.Attribute nominal = new weka.core.Attribute("att2", attVals);
    atts.add(nominal);
    // - string
    atts.add(new weka.core.Attribute("att3", (java.util.ArrayList)null));
    // - date
    atts.add(new weka.core.Attribute("att4", "yyyy-MM-dd"));
    // - relational
    java.util.ArrayList attsRel = new java.util.ArrayList();
    // -- numeric
    attsRel.add(new weka.core.Attribute("att5.1"));
    // -- nominal
    java.util.ArrayList attValsRel = new java.util.ArrayList();
    for (int i = 0; i < 5; i++)
    {
        attValsRel.Add("val5." + (i + 1));
    }
    attsRel.add(new weka.core.Attribute("att5.2", attValsRel));
    Instances dataRel = new Instances("att5", attsRel, 0);
    atts.add(new weka.core.Attribute("att5", dataRel, 0));

    // 2. create Instances object
    Instances data = new Instances("MyRelation", atts, 0);

    // 3. fill with data
    // first instance
    double[] vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal
    vals[1] = attVals.indexOf("val3");
    // - string
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    // - date
    vals[3] = data.attribute(3).parseDate("2001-11-09");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    // BUGFIX: the original wrote both relational values to index 1; use
    // indices 0 and 1.
    double[] valsRel = new double[] { Math.PI + 1, attValsRel.indexOf("val5.3") };
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    // BUGFIX: the original re-added the same unmodified instance; build a
    // fresh one from the new values.
    valsRel = new double[] { Math.PI + 2, attValsRel.indexOf("val5.2") };
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    // BUGFIX: the original allocated a 4-attribute instance for a
    // 5-attribute data set and wrote every value to index 1.
    data.add(new DenseInstance(1.0, vals));

    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    // - date
    vals[3] = data.attribute(3).parseDate("2000-12-01");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[] { Math.E + 1, attValsRel.indexOf("val5.4") };
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    valsRel = new double[] { Math.E + 2, attValsRel.indexOf("val5.1") };
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    // BUGFIX: the original re-added the first instance here instead of one
    // built from the second set of values.
    data.add(new DenseInstance(1.0, vals));

    data.setClassIndex(data.numAttributes() - 1);

    // 4. output data
    for (int x = 0; x < data.numInstances(); x++)
    {
        weka.core.Instance ins = data.instance(x);
        // BUGFIX: the original printed ins.value(x), indexing the attribute
        // by the instance number; print the whole instance instead.
        System.Console.WriteLine(ins.toString());
    }
    return;
}
/// <summary>
/// Converts a Neuroph data set to a Weka data set. Supervised rows get their
/// inputs copied plus a class attribute derived from the row label / one-hot
/// desired output; unsupervised rows contribute their inputs only.
/// </summary>
/// <param name="neurophDataset"> DataSet Neuroph data set </param>
/// <returns> instances Weka data set </returns>
public static Instances convertNeurophToWekaDataset(DataSet neurophDataset)
{
    IDictionary <double[], string> classValues = getClassValues(neurophDataset);
    Instances instances = createEmptyWekaDataSet(neurophDataset.InputSize, neurophDataset.size(), classValues);
    int numInputs = neurophDataset.InputSize;
    // int numOutputs = neurophDataset.getOutputSize();
    // Weka represents the class as a single (nominal) attribute, so the
    // original multi-output size is collapsed to one output column.
    int numOutputs = 1;
    instances.ClassIndex = numInputs;
    IEnumerator <DataSetRow> iterator = neurophDataset.GetEnumerator();
    while (iterator.MoveNext()) // iterate all dataset rows
    {
        DataSetRow row = iterator.Current;
        if (numOutputs > 0) // if it is supervised (has outputs)
        {
            Instance instance = new DenseInstance(numInputs + numOutputs);
            for (int i = 0; i < numInputs; i++)
            {
                instance.setValue(i, row.Input[i]);
            }
            instance.Dataset = instances;
            // set output attribute, as String and double value of class
            foreach (KeyValuePair <double[], string> entry in classValues)
            {
                if (entry.Value.Equals(row.Label))
                {
                    instance.setValue(numInputs, entry.Value);
                    // BUGFIX: the original loop had an unconditional break as
                    // its last statement, so only rowDouble[0] was ever
                    // examined; now we scan until the one-hot 1 is found.
                    double[] rowDouble = row.DesiredOutput;
                    for (int i = 0; i < rowDouble.Length; i++)
                    {
                        if (rowDouble[i] == 1)
                        {
                            instance.setValue(numInputs, i);
                            break;
                        }
                    }
                    break;
                }
            }
            instances.add(instance);
        }
        // if it is unsupervised - has only inputs
        else
        {
            // create new instance
            Instance instance = new DenseInstance(numInputs);
            // set all input values
            for (int i = 0; i < numInputs; i++)
            {
                instance.setValue(i, row.Input[i]);
            }
            // and add instance to weka dataset
            instance.Dataset = instances;
            instances.add(instance);
        }
    }
    return(instances);
}
/// <summary>
/// Builds a single Weka instance for a "why" candidate token. The attribute
/// vector is: 7 fixed features (value, word count, sentence, score, and the
/// who/when/where counts), then the surrounding words before/after the
/// candidate, then their part-of-speech tags, plus one extra attribute
/// (presumably the class — TODO confirm against the FastVector definition).
/// </summary>
/// <param name="fvWhy">Attribute definitions; indexed positionally, must match the layout computed below.</param>
/// <param name="candidate">Token being scored as a "why" answer candidate.</param>
/// <returns>Unlabeled dense instance with all resolvable context features set.</returns>
private Instance createSingleWhyInstance(FastVector fvWhy, Token candidate)
{
    //first word-n attribute number
    int wordsBeforeFirstAttributeNumber = 7;
    //first pos-n attribute number
    int posBeforeFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whyWordsBefore + whyWordsAfter;
    //word+1 attribute number
    int wordsAfterFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whyWordsBefore;
    //pos+1 attribute number
    int posAfterFirstAttributeNumber = posBeforeFirstAttributeNumber + whyWordsBefore;
    // 7 fixed attributes + words/POS before + words/POS after + 1 (class?)
    int totalAttributeCount = wordsBeforeFirstAttributeNumber + whyWordsBefore * 2 + whyWordsAfter * 2 + 1;
    Instance whyCandidate = new DenseInstance(totalAttributeCount);
    // Fixed features of the candidate itself.
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(0), candidate.Value);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(1), candidate.Value.Split(' ').Count());
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(2), candidate.Sentence);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(3), candidate.Score);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(4), candidate.NumWho);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(5), candidate.NumWhen);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(6), candidate.NumWhere);
    // Context words BEFORE the candidate (attributes are left unset when the
    // window runs off the start of the article, or POS is missing).
    // NOTE(review): the "before" window indexes Position - i - 1 while the
    // "after" window below starts at Position + i with i = 0 (i.e. the
    // candidate itself) — asymmetric; confirm whether Position is 0- or
    // 1-based and whether the token at Position should be included.
    for (int i = whyWordsBefore; i > 0; i--)
    {
        if (candidate.Position - i - 1 >= 0)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(whyWordsBefore - i + wordsBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].Value);
            if (articleCurrent[candidate.Position - i - 1].PartOfSpeech != null)
            {
                whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(whyWordsBefore - i + posBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].PartOfSpeech);
            }
        }
    }
    // Context words AFTER (from) the candidate position.
    for (int i = 0; i < whyWordsAfter; i++)
    {
        if (candidate.Position + i < articleCurrent.Count)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(wordsAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].Value);
            if (articleCurrent[candidate.Position + i].PartOfSpeech != null)
            {
                whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(posAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].PartOfSpeech);
            }
        }
    }
    return whyCandidate;
}
/// <summary>
/// Tags free text with per-label confidence scores using the multi-label
/// classifier. Confidences are rescaled into [0, 1]: labels predicted
/// relevant map above 0.5, irrelevant ones below 0.5.
/// </summary>
/// <param name="text">Raw text to classify.</param>
/// <returns>One score per label, in bipartition order.</returns>
public IEnumerable<double> Tag(string text)
{
    // Build a single-attribute instance carrying the raw text.
    Instance textInstance = new DenseInstance(tagsNb + 1);
    textInstance.setDataset(oDataSet);
    textInstance.setValue(0, text);

    // Vectorize the text through the string-to-vector filter.
    stv.input(textInstance);
    textInstance = stv.output();

    MultiLabelOutput prediction = lps.makePrediction(textInstance);

    List<double> scores = new List<double>();
    int confidenceIndex = 0;
    foreach (bool isRelevant in prediction.getBipartition())
    {
        double halfConfidence = prediction.getConfidences()[confidenceIndex++] / 2;
        scores.Add(isRelevant ? halfConfidence + 0.5 : 0.5 - halfConfidence);
    }
    return scores;
}
/// <summary>
/// Classifies a single data row with a pre-trained Weka SMO model and
/// returns the predicted class index.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes.</param>
/// <param name="classNames">All possible class labels (defines the nominal class attribute).</param>
/// <param name="dataValues">Input values, one per attribute in <paramref name="attributeArray"/> order.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Placeholder class label assigned before prediction.</param>
/// <param name="modelName">Name/path of the serialized SMO model.</param>
/// <param name="hiddelLayers">Unused; kept for caller compatibility.</param>
/// <param name="learningRate">Unused; kept for caller compatibility.</param>
/// <param name="momentum">Unused; kept for caller compatibility.</param>
/// <param name="decimalPlaces">Unused; values are rounded to 5 places regardless — TODO confirm intent.</param>
/// <param name="trainingTime">Unused; kept for caller compatibility.</param>
/// <returns>Single-element list containing the predicted class value (class index as double).</returns>
public List<double> testSMOUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Build the nominal class attribute from the supplied labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Build the numeric input attributes, class attribute last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        weka.core.Attribute newAttribute = new weka.core.Attribute(attribute);
        attributeList.Add(newAttribute);
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Populate a single instance with the attribute values (rounded to 5
    // decimal places) and the placeholder class label.
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, Math.Round(dataValues.ElementAt(i), 5));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the trained SMO model; if the models-folder lookup fails, fall
    // back to deserializing directly from modelName.
    weka.classifiers.functions.SMO clRead = new weka.classifiers.functions.SMO();
    try
    {
        java.io.File path = new java.io.File("/models/");
        clRead = loadSMOModel(modelName, path);
    }
    catch (Exception)
    {
        // NOTE(review): the original exception is swallowed and never logged;
        // the fallback path simply deserializes the model from modelName.
        clRead = (weka.classifiers.functions.SMO)weka.core.SerializationHelper.read(modelName);
    }

    // Configure the classifier to match its training configuration.
    clRead.setBatchSize("100");
    clRead.setCalibrator(new weka.classifiers.functions.Logistic());
    clRead.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
    clRead.setEpsilon(1.02E-12);
    clRead.setC(1.0);
    clRead.setDebug(false);
    clRead.setChecksTurnedOff(false);
    clRead.setFilterType(new SelectedTag(weka.classifiers.functions.SMO.FILTER_NORMALIZE, weka.classifiers.functions.SMO.TAGS_FILTER));

    double classValue = clRead.classifyInstance(data.get(0));

    List<double> prediction = new List<double>();
    prediction.Add(classValue);
    return prediction;
}
/// <summary>
/// Tags free text by running one binary classifier per tag over the
/// vectorized text and collecting each classifier's predicted class value.
/// </summary>
/// <param name="text">Raw text to classify.</param>
/// <returns>One prediction per tag, in classifier order.</returns>
public IEnumerable<double> Tag(string text)
{
    // Vectorize the raw text into a dense instance with weight 1.0.
    Instance vectorized = new DenseInstance(1.0d, new ConcreteBasicTextVector(text, false).ToVector(1));

    // Run the same instance through every per-tag classifier, binding it to
    // the matching dataset header each time.
    double[] predictions = new double[this.not];
    int index = 0;
    while (index < this.not)
    {
        vectorized.setDataset(datasets[index]);
        predictions[index] = classifiers[index].classifyInstance(vectorized);
        index++;
    }
    return predictions;
}
/// <summary>
/// Builds a single Weka instance for a "who" candidate token. The attribute
/// vector is: 6 fixed features (value, word count, sentence, position,
/// sentence-start proximity, frequency), then the surrounding words
/// before/after the candidate, then their part-of-speech tags, plus one
/// extra attribute (presumably the class — TODO confirm against the
/// FastVector definition).
/// </summary>
/// <param name="fvWho">Attribute definitions; indexed positionally, must match the layout computed below.</param>
/// <param name="candidate">Token being scored as a "who" answer candidate.</param>
/// <returns>Unlabeled dense instance with all resolvable context features set.</returns>
private Instance createSingleWhoInstance(FastVector fvWho, Token candidate)
{
    //first word-n attribute number
    int wordsBeforeFirstAttributeNumber = 6;
    //first pos-n attribute number
    int posBeforeFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whoWordsBefore + whoWordsAfter;
    //word+1 attribute number
    int wordsAfterFirstAttributeNumber = wordsBeforeFirstAttributeNumber + whoWordsBefore;
    //pos+1 attribute number
    int posAfterFirstAttributeNumber = posBeforeFirstAttributeNumber + whoWordsBefore;
    // 6 fixed attributes + words/POS before + words/POS after + 1 (class?)
    int totalAttributeCount = wordsBeforeFirstAttributeNumber + whoWordsBefore * 2 + whoWordsAfter * 2 + 1;
    Instance whoCandidate = new DenseInstance(totalAttributeCount);
    // Fixed features of the candidate itself.
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(0), candidate.Value);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(1), candidate.Value.Split(' ').Count());
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(2), candidate.Sentence);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(3), candidate.Position);
    // Relative offset of the candidate from the start of its own sentence,
    // as a fraction of sentence length; stays -1 (attribute unset) when the
    // candidate's sentence is not found in segregatedArticleCurrent.
    double sentenceStartProximity = -1;
    foreach (List<Token> tokenList in segregatedArticleCurrent)
    {
        if (tokenList.Count > 0 && tokenList[0].Sentence == candidate.Sentence)
        {
            sentenceStartProximity = (double)(candidate.Position - tokenList[0].Position) / (double)tokenList.Count;
            break;
        }
    }
    if (sentenceStartProximity > -1)
    {
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(4), sentenceStartProximity);
    }
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(5), candidate.Frequency);
    // Context words BEFORE the candidate (attributes are left unset when the
    // window runs off the start of the article, or POS is missing).
    // NOTE(review): the "before" window indexes Position - i - 1 while the
    // "after" window below starts at Position + i with i = 0 (i.e. the
    // candidate itself) — asymmetric; confirm whether Position is 0- or
    // 1-based and whether the token at Position should be included.
    for (int i = whoWordsBefore; i > 0; i--)
    {
        if (candidate.Position - i - 1 >= 0)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(whoWordsBefore - i + wordsBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].Value);
            if (articleCurrent[candidate.Position - i - 1].PartOfSpeech != null)
            {
                whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(whoWordsBefore - i + posBeforeFirstAttributeNumber), articleCurrent[candidate.Position - i - 1].PartOfSpeech);
            }
        }
    }
    // Context words AFTER (from) the candidate position.
    for (int i = 0; i < whoWordsAfter; i++)
    {
        if (candidate.Position + i < articleCurrent.Count)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(wordsAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].Value);
            if (articleCurrent[candidate.Position + i].PartOfSpeech != null)
            {
                whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(posAfterFirstAttributeNumber + i), articleCurrent[candidate.Position + i].PartOfSpeech);
            }
        }
    }
    return whoCandidate;
}