/// <summary>
/// Builds the Weka instance that describes one "who" candidate: six candidate-level
/// features followed by the words and part-of-speech tags surrounding the candidate.
/// </summary>
/// <param name="fvWho">Attribute vector; element k is cast to weka.core.Attribute and used as attribute k.</param>
/// <param name="candidate">Candidate token whose Value, Sentence, Position and Frequency are read.</param>
/// <returns>A new DenseInstance; attributes that are not assigned below are left untouched.</returns>
private Instance createSingleWhoInstance(FastVector fvWho, Token candidate)
{
    // Index layout: [0..5] fixed features, then word-n.., word+1.., pos-n.., pos+1..
    int wordBeforeBase = 6;
    int wordAfterBase = wordBeforeBase + whoWordsBefore;
    int posBeforeBase = wordBeforeBase + whoWordsBefore + whoWordsAfter;
    int posAfterBase = posBeforeBase + whoWordsBefore;
    // One slot more than the features assigned below (presumably the class attribute - TODO confirm).
    int attributeTotal = wordBeforeBase + whoWordsBefore * 2 + whoWordsAfter * 2 + 1;

    Instance whoCandidate = new DenseInstance(attributeTotal);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(0), candidate.Value);
    // Number of space-separated parts in the candidate text.
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(1), candidate.Value.Split(' ').Count());
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(2), candidate.Sentence);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(3), candidate.Position);

    // Distance from the first token of the candidate's sentence, divided by that
    // sentence's token count. Stays at -1 when no sentence matches.
    double sentenceStartProximity = -1;
    foreach (List <Token> sentenceTokens in segregatedArticleCurrent)
    {
        if (sentenceTokens.Count > 0 && sentenceTokens[0].Sentence == candidate.Sentence)
        {
            double offsetInSentence = (double)(candidate.Position - sentenceTokens[0].Position);
            sentenceStartProximity = offsetInSentence / (double)sentenceTokens.Count;
            break;
        }
    }
    if (sentenceStartProximity > -1)
    {
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(4), sentenceStartProximity);
    }
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(5), candidate.Frequency);

    // Preceding words, farthest first. Slot 0 reads articleCurrent[Position - whoWordsBefore - 1].
    // NOTE(review): the index arithmetic suggests Position is 1-based - confirm.
    for (int slot = 0; slot < whoWordsBefore; slot++)
    {
        var tokenIndex = candidate.Position - (whoWordsBefore - slot) - 1;
        if (tokenIndex < 0)
        {
            continue; // not enough text before the candidate
        }
        var neighbour = articleCurrent[tokenIndex];
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(slot + wordBeforeBase), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(slot + posBeforeBase), neighbour.PartOfSpeech);
        }
    }

    // Following words, nearest first, starting at articleCurrent[Position].
    for (int offset = 0; offset < whoWordsAfter; offset++)
    {
        var tokenIndex = candidate.Position + offset;
        if (tokenIndex >= articleCurrent.Count)
        {
            continue; // past the end of the article
        }
        var neighbour = articleCurrent[tokenIndex];
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(wordAfterBase + offset), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(posAfterBase + offset), neighbour.PartOfSpeech);
        }
    }

    return whoCandidate;
}
/// <summary>
/// Classifies one feature vector with a saved MultilayerPerceptron model and returns
/// the predicted class together with the class distribution.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes, in order.</param>
/// <param name="classNames">Nominal values of the class attribute.</param>
/// <param name="dataValues">Input values; element i is assigned to attribute i.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Class value stored on the test instance before classification.</param>
/// <param name="modelName">Model name passed to loadModel.</param>
/// <param name="hiddelLayers">Value passed (as text) to setHiddenLayers on the loaded model.</param>
/// <param name="learningRate">Value passed to setLearningRate on the loaded model.</param>
/// <param name="momentum">Value passed to setMomentum on the loaded model.</param>
/// <param name="decimalPlaces">Value passed to setNumDecimalPlaces on the loaded model.</param>
/// <param name="trainingTime">Value passed to setTrainingTime on the loaded model.</param>
/// <returns>
/// A list whose first element is the result of classifyInstance, followed by every
/// entry of distributionForInstance multiplied by 100.
/// </returns>
public List <double> testMLPUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Nominal class attribute built from the supplied class names.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric input attributes followed by the class attribute.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        weka.core.Attribute newAttribute = new weka.core.Attribute(attribute);
        attributeList.Add(newAttribute);
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Set instance's values for the attributes
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues.ElementAt(i));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    java.io.File path = new java.io.File("/models/");
    weka.classifiers.functions.MultilayerPerceptron clRead = loadModel(modelName, path);
    // NOTE(review): these options are applied to an already loaded model; whether they
    // influence classification of a trained network should be confirmed.
    clRead.setHiddenLayers(hiddelLayers.ToString());
    clRead.setLearningRate(learningRate);
    clRead.setMomentum(momentum);
    clRead.setNumDecimalPlaces(decimalPlaces);
    clRead.setTrainingTime(trainingTime);

    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    double[] predictionDistribution = clRead.distributionForInstance(data.get(0));

    // Result layout: [predicted class, p0 * 100, p1 * 100, ...].
    // The class-label lookup that used to run here was discarded on every iteration and
    // could throw when the distribution had more entries than classNames, so it is gone.
    List <double> prediction = new List <double>();
    prediction.Add(classValue);
    for (int index = 0; index < predictionDistribution.Length; index++)
    {
        prediction.Add(predictionDistribution[index] * 100);
    }
    return(prediction);
}
/// <summary>
/// Classifies the formatted text of a file with the saved classifier and stores the
/// predicted class label on that file.
/// </summary>
/// <param name="_arquivoAvaliado">File to evaluate; TextoFormatado is read and Classificacao is written.</param>
/// <returns>The same object that was passed in.</returns>
/// <exception cref="ClassificationException">Thrown when any exception is raised while classifying.</exception>
private Arquivo AvaliarWekaClassificacao(Arquivo _arquivoAvaliado)
{
    // Load the classifier and the training data that were saved by the training step.
    FilteredClassifier classifier = (FilteredClassifier)ImportaClassificadorSalvo();
    Instances dadosTreinamento = ImportaDadosTreinamentoSalvo();

    // The class is the last attribute.
    int numAttributes = dadosTreinamento.numAttributes();
    dadosTreinamento.setClassIndex(numAttributes - 1);

    // Create the test instance against the training header.
    Instance instance = new DenseInstance(numAttributes);
    instance.setDataset(dadosTreinamento);

    // Start with every attribute at zero.
    for (int atributo = 0; atributo < numAttributes; atributo++)
    {
        instance.setValue(atributo, 0);
    }

    // Put the text to evaluate into every attribute except the class.
    int ultimoAtributoDeTexto = numAttributes - 1;
    for (int atributo = 0; atributo < ultimoAtributoDeTexto; atributo++)
    {
        instance.setValue(atributo, _arquivoAvaliado.TextoFormatado);
    }

    // Mark the class as missing so that it can be predicted.
    instance.setClassMissing();

    string resultado;
    try
    {
        // Predict, store the prediction on the instance, and translate the numeric
        // result into the class label.
        double predicao = classifier.classifyInstance(instance);
        instance.setClassValue(predicao);
        resultado = dadosTreinamento.classAttribute().value((int)predicao);
        // The distribution is requested but its result is not used.
        classifier.distributionForInstance(instance);
    }
    catch (Exception)
    {
        throw (new ClassificationException("O texto não pode ser classificado quanto à sua qualidade."));
    }

    // Attach the result to the evaluated file.
    _arquivoAvaliado.Classificacao = resultado;
    return _arquivoAvaliado;
}
/// <summary>
/// Pre-processes a single domain into a Weka instance.
/// </summary>
/// <param name="domain">Domain text handed to InputLayerWrapperMulti.ConfigureDomainAttribute.</param>
/// <param name="labelClass">Class label; converted with Convert.ToDouble and stored in the last attribute.</param>
/// <param name="numAttributes">Number of attributes of the created instance.</param>
/// <returns>The populated DenseInstance.</returns>
public static Instance GetInstanceForWeka(string domain, string labelClass, int numAttributes)
{
    DenseInstance instance = new DenseInstance(numAttributes);

    // Attribute extraction: instance.setValue(<attribute index>, <value>);
    // e.g. instance.setValue(0, domain.Length);
    InputLayerWrapperMulti wrapper = new InputLayerWrapperMulti();
    wrapper.ConfigureDomainAttribute(domain);

    // Copy the extracted values into attributes 0..values.Length-1.
    double[] extracted = wrapper.valuesAttributes;
    int attributeIndex = 0;
    foreach (double attributeValue in extracted)
    {
        instance.setValue(attributeIndex, attributeValue);
        attributeIndex++;
    }

    // Class label goes into the last attribute.
    double classLabel = Convert.ToDouble(labelClass);
    instance.setValue(numAttributes - 1, classLabel);
    return instance;
}
/// <summary>
/// Classifies one feature vector with a saved Bagging model and returns the name of
/// the predicted class.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes, in order.</param>
/// <param name="classNames">Nominal values of the class attribute.</param>
/// <param name="dataValues">Input values; element i is assigned to attribute i.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Class value stored on the test instance before classification.</param>
/// <param name="modelName">Model name passed to loadBaggingModel.</param>
/// <returns>The entry of classNames at the predicted class index, as text.</returns>
public string testHybridEmotionUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName)
{
    // Class attribute.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    for (int c = 0; c < classNames.Length; c++)
    {
        classLabel.Add(classNames[c]);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Input attributes followed by the class attribute.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    for (int a = 0; a < attributeArray.Length; a++)
    {
        attributeList.Add(new weka.core.Attribute(attributeArray[a]));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Single test instance carrying the supplied values.
    weka.core.Instance testInstance = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        testInstance.setValue(i, dataValues.ElementAt(i));
    }
    testInstance.setValue(classHeaderName, defaultclass);
    data.add(testInstance);

    java.io.File modelDirectory = new java.io.File("/models/");
    weka.classifiers.meta.Bagging model = loadBaggingModel(modelName, modelDirectory);

    weka.filters.Filter randomize = new weka.filters.unsupervised.instance.Randomize();
    randomize.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, randomize);

    // The predicted class index is converted to an int through its text form.
    double classValue = model.classifyInstance(data.get(0));
    string classValueText = classValue.ToString();
    int classIndex = Int32.Parse(classValueText);
    return classLabel.get(classIndex).ToString();
}
/// <summary>
/// Predicts a salary.
/// </summary>
/// <param name="jobScore">Job position score.</param>
/// <param name="schoolScore">School score.</param>
/// <param name="degreeScore">Degree score.</param>
/// <param name="addrScore">Region score.</param>
/// <param name="year">Years of work experience.</param>
/// <returns>Estimated salary.</returns>
public double predicate(double jobScore, double schoolScore, double degreeScore, double addrScore, double year)
{
    // Attribute order: job, school, degree, region, years, salary (to be predicted, set to 0).
    double[] attributeValues = { jobScore, schoolScore, degreeScore, addrScore, year, 0 };

    Instance inst = new DenseInstance(header.numAttributes());
    inst.setDataset(header);
    for (int index = 0; index < attributeValues.Length; index++)
    {
        inst.setValue(index, attributeValues[index]);
    }
    return cl.classifyInstance(inst);
}
/// <summary>
/// Builds the Weka instance that describes one "why" candidate: seven candidate-level
/// features followed by the words and part-of-speech tags surrounding the candidate.
/// </summary>
/// <param name="fvWhy">Attribute vector; element k is cast to weka.core.Attribute and used as attribute k.</param>
/// <param name="candidate">Candidate token whose Value, Sentence, Score, NumWho, NumWhen, NumWhere and Position are read.</param>
/// <returns>A new DenseInstance; attributes that are not assigned below are left untouched.</returns>
private Instance createSingleWhyInstance(FastVector fvWhy, Token candidate)
{
    // Index layout: [0..6] fixed features, then word-n.., word+1.., pos-n.., pos+1..
    int wordBeforeBase = 7;
    int wordAfterBase = wordBeforeBase + whyWordsBefore;
    int posBeforeBase = wordBeforeBase + whyWordsBefore + whyWordsAfter;
    int posAfterBase = posBeforeBase + whyWordsBefore;
    // One slot more than the features assigned below (presumably the class attribute - TODO confirm).
    int attributeTotal = wordBeforeBase + whyWordsBefore * 2 + whyWordsAfter * 2 + 1;

    Instance whyCandidate = new DenseInstance(attributeTotal);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(0), candidate.Value);
    // Number of space-separated parts in the candidate text.
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(1), candidate.Value.Split(' ').Count());
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(2), candidate.Sentence);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(3), candidate.Score);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(4), candidate.NumWho);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(5), candidate.NumWhen);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(6), candidate.NumWhere);

    // Preceding words, farthest first. Slot 0 reads articleCurrent[Position - whyWordsBefore - 1].
    // NOTE(review): the index arithmetic suggests Position is 1-based - confirm.
    for (int slot = 0; slot < whyWordsBefore; slot++)
    {
        var tokenIndex = candidate.Position - (whyWordsBefore - slot) - 1;
        if (tokenIndex < 0)
        {
            continue; // not enough text before the candidate
        }
        var neighbour = articleCurrent[tokenIndex];
        whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(slot + wordBeforeBase), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(slot + posBeforeBase), neighbour.PartOfSpeech);
        }
    }

    // Following words, nearest first, starting at articleCurrent[Position].
    for (int offset = 0; offset < whyWordsAfter; offset++)
    {
        var tokenIndex = candidate.Position + offset;
        if (tokenIndex >= articleCurrent.Count)
        {
            continue; // past the end of the article
        }
        var neighbour = articleCurrent[tokenIndex];
        whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(wordAfterBase + offset), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(posAfterBase + offset), neighbour.PartOfSpeech);
        }
    }

    return whyCandidate;
}
/// <summary>
/// Adds one training example to dataSet: the text in attribute 0 and a 0/1 flag for
/// every tag index in attributes 1..tagsNb.
/// </summary>
/// <param name="text">Text stored in attribute 0.</param>
/// <param name="tags">Tag indices that apply to the text; all other tag attributes are set to 0.</param>
public void Train(string text, IList<int> tags)
{
    Instance example = new DenseInstance(tagsNb+1);
    example.setDataset(dataSet);
    example.setValue(0, text);

    // Tag k lives in attribute k + 1 because attribute 0 holds the text.
    for (int tag = 0; tag < tagsNb; tag++)
    {
        double flag = tags.Contains(tag) ? 1 : 0;
        example.setValue(tag + 1, flag);
    }

    dataSet.add(example);
}
/// <summary>
/// Predicts the tags of a text and returns the bipartition as numbers.
/// </summary>
/// <param name="text">Text stored in attribute 0 of the instance that is classified.</param>
/// <returns>One value per bipartition entry: 1 where the entry is true, 0 where it is false.</returns>
public IEnumerable<double> Tag(string text)
{
    Instance query = new DenseInstance(tagsNb+1);
    query.setDataset(dataSet);
    query.setValue(0, text);

    MultiLabelOutput prediction = cl.makePrediction(query);

    List<double> flags = new List<double>();
    foreach (bool isTagged in prediction.getBipartition())
    {
        flags.Add(isTagged ? 1 : 0);
    }
    return flags;
}
/// <summary>
/// Builds a small in-memory Weka dataset with one attribute of each kind (numeric,
/// nominal, string, date, relational), fills it with two instances and writes the
/// dataset to the console.
/// </summary>
/// <remarks>
/// Each instance is created from its value array with DenseInstance(weight, values).
/// The previous version wrote every value to attribute index 1, sized the outer
/// instance for four attributes although the dataset has five, never stored the second
/// set of values, and printed ins.value(x) using the instance index as attribute index.
/// </remarks>
public static void CreateArffFiles()
{
    // 1. set up attributes
    java.util.ArrayList atts = new java.util.ArrayList();
    // - numeric
    atts.add(new weka.core.Attribute("att1"));
    // - nominal
    java.util.ArrayList attVals = new java.util.ArrayList();
    for (int i = 0; i < 5; i++)
    {
        attVals.add("val" + (i + 1));
    }
    atts.add(new weka.core.Attribute("att2", attVals));
    // - string
    atts.add(new weka.core.Attribute("att3", (java.util.ArrayList)null));
    // - date
    atts.add(new weka.core.Attribute("att4", "yyyy-MM-dd"));
    // - relational
    java.util.ArrayList attsRel = new java.util.ArrayList();
    // -- numeric
    attsRel.add(new weka.core.Attribute("att5.1"));
    // -- nominal
    java.util.ArrayList attValsRel = new java.util.ArrayList();
    for (int i = 0; i < 5; i++)
    {
        attValsRel.add("val5." + (i + 1));
    }
    attsRel.add(new weka.core.Attribute("att5.2", attValsRel));
    Instances dataRel = new Instances("att5", attsRel, 0);
    atts.add(new weka.core.Attribute("att5", dataRel, 0));

    // 2. create Instances object
    Instances data = new Instances("MyRelation", atts, 0);

    // 3. fill with data
    // first instance
    double[] vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal
    vals[1] = attVals.indexOf("val3");
    // - string
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    // - date
    vals[3] = data.attribute(3).parseDate("2001-11-09");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    double[] valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new DenseInstance(1.0, vals));

    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    // - date
    vals[3] = data.attribute(3).parseDate("2000-12-01");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new DenseInstance(1.0, vals));

    data.setClassIndex(data.numAttributes() - 1);

    // 4. output data: the dataset's string form (header plus both instances)
    System.Console.WriteLine(data);
}
/// <summary>
/// Converts a Neuroph data set to a Weka data set.
/// </summary>
/// <param name="neurophDataset">Neuroph data set to convert.</param>
/// <returns>Weka data set with one instance per Neuroph row.</returns>
public static Instances convertNeurophToWekaDataset(DataSet neurophDataset)
{
    IDictionary <double[], string> classValues = getClassValues(neurophDataset);
    int numInputs = neurophDataset.InputSize;
    Instances instances = createEmptyWekaDataSet(numInputs, neurophDataset.size(), classValues);

    // Fixed at 1 rather than taken from the data set, so the unsupervised path below
    // is never taken with the current value.
    int numOutputs = 1;
    bool supervised = numOutputs > 0;

    // The class attribute follows the inputs.
    instances.ClassIndex = numInputs;

    IEnumerator <DataSetRow> rows = neurophDataset.GetEnumerator();
    while (rows.MoveNext())
    {
        DataSetRow row = rows.Current;

        // Supervised rows get one extra slot for the class; unsupervised rows hold inputs only.
        Instance instance = new DenseInstance(supervised ? numInputs + numOutputs : numInputs);
        for (int input = 0; input < numInputs; input++)
        {
            instance.setValue(input, row.Input[input]);
        }
        instance.Dataset = instances;

        if (supervised)
        {
            // Set the class from the first class value whose name equals the row label.
            foreach (KeyValuePair <double[], string> entry in classValues)
            {
                if (!entry.Value.Equals(row.Label))
                {
                    continue;
                }

                instance.setValue(numInputs, entry.Value);

                // NOTE(review): only the first desired output is ever inspected; when it
                // equals 1 the class is overwritten with index 0. This may have been meant
                // to search for the position of the 1 in the whole array - confirm.
                double[] desired = row.DesiredOutput;
                if (desired.Length > 0 && desired[0] == 1)
                {
                    instance.setValue(numInputs, 0);
                }
                break;
            }
        }

        instances.add(instance);
    }

    return instances;
}
/// <summary>
/// Classifies one feature vector with a saved SMO model and returns the predicted class.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes, in order.</param>
/// <param name="classNames">Nominal values of the class attribute.</param>
/// <param name="dataValues">Input values; element i is rounded to 5 decimals and assigned to attribute i.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Class value stored on the test instance before classification.</param>
/// <param name="modelName">Model name passed to loadSMOModel and, if that fails, to SerializationHelper.read.</param>
/// <param name="hiddelLayers">Not used by this method.</param>
/// <param name="learningRate">Not used by this method.</param>
/// <param name="momentum">Not used by this method.</param>
/// <param name="decimalPlaces">Not used by this method.</param>
/// <param name="trainingTime">Not used by this method.</param>
/// <returns>A list containing only the result of classifyInstance.</returns>
public List <double> testSMOUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Nominal class attribute built from the supplied class names.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric input attributes followed by the class attribute.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        weka.core.Attribute newAttribute = new weka.core.Attribute(attribute);
        attributeList.Add(newAttribute);
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Set instance's values for the attributes
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, Math.Round(dataValues.ElementAt(i), 5));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the model through the project loader; on any failure fall back to reading
    // modelName directly.
    // NOTE(review): a mapped "~//libs//models//" path used to be computed here but was
    // never passed to the fallback read; confirm which path the fallback should use.
    weka.classifiers.functions.SMO clRead;
    try
    {
        java.io.File path = new java.io.File("/models/");
        clRead = loadSMOModel(modelName, path);
    }
    catch (Exception)
    {
        clRead = (weka.classifiers.functions.SMO)weka.core.SerializationHelper.read(modelName);
    }

    clRead.setBatchSize("100");
    clRead.setCalibrator(new weka.classifiers.functions.Logistic());
    clRead.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
    clRead.setEpsilon(1.02E-12);
    clRead.setC(1.0);
    clRead.setDebug(false);
    clRead.setChecksTurnedOff(false);
    clRead.setFilterType(new SelectedTag(weka.classifiers.functions.SMO.FILTER_NORMALIZE, weka.classifiers.functions.SMO.TAGS_FILTER));

    double classValue = clRead.classifyInstance(data.get(0));
    // The distribution is still requested, as before, but it is not part of the result.
    clRead.distributionForInstance(data.get(0));

    List <double> prediction = new List <double>();
    prediction.Add(classValue);
    return(prediction);
}
/// <summary>
/// Predicts the tags of a text and returns one score per label derived from the
/// bipartition and the confidences of the prediction.
/// </summary>
/// <param name="text">Text stored in attribute 0 of the instance before it is passed through stv.</param>
/// <returns>
/// For every bipartition entry: confidence / 2 + 0.5 when the entry is true,
/// 0.5 - confidence / 2 when it is false.
/// </returns>
public IEnumerable<double> Tag(string text)
{
    Instance query = new DenseInstance(tagsNb + 1);
    query.setDataset(oDataSet);
    query.setValue(0, text);

    // Run the instance through the stv filter and classify its output.
    stv.input(query);
    query = stv.output();
    MultiLabelOutput prediction = lps.makePrediction(query);

    List<double> scores = new List<double>();
    int label = 0;
    foreach (bool isTagged in prediction.getBipartition())
    {
        double halfConfidence = prediction.getConfidences()[label] / 2;
        label++;
        scores.Add(isTagged ? halfConfidence + 0.5 : 0.5 - halfConfidence);
    }
    return scores;
}
/// <summary>
/// Builds the Weka instance that describes one "why" candidate: seven candidate-level
/// features followed by the words and part-of-speech tags surrounding the candidate.
/// </summary>
/// <param name="fvWhy">Attribute vector; element k is cast to weka.core.Attribute and used as attribute k.</param>
/// <param name="candidate">Candidate token whose Value, Sentence, Score, NumWho, NumWhen, NumWhere and Position are read.</param>
/// <returns>A new DenseInstance; attributes that are not assigned below are left untouched.</returns>
private Instance createSingleWhyInstance(FastVector fvWhy, Token candidate)
{
    // Index layout: [0..6] fixed features, then word-n.., word+1.., pos-n.., pos+1..
    int wordBeforeBase = 7;
    int wordAfterBase = wordBeforeBase + whyWordsBefore;
    int posBeforeBase = wordBeforeBase + whyWordsBefore + whyWordsAfter;
    int posAfterBase = posBeforeBase + whyWordsBefore;
    // One slot more than the features assigned below (presumably the class attribute - TODO confirm).
    int attributeTotal = wordBeforeBase + whyWordsBefore * 2 + whyWordsAfter * 2 + 1;

    Instance whyCandidate = new DenseInstance(attributeTotal);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(0), candidate.Value);
    // Number of space-separated parts in the candidate text.
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(1), candidate.Value.Split(' ').Count());
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(2), candidate.Sentence);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(3), candidate.Score);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(4), candidate.NumWho);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(5), candidate.NumWhen);
    whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(6), candidate.NumWhere);

    // Preceding words, farthest first. Slot 0 reads articleCurrent[Position - whyWordsBefore - 1].
    // NOTE(review): the index arithmetic suggests Position is 1-based - confirm.
    for (int slot = 0; slot < whyWordsBefore; slot++)
    {
        var tokenIndex = candidate.Position - (whyWordsBefore - slot) - 1;
        if (tokenIndex < 0)
        {
            continue; // not enough text before the candidate
        }
        var neighbour = articleCurrent[tokenIndex];
        whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(slot + wordBeforeBase), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(slot + posBeforeBase), neighbour.PartOfSpeech);
        }
    }

    // Following words, nearest first, starting at articleCurrent[Position].
    for (int offset = 0; offset < whyWordsAfter; offset++)
    {
        var tokenIndex = candidate.Position + offset;
        if (tokenIndex >= articleCurrent.Count)
        {
            continue; // past the end of the article
        }
        var neighbour = articleCurrent[tokenIndex];
        whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(wordAfterBase + offset), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whyCandidate.setValue((weka.core.Attribute)fvWhy.elementAt(posAfterBase + offset), neighbour.PartOfSpeech);
        }
    }

    return whyCandidate;
}
/// <summary>
/// Builds the Weka instance that describes one "who" candidate: six candidate-level
/// features followed by the words and part-of-speech tags surrounding the candidate.
/// </summary>
/// <param name="fvWho">Attribute vector; element k is cast to weka.core.Attribute and used as attribute k.</param>
/// <param name="candidate">Candidate token whose Value, Sentence, Position and Frequency are read.</param>
/// <returns>A new DenseInstance; attributes that are not assigned below are left untouched.</returns>
private Instance createSingleWhoInstance(FastVector fvWho, Token candidate)
{
    // Index layout: [0..5] fixed features, then word-n.., word+1.., pos-n.., pos+1..
    int wordBeforeBase = 6;
    int wordAfterBase = wordBeforeBase + whoWordsBefore;
    int posBeforeBase = wordBeforeBase + whoWordsBefore + whoWordsAfter;
    int posAfterBase = posBeforeBase + whoWordsBefore;
    // One slot more than the features assigned below (presumably the class attribute - TODO confirm).
    int attributeTotal = wordBeforeBase + whoWordsBefore * 2 + whoWordsAfter * 2 + 1;

    Instance whoCandidate = new DenseInstance(attributeTotal);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(0), candidate.Value);
    // Number of space-separated parts in the candidate text.
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(1), candidate.Value.Split(' ').Count());
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(2), candidate.Sentence);
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(3), candidate.Position);

    // Distance from the first token of the candidate's sentence, divided by that
    // sentence's token count. Stays at -1 when no sentence matches.
    double sentenceStartProximity = -1;
    foreach (List<Token> sentenceTokens in segregatedArticleCurrent)
    {
        if (sentenceTokens.Count > 0 && sentenceTokens[0].Sentence == candidate.Sentence)
        {
            double offsetInSentence = (double)(candidate.Position - sentenceTokens[0].Position);
            sentenceStartProximity = offsetInSentence / (double)sentenceTokens.Count;
            break;
        }
    }
    if (sentenceStartProximity > -1)
    {
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(4), sentenceStartProximity);
    }
    whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(5), candidate.Frequency);

    // Preceding words, farthest first. Slot 0 reads articleCurrent[Position - whoWordsBefore - 1].
    // NOTE(review): the index arithmetic suggests Position is 1-based - confirm.
    for (int slot = 0; slot < whoWordsBefore; slot++)
    {
        var tokenIndex = candidate.Position - (whoWordsBefore - slot) - 1;
        if (tokenIndex < 0)
        {
            continue; // not enough text before the candidate
        }
        var neighbour = articleCurrent[tokenIndex];
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(slot + wordBeforeBase), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(slot + posBeforeBase), neighbour.PartOfSpeech);
        }
    }

    // Following words, nearest first, starting at articleCurrent[Position].
    for (int offset = 0; offset < whoWordsAfter; offset++)
    {
        var tokenIndex = candidate.Position + offset;
        if (tokenIndex >= articleCurrent.Count)
        {
            continue; // past the end of the article
        }
        var neighbour = articleCurrent[tokenIndex];
        whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(wordAfterBase + offset), neighbour.Value);
        if (neighbour.PartOfSpeech != null)
        {
            whoCandidate.setValue((weka.core.Attribute)fvWho.elementAt(posAfterBase + offset), neighbour.PartOfSpeech);
        }
    }

    return whoCandidate;
}