/// <summary>
/// Builds the attribute vector describing one "where" candidate instance.
/// </summary>
/// <remarks>
/// Layout, in order: "word" (string), "wordCount", "sentence", "wordScore"
/// (numeric), the context words before and after the candidate (string), the
/// POS tags of those context words (nominal over <c>fvPOS</c>), and finally
/// the nominal class attribute "where" with values "yes"/"no".
/// </remarks>
/// <returns>The attribute vector, class attribute last.</returns>
private FastVector createWhereFastVector()
{
    // 4 fixed attributes + 1 class attribute, plus a word and a POS tag
    // attribute for every context position on either side.
    int expectedSize = 5 + whereWordsBefore * 2 + whereWordsAfter * 2;
    FastVector attributes = new FastVector(expectedSize);

    // Passing a null value vector makes Weka create a string attribute.
    attributes.addElement(new weka.core.Attribute("word", (FastVector)null));
    attributes.addElement(new weka.core.Attribute("wordCount"));
    attributes.addElement(new weka.core.Attribute("sentence"));
    //attributes.addElement(new weka.core.Attribute("position"));
    //attributes.addElement(new weka.core.Attribute("sentenceStartProximity"));
    attributes.addElement(new weka.core.Attribute("wordScore"));

    // Preceding context words, farthest first ("word-N" ... "word-1").
    for (int distance = whereWordsBefore; distance >= 1; distance--)
    {
        string name = "word-" + distance;
        attributes.addElement(new weka.core.Attribute(name, (FastVector)null));
    }

    // Following context words, nearest first ("word+1" ... "word+N").
    for (int distance = 1; distance <= whereWordsAfter; distance++)
    {
        string name = "word+" + distance;
        attributes.addElement(new weka.core.Attribute(name, (FastVector)null));
    }

    // POS tags of the preceding context words.
    for (int distance = whereWordsBefore; distance >= 1; distance--)
    {
        string name = "postag-" + distance;
        attributes.addElement(new weka.core.Attribute(name, fvPOS));
    }

    // POS tags of the following context words.
    for (int distance = 1; distance <= whereWordsAfter; distance++)
    {
        string name = "postag+" + distance;
        attributes.addElement(new weka.core.Attribute(name, fvPOS));
    }

    // Binary class attribute.
    FastVector classLabels = new FastVector(2);
    classLabels.addElement("yes");
    classLabels.addElement("no");
    attributes.addElement(new weka.core.Attribute("where", classLabels));

    return attributes;
}
/// <summary>
/// Builds the attribute vector describing one "why" candidate instance.
/// </summary>
/// <remarks>
/// Layout, in order: "candidate" (string), "wordCount", "sentence",
/// "candidateScore", "numWho", "numWhen", "numWhere" (numeric), the context
/// words before and after the candidate (string), the POS tags of those
/// context words (nominal over <c>fvPOS</c>), and finally the nominal class
/// attribute "why" with values "yes"/"no".
/// </remarks>
/// <returns>The attribute vector, class attribute last.</returns>
private FastVector createWhyFastVector()
{
    // 7 fixed attributes + 1 class attribute, plus a word and a POS tag
    // attribute for every context position on either side.
    FastVector fvWhy = new FastVector(8 + whyWordsBefore * 2 + whyWordsAfter * 2);

    // Passing a null value vector makes Weka create a string attribute.
    fvWhy.addElement(new weka.core.Attribute("candidate", (FastVector)null));
    fvWhy.addElement(new weka.core.Attribute("wordCount"));
    fvWhy.addElement(new weka.core.Attribute("sentence"));
    fvWhy.addElement(new weka.core.Attribute("candidateScore"));
    fvWhy.addElement(new weka.core.Attribute("numWho"));
    fvWhy.addElement(new weka.core.Attribute("numWhen"));
    fvWhy.addElement(new weka.core.Attribute("numWhere"));

    // Context-word attributes. These loops previously iterated over the
    // "where" window sizes (whereWordsBefore / whereWordsAfter), which
    // disagreed with the capacity above and with the POS tag loops below
    // whenever the two window configurations differ.
    // NOTE(review): if an existing serialized model was trained on the old
    // layout with differing window sizes, it must be retrained.
    for (int i = whyWordsBefore; i > 0; i--)
    {
        fvWhy.addElement(new weka.core.Attribute("word-" + i, (FastVector)null));
    }
    for (int i = 1; i <= whyWordsAfter; i++)
    {
        fvWhy.addElement(new weka.core.Attribute("word+" + i, (FastVector)null));
    }

    // POS tags of the same context positions.
    for (int i = whyWordsBefore; i > 0; i--)
    {
        fvWhy.addElement(new weka.core.Attribute("postag-" + i, fvPOS));
    }
    for (int i = 1; i <= whyWordsAfter; i++)
    {
        fvWhy.addElement(new weka.core.Attribute("postag+" + i, fvPOS));
    }

    // Binary class attribute.
    FastVector fvClass = new FastVector(2);
    fvClass.addElement("yes");
    fvClass.addElement("no");
    fvWhy.addElement(new weka.core.Attribute("why", fvClass));

    return fvWhy;
}
/// <summary>
/// Builds an in-memory data set named "MyRelation" with two numeric
/// attributes, "att1" and "att2", holding one instance per index of
/// <paramref name="a"/>.
/// </summary>
/// <param name="a">Values for "att1"; its length decides the instance count.</param>
/// <param name="b">Values for "att2"; read at the same indices as <paramref name="a"/>.</param>
/// <returns>The populated data set.</returns>
private static Instances CreateInstanceOnFly(double[] a, double[] b)
{
    // Schema: two numeric attributes.
    FastVector schema = new FastVector();
    schema.addElement(new Attribute("att1"));
    schema.addElement(new Attribute("att2"));

    // Empty data set; it grows as rows are added.
    Instances dataSet = new Instances("MyRelation", schema, 0);

    // One row per input index, each with weight 1.0.
    int rowCount = a.Length;
    for (int row = 0; row < rowCount; row++)
    {
        double[] values = new double[dataSet.numAttributes()];
        values[0] = a[row];
        values[1] = b[row];

        dataSet.add(new weka.core.DenseInstance(1.0, values));
    }

    return dataSet;
}
/// <summary>
/// Returns the attribute vector for the data set, building it on first use
/// and caching it in <c>_fvWekaAttributes</c> for later calls.
/// </summary>
/// <remarks>
/// The vector holds one numeric attribute per entry of
/// <c>DomPool.SelectorFeatures</c> (named by the feature's string form),
/// followed by the nominal class attribute "theClass" with values
/// "yes"/"no".
/// </remarks>
/// <returns>The cached attribute vector.</returns>
public FastVector GetDataSetAtts()
{
    if (_fvWekaAttributes == null)
    {
        // One slot per selector feature plus one for the class attribute.
        FastVector schema = new FastVector(DomPool.SelectorFeatures.Count() + 1);

        foreach (Feature selectorFeature in DomPool.SelectorFeatures)
        {
            schema.addElement(new weka.core.Attribute(selectorFeature.ToString()));
        }

        // Nominal class attribute goes last.
        FastVector classLabels = new FastVector(2);
        classLabels.addElement("yes");
        classLabels.addElement("no");
        schema.addElement(new weka.core.Attribute("theClass", classLabels));

        // Publish only once the vector is complete.
        _fvWekaAttributes = schema;
    }

    return _fvWekaAttributes;
}
/// <summary>
/// Creates an identifier: stores the given settings, creates empty candidate
/// lists, builds the POS tag value vector, loads the four serialized
/// classifiers and then calls <c>initializeAnnotations()</c>.
/// </summary>
/// <param name="isAnnotated">Stored in the <c>isAnnotated</c> field.</param>
/// <param name="wt">Stored in the <c>wt</c> field.</param>
public Identifier(Boolean isAnnotated, WhyTrainer wt)
{
    this.isAnnotated = isAnnotated;
    this.wt = wt;

    // Single-candidate lists.
    listWhoCandidates = new List<Candidate>();
    listWhenCandidates = new List<Candidate>();
    listWhereCandidates = new List<Candidate>();
    listSecondaryWhyCandidates = new List<Candidate>();

    // Token-sequence candidate lists.
    listWhatCandidates = new List<List<Token>>();
    listWhyCandidates = new List<List<Token>>();

    // Nominal value set shared by every POS tag attribute.
    fvPOS = new FastVector(Token.PartOfSpeechTags.Length);
    foreach (String tag in Token.PartOfSpeechTags)
    {
        fvPOS.addElement(tag);
    }

    // Model paths are relative to the process working directory.
    whoClassifier = (Classifier)SerializationHelper.read(@"..\..\IdentifierModels\who.model");
    whenClassifier = (Classifier)SerializationHelper.read(@"..\..\IdentifierModels\when.model");
    whereClassifier = (Classifier)SerializationHelper.read(@"..\..\IdentifierModels\where.model");
    whyClassifier = (Classifier)SerializationHelper.read(@"..\..\IdentifierModels\why.model");

    initializeAnnotations();
}
/// <summary>
/// Creates an empty "diabetes" data set whose eight feature attributes are
/// nominal with values "0" .. "numBuckets - 1", followed by the nominal
/// "diagnosis" attribute with values "0" and "1".
/// </summary>
/// <param name="numBuckets">Number of bucket labels given to each feature attribute.</param>
/// <returns>An empty data set carrying only the attribute definitions.</returns>
private static Instances DefineBucketAttributes(int numBuckets)
{
    // Feature attributes, in the order they appear in the data set.
    string[] featureNames =
    {
        "numberOfTimesPregnant",
        "plasmaGlucoseConcentration",
        "diastolicBloodPressure",
        "tricepsSkinFoldThickness",
        "twoHourSerumInsulin",
        "bmi",
        "diabetesPedigreeFunction",
        "age"
    };

    FastVector attributes = new FastVector();

    foreach (string featureName in featureNames)
    {
        // Each attribute gets its own vector of bucket labels.
        FastVector bucketLabels = new FastVector();
        for (int bucket = 0; bucket < numBuckets; bucket++)
        {
            bucketLabels.addElement(bucket.ToString());
        }

        attributes.addElement(new weka.core.Attribute(featureName, bucketLabels));
    }

    FastVector diagnosisLabels = new FastVector();
    diagnosisLabels.addElement("0"); // negative
    diagnosisLabels.addElement("1"); // positive
    attributes.addElement(new weka.core.Attribute("diagnosis", diagnosisLabels));

    return new Instances("diabetes", attributes, 0);
}
/// <summary>
/// Copies every entry of <c>_attributes</c>, in enumeration order, into a
/// new <c>FastVector</c> pre-sized to hold them all.
/// </summary>
/// <returns>The newly created vector.</returns>
private static FastVector PrepareFastVector()
{
    int capacity = _attributes.Count;
    var copy = new FastVector(capacity);

    foreach (var entry in _attributes)
    {
        copy.addElement(entry);
    }

    return copy;
}
/// <summary>
/// Creates and returns an empty Weka data set.
/// </summary>
/// <param name="numOfAttr">Number of attributes, not counting the class attribute.</param>
/// <param name="capacity">Initial capacity of the data set.</param>
/// <param name="classValues">
/// Map whose values are the class labels; each value becomes one possible
/// value of the class attribute, in the map's enumeration order. The keys
/// are not used here.
/// </param>
/// <returns>An empty data set named "newDataSet".</returns>
private static Instances createEmptyWekaDataSet(int numOfAttr, int capacity, IDictionary<double[], string> classValues)
{
    // Possible values of the class attribute.
    // (The two label<->index lookup maps that used to be filled here were
    // never read or returned, and their declarations assigned a Dictionary
    // to a Hashtable variable, which does not compile; they were removed.)
    FastVector fvClassVal = new FastVector();
    foreach (KeyValuePair<double[], string> values in classValues)
    {
        fvClassVal.addElement(values.Value);
    }

    // Class attribute, created with classValues.Count as its index argument.
    Attribute classAttribute = new Attribute("theClass", fvClassVal, classValues.Count);

    // Attribute vector: numOfAttr numeric attributes plus the class attribute.
    FastVector fvWekaAttributes = new FastVector(numOfAttr + 1);

    // Plain attributes are named after their position ("0", "1", ...).
    for (int i = 0; i < numOfAttr; i++)
    {
        fvWekaAttributes.addElement(new Attribute(i + "", i));
    }

    // The class attribute goes last.
    fvWekaAttributes.addElement(classAttribute);

    return new Instances("newDataSet", fvWekaAttributes, capacity);
}
/// <summary>
/// Sets the format of output instances.
/// </summary>
/// <remarks>
/// Every input attribute yields two numeric output attributes, named
/// <c>'FT &lt;name&gt; (re)'</c> and <c>'FT &lt;name&gt; (im)'</c>, where
/// <c>&lt;name&gt;</c> is the input attribute name with single quotes
/// replaced by spaces and then trimmed. The output keeps the input relation
/// name and is created with <c>getNumCoeffs()</c> as its capacity.
/// </remarks>
private void setOutputFormat()
{
    Instances inputFormat = getInputFormat();
    FastVector outputAttributes = new FastVector();

    int inputCount = inputFormat.numAttributes();
    for (int index = 0; index < inputCount; index++)
    {
        // Strip quotes so the name can be re-wrapped in single quotes below.
        string cleanedName = inputFormat.attribute(index).name().Replace('\'', ' ').Trim();

        string realName = "'FT " + cleanedName + " (re)\'";
        outputAttributes.addElement(new Attribute(realName));

        string imaginaryName = "'FT " + cleanedName + " (im)\'";
        outputAttributes.addElement(new Attribute(imaginaryName));
    }

    Instances outputFormat = new Instances(inputFormat.relationName(), outputAttributes, getNumCoeffs());
    setOutputFormat(outputFormat);
}
// Code taken from http://stackoverflow.com/questions/9616872/classification-of-instances-in-weka/14876081#14876081
/// <summary>
/// Creates the data set's attribute vector: one numeric attribute per
/// selected feature key, followed by the nominal "class" attribute.
/// </summary>
/// <remarks>
/// Feature attributes are named "att_" + (key + 1). Any key equal to the
/// value of the last entry of <c>_trainTopIGFeatures</c> is skipped. The
/// class values are "2" .. labels.Count; the first protocol label
/// (baseline) is ignored.
/// </remarks>
/// <param name="size">Not used; kept so existing callers keep compiling.</param>
/// <returns>The attribute vector, class attribute last.</returns>
public static FastVector CreateFastVector(int size)
{
    var fv = new FastVector();

    var topFeatures = TrainingTesting_SharedVariables._trainTopIGFeatures;

    // Guarded so an empty feature list still yields just the class attribute.
    if (topFeatures.Length > 0)
    {
        // Looked up once instead of on every iteration.
        var lastKey = topFeatures[topFeatures.Length - 1];

        foreach (int key in topFeatures)
        {
            if (key != lastKey)
            {
                fv.addElement(new weka.core.Attribute("att_" + (key + 1).ToString()));
            }
        }
    }

    // The initial capacity is irrelevant: the vector grows on addElement.
    var classValues = new FastVector(1);

    List<string> labels = GuiPreferences.Instance.getLabels();

    // The loop below adds the values "2" .. labels.Count, so that is the
    // range reported (the message used to say labels.Count + 1).
    GuiPreferences.Instance.setLog("automatically! adding " + (labels.Count - 1).ToString() + " classes 2 -> " + labels.Count.ToString() + " to fast vector, based on protocol labels");

    // Baseline is ignored; start from the second event in the protocol.
    for (int l = 1; l < labels.Count; l++)
    {
        classValues.addElement((l + 1).ToString());
    }

    fv.addElement(new weka.core.Attribute("class", classValues));

    return fv;
}
/// <summary>
/// Defines all the attributes for the diabetes data set as nominal
/// (categorical) attributes and returns an empty data set carrying them.
/// </summary>
/// <returns>An empty "diabetes" data set with eight feature attributes and the "diagnosis" class attribute.</returns>
private static Instances DefineCategoricalAttributes()
{
    // Attribute names, in data set order; categoryTable holds the matching
    // category labels at the same index.
    string[] attributeNames = new string[]
    {
        "numberOfTimesPregnant",
        "plasmaGlucoseConcentration",
        "diastolicBloodPressure",
        "tricepsSkinFoldThickness",
        "twoHourSerumInsulin",
        "bmi",
        "diabetesPedigreeFunction",
        "age",
        "diagnosis"
    };

    string[][] categoryTable = new string[][]
    {
        // numberOfTimesPregnant
        new string[]
        {
            "zero",   // 0
            "low",    // 1-4
            "medium", // 5-9
            "high"    // 10+
        },
        // plasmaGlucoseConcentration
        new string[]
        {
            "normal", // < 140
            "high"    // >= 140
        },
        // diastolicBloodPressure
        new string[]
        {
            "low",      // < 60
            "normal",   // 60 to 79
            "pre-high", // 80 to 89
            "high"      // 90+
        },
        // tricepsSkinFoldThickness
        new string[]
        {
            "low",    // < 4.5
            "normal", // 4.5 to 36.5
            "high"    // > 36.5
        },
        // twoHourSerumInsulin
        new string[]
        {
            "normal", // < 166
            "high"    // >= 166
        },
        // bmi
        new string[]
        {
            "underweight", // < 18.5
            "normal",      // 18.5 to 25
            "overweight",  // 25 to 30
            "obese"        // 30+
        },
        // diabetesPedigreeFunction
        new string[]
        {
            "low",    // < 0.2
            "normal", // 0.2 to 0.7
            "high"    // > 0.7
        },
        // age
        new string[]
        {
            "young",  // under 30
            "middle", // 30 to 50 inclusive
            "old"     // over 50
        },
        // diagnosis
        new string[]
        {
            "0", // negative
            "1"  // positive
        }
    };

    FastVector attributes = new FastVector();

    for (int index = 0; index < attributeNames.Length; index++)
    {
        FastVector categories = new FastVector();
        foreach (string category in categoryTable[index])
        {
            categories.addElement(category);
        }

        attributes.addElement(new weka.core.Attribute(attributeNames[index], categories));
    }

    return new Instances("diabetes", attributes, 0);
}