/// <summary>
/// Builds a Story that already carries its payload. Depending on
/// <paramref name="type"/> the given array is stored either as the values
/// of the story or as its data elements.
/// </summary>
/// <param name="name">name stored on the story</param>
/// <param name="set">array stored in <c>values</c> (for ATTRIBUTE) or in <c>data</c> (for DATA)</param>
/// <param name="type">kind of story; only ATTRIBUTE and DATA are accepted here</param>
/// <exception cref="CekaException">thrown when <paramref name="type"/> is neither ATTRIBUTE nor DATA</exception>
public Story(string name, string[] set, EArffTypes type)
{
    this.isNumeric = false;
    this.name = name;
    this.type = type;

    // the type decides which field receives the array
    switch (type)
    {
        case EArffTypes.ATTRIBUTE:
            this.values = set;
            break;

        case EArffTypes.DATA:
            this.data = set;
            break;

        default:
            throw new CekaException("can not set any values or dataset(elements) for the type " + type.ToString());
    }
}
/// <summary>
/// Shortcut constructor that sets name and type and, in the same step,
/// hands over the array that belongs to the story.
/// </summary>
/// <param name="name">name stored on the story</param>
/// <param name="set">becomes <c>values</c> for an ATTRIBUTE story and <c>data</c> for a DATA story</param>
/// <param name="type">kind of story; must be ATTRIBUTE or DATA</param>
/// <exception cref="CekaException">thrown for every type other than ATTRIBUTE and DATA</exception>
public Story(string name, string[] set, EArffTypes type)
{
    this.isNumeric = false;
    this.name = name;
    this.type = type;

    if (type == EArffTypes.ATTRIBUTE)
    {
        this.values = set;
        return;
    }

    if (type == EArffTypes.DATA)
    {
        this.data = set;
        return;
    }

    // no other type can carry values or data elements
    string message = "can not set any values or dataset(elements) for the type " + type.ToString();
    throw new CekaException(message);
}
/// <summary>
/// Classifies a single line of an ARFF file and fills in the type, name,
/// value and data fields of this Story accordingly.
/// </summary>
/// <remarks>
/// Blank lines and lines of two characters or fewer become <c>EArffTypes.NONE</c>.
/// A line whose first character equals <c>ArffFile.ATT_PRE</c> is handled as a
/// header declaration (relation, attribute or the data marker). Every other
/// line is handled as a data row and split on <c>ArffFile.STORY_DELIMITTER</c>.
/// </remarks>
/// <param name="al">the raw line taken from the ARFF file</param>
/// <exception cref="CekaException">
/// thrown when a relation line has no name, when the header keyword is unknown,
/// or when a data row contains <c>ArffFile.ARFF_SPACE</c>
/// </exception>
private void parseType(string al)
{
    // sort out empty, whitespace-only or too short lines first
    // (IsNullOrWhiteSpace already covers the null and empty cases)
    if (string.IsNullOrWhiteSpace(al) || al.Length <= 2)
    {
        this.value = "";
        this.type = EArffTypes.NONE;
        return;
    }

    // fast check whether the line is a header declaration
    if (al[0].Equals(ArffFile.ATT_PRE))
    {
        // Split always yields at least one element, so _data[0] is safe to read
        string[] _data = al.Split(ArffFile.ARFF_SPACE);

        // switch on the declaration keyword
        // (only the basics are put in place here, the rest is done by the library later on)
        switch (_data[0])
        {
            case ArffFile.ARFF_RELATION:
                // the name is the second token; without it there is nothing to read
                if (_data.Length < 2)
                {
                    throw new CekaException("this story (" + al + ") seems to be a relation, "
                        + "but it does not declare a name.");
                }

                this.type = EArffTypes.RELATION;
                this.name = _data[1];
                this.value = "relation-type";
                break;

            case ArffFile.ARFF_ATTRIBUTE:
                this.type = EArffTypes.ATTRIBUTE;

                // strip only the leading keyword; a plain Replace would also remove
                // the keyword text if it showed up inside the name or the values
                al = al.Substring(_data[0].Length);

                if (testForNumericContent(al))
                {
                    // numeric / REAL attribute, parsing is delegated to parseNumeric
                    parseNumeric(al);
                }
                else
                {
                    // name and value have to be separated ...
                    filterNameAndValue(al);
                    // ... and the value is then split further into its single elements
                    furtherSplitOfValue();
                }
                break;

            case ArffFile.ARFF_DATA:
                // beware of the difference between DATA_ATT and DATA!
                // nothing else to do here, this is just the end of the header section
                this.type = EArffTypes.DATA_ATT;
                this.value = "data-type";
                this.name = "none";
                break;

            default:
                throw new CekaException("this story (" + al + ") seemed to be alright, but the attribute "
                    + "type (" + _data[0] + ") is unknown.");
        }
    }
    else
    {
        // no header declaration, so this is a plain data row
        if (al.Contains(ArffFile.ARFF_SPACE))
        {
            throw new CekaException("this story (data) (" + al + ") is including whitespaces.");
        }

        // the elements of the row become the data of this story
        this.data = al.Split(ArffFile.STORY_DELIMITTER);

        // remove every ATT_NAME_COMB character from the single elements
        for (int i = 0; i < this.data.Length; i++)
        {
            StringBuilder sb = new StringBuilder(this.data[i].Length);
            foreach (char c in this.data[i])
            {
                if (!c.Equals(ArffFile.ATT_NAME_COMB))
                {
                    sb.Append(c);
                }
            }
            this.data[i] = sb.ToString();
        }

        this.type = EArffTypes.DATA;
        this.value = "data-value";
    }
}
/// <summary>
/// Takes one line of an ARFF file, works out what kind of line it is and
/// sets the type, name, value and data fields of this Story to match.
/// </summary>
/// <remarks>
/// Three kinds of input are distinguished: lines that are blank or at most two
/// characters long (type NONE), data rows (first character differs from
/// <c>ArffFile.ATT_PRE</c>) and header declarations (relation, attribute, data marker).
/// </remarks>
/// <param name="al">the raw line taken from the ARFF file</param>
/// <exception cref="CekaException">
/// thrown when a data row contains <c>ArffFile.ARFF_SPACE</c>, when a relation
/// line has no name, or when the header keyword is not one of the known ones
/// </exception>
private void parseType(string al)
{
    // empty or very short lines carry nothing
    // (IsNullOrWhiteSpace already includes the null and empty checks)
    if (string.IsNullOrWhiteSpace(al) || al.Length <= 2)
    {
        this.value = "";
        this.type = EArffTypes.NONE;
        return;
    }

    // anything that does not start with the header prefix is a plain data row
    if (!al[0].Equals(ArffFile.ATT_PRE))
    {
        if (al.Contains(ArffFile.ARFF_SPACE))
        {
            throw new CekaException("this story (data) (" + al + ") is including whitespaces.");
        }

        string[] elements = al.Split(ArffFile.STORY_DELIMITTER);

        // drop every ATT_NAME_COMB character from each element
        for (int idx = 0; idx < elements.Length; idx++)
        {
            StringBuilder cleaned = new StringBuilder(elements[idx].Length);
            foreach (char ch in elements[idx])
            {
                if (!ch.Equals(ArffFile.ATT_NAME_COMB))
                {
                    cleaned.Append(ch);
                }
            }
            elements[idx] = cleaned.ToString();
        }

        this.data = elements;
        this.type = EArffTypes.DATA;
        this.value = "data-value";
        return;
    }

    // header declaration: the first token is the keyword
    // (Split always returns at least one element, so index 0 exists)
    string[] _data = al.Split(ArffFile.ARFF_SPACE);

    // only the basics are set up here, the rest is done by the library later on
    switch (_data[0])
    {
        case ArffFile.ARFF_RELATION:
            // the relation name is the second token; fail with the library's own
            // exception instead of an index error when it is missing
            if (_data.Length < 2)
            {
                throw new CekaException("this story (" + al + ") seems to be a relation, "
                    + "but it does not declare a name.");
            }

            this.type = EArffTypes.RELATION;
            this.name = _data[1];
            this.value = "relation-type";
            break;

        case ArffFile.ARFF_ATTRIBUTE:
            this.type = EArffTypes.ATTRIBUTE;

            // cut off the leading keyword only; replacing every occurrence would
            // also damage a name or value that happens to contain the keyword
            al = al.Substring(_data[0].Length);

            if (testForNumericContent(al))
            {
                // numeric / REAL attribute, handled by parseNumeric
                parseNumeric(al);
            }
            else
            {
                // separate name and value, then split the value into its elements
                filterNameAndValue(al);
                furtherSplitOfValue();
            }
            break;

        case ArffFile.ARFF_DATA:
            // beware of the difference between DATA_ATT and DATA!
            // this line only marks the end of the header section
            this.type = EArffTypes.DATA_ATT;
            this.value = "data-type";
            this.name = "none";
            break;

        default:
            throw new CekaException("this story (" + al + ") seemed to be alright, but the attribute "
                + "type (" + _data[0] + ") is unknown.");
    }
}