Beispiel #1
0
        /// <summary>
        /// Reads the rule file at <paramref name="path"/> line by line and fills this
        /// instance's training collections from its Root, Rule and Tuple lines.
        /// </summary>
        /// <remarks>
        /// Processing steps, in order:
        /// 1. Every non-blank line is normalised (" =" becomes "=", single quotes become
        ///    double quotes) and dispatched on the markers it contains.
        /// 2. The per-class data-set counts from the Root line's "CategoriesN" attribute
        ///    are copied onto the matching entries of DistinctClasses.
        /// 3. All distinct tuple lines are gathered in FirstFileFs and turned into
        ///    per-feature metadata (TrainingSetMetaData / TrainingSetMetaDataList).
        /// 4. Every rule's tuple list is encoded as "featureIndex,valueIndex" strings and
        ///    appended to TrainingSubsets.
        /// Failures are not propagated to the caller; they are reported through
        /// System.Diagnostics.Trace and leave the collections partially filled.
        /// </remarks>
        /// <param name="path">Path of the training file to read.</param>
        public void LoadTrainingFile(string path)
        {
            try
            {
                // Flattened "name,count,name,count,..." list taken from the Root line. It starts
                // empty so that a file without a Root line simply skips the class-count step
                // instead of failing on a null key.
                string[] strCn = new string[0];

                using (StreamReader reader = new StreamReader(path))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (String.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }

                        // Normalise attribute syntax: no blank before '=' and double quotes only.
                        line = line.Replace(" =", "=").Replace("'", "\"");

                        // The checks below are deliberately independent: one physical line may
                        // carry more than one marker, and the original evaluation order is kept.
                        if (line.IndexOf("<Rule ") != -1)
                        {
                            string ruleClass = StringOperations.GetAttrVal("Category", line);
                            if (!DistinctClasses.ContainsKey(ruleClass))
                            {
                                PredictedClassLabelAnalysis created = new PredictedClassLabelAnalysis();
                                created.className = ruleClass;
                                created.Rules_ClassCount++;
                                DistinctClasses.Add(ruleClass, created);
                            }
                            else
                            {
                                PredictedClassLabelAnalysis existing = (PredictedClassLabelAnalysis)DistinctClasses[ruleClass];
                                existing.Rules_ClassCount++;
                                // Written back explicitly, as the original did.
                                DistinctClasses[ruleClass] = existing;
                            }
                            FirstFileAsSubsetsClasses.Add(line);
                        }

                        if (line.IndexOf("</Rule>") > -1)
                        {
                            // End of a rule: store a snapshot of its tuples and start over.
                            FirstFileAsSubsets.Add((ArrayList)SubSet.Clone());
                            SubSet.Clear();
                        }

                        if (line.IndexOf("<Tuple") > -1 && !SubSet.Contains(line))
                        {
                            SubSet.Add(line);
                        }

                        if (line.IndexOf("<Root") > -1)
                        {
                            NrOfInstances = Int32.Parse(StringOperations.GetAttrVal("M", line));

                            // "CategoriesN" lists each category followed by its number of cases.
                            strCn = StringOperations.GetAttrVal("CategoriesN", line).Split(',');
                            for (int ii = 0; ii < strCn.Length - 1; ii += 2)
                            {
                                CategoriesN.Add(strCn[ii], strCn[ii + 1]);
                            }
                        }
                    }
                }

                // Copy the data-set case count of every category onto its analysis entry.
                for (int g = 0; g < strCn.Length - 1; g += 2)
                {
                    // A category that never appeared on a Rule line has no entry to update.
                    if (!DistinctClasses.ContainsKey(strCn[g]))
                    {
                        continue;
                    }

                    PredictedClassLabelAnalysis p = (PredictedClassLabelAnalysis)DistinctClasses[strCn[g]];
                    p.ds_ClassCount = Int32.Parse(strCn[g + 1]);
                    DistinctClasses[strCn[g]] = p;
                }

                // Collect every distinct tuple line across all rules.
                int numberOfRules = FirstFileAsSubsetsClasses.Count;
                for (int i = 0; i < numberOfRules; i++)
                {
                    ArrayList ruleTuples = (ArrayList)FirstFileAsSubsets[i];
                    for (int j = 0; j < ruleTuples.Count; j++)
                    {
                        string tupleLine = ruleTuples[j].ToString();
                        if (!FirstFileFs.Contains(tupleLine))
                        {
                            FirstFileFs.Add(tupleLine);
                        }
                    }
                }

                // Build one FeaturesCharacteristics per distinct "Prop" and record its values.
                ArrayList tmpFs = new ArrayList();
                foreach (string x in FirstFileFs)
                {
                    string y = StringOperations.GetAttrVal("Prop", x);
                    string v = StringOperations.GetAttrVal("Val", x);

                    FeaturesCharacteristics feature;
                    if (!tmpFs.Contains(y))
                    {
                        tmpFs.Add(y);
                        feature = new FeaturesCharacteristics();
                        feature.FeatureName = y;
                        feature.FeatureValues.Add(v);
                        TrainingSetMetaData.Add(y, feature);
                    }
                    else
                    {
                        feature = (FeaturesCharacteristics)TrainingSetMetaData[y];
                        feature.FeatureValues.Add(v);
                    }

                    // NOTE(review): the flag is only (re)set to true when it is not already
                    // false; whether that is intended depends on the declaration of
                    // isContinuous, which is not visible here. Behaviour kept unchanged.
                    if (feature.isContinuous != false && IsNumber(v))
                    {
                        feature.isContinuous = true;
                    }
                }

                foreach (DictionaryEntry de in TrainingSetMetaData)
                {
                    TrainingSetMetaDataList.Add((FeaturesCharacteristics)de.Value);
                }

                // Encode each rule as a list of "featureIndex,valueIndex" items.
                for (int i = 0; i < FirstFileAsSubsets.Count; i++)
                {
                    ArrayList itemset = new ArrayList();
                    foreach (string tuple in (ArrayList)FirstFileAsSubsets[i])
                    {
                        // Both attributes are constant for the tuple, so read them once
                        // instead of once per feature/value comparison.
                        string tupleProp = StringOperations.GetAttrVal("Prop", tuple);
                        string tupleVal  = StringOperations.GetAttrVal("Val", tuple);

                        int featureIndex = 0;
                        foreach (FeaturesCharacteristics fc in TrainingSetMetaDataList)
                        {
                            if (tupleProp == fc.FeatureName)
                            {
                                int valueIndex = 0;
                                foreach (string val in fc.FeatureValues)
                                {
                                    if (tupleVal == val)
                                    {
                                        // First matching value wins.
                                        itemset.Add(featureIndex.ToString() + "," + valueIndex.ToString());
                                        break;
                                    }
                                    valueIndex++;
                                }
                            }
                            featureIndex++;
                        }
                    }
                    TrainingSubsets.Add(itemset);
                }
            }
            catch (Exception ex)
            {
                // Loading stays best-effort (nothing is rethrown), but the failure is no
                // longer invisible.
                System.Diagnostics.Trace.TraceError("LoadTrainingFile failed for '" + path + "': " + ex);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads the rule file at <paramref name="path"/> line by line and fills this
        /// instance's training collections from its Rule and Tuple lines.
        /// </summary>
        /// <remarks>
        /// Processing steps, in order:
        /// 1. Every non-blank line is normalised (" =" becomes "=", single quotes become
        ///    double quotes). A Rule line starts a new tuple group; the group collected so
        ///    far is stored when the next Rule line or the closing Root tag is seen.
        /// 2. Distinct rule lines go to FirstFileDs, distinct tuple lines to FirstFileFs.
        /// 3. The attribute name of each tuple becomes a feature; its distinct, trimmed
        ///    values are collected into a FeaturesCharacteristics added to
        ///    TrainingSetMetaData. A feature is flagged continuous when any value
        ///    contains '=', '&lt;' or '&gt;'.
        /// 4. Every tuple group is encoded as "featureIndex,valueIndex" strings and
        ///    appended to TrainingSubsets.
        /// Failures are not propagated to the caller; they are reported through
        /// System.Diagnostics.Trace and leave the collections partially filled.
        /// </remarks>
        /// <param name="path">Path of the training file to read.</param>
        public void LoadTrainingFile(string path)
        {
            try
            {
                bool isFirst = true;

                using (StreamReader reader = new StreamReader(path))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (String.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }

                        // Normalise attribute syntax: no blank before '=' and double quotes only.
                        line = line.Replace(" =", "=").Replace("'", "\"");

                        if (line.IndexOf("<Rule ") != -1)
                        {
                            FirstFileAsSubsetsClasses.Add(line);
                            if (isFirst)
                            {
                                // Nothing collected yet before the very first rule.
                                isFirst = false;
                            }
                            else
                            {
                                // A new rule begins: store the tuples of the previous one.
                                FirstFileAsSubsets.Add((ArrayList)SubSet.Clone());
                                SubSet.Clear();
                            }
                        }

                        if (line.IndexOf("<Tuple") > -1 && !SubSet.Contains(line))
                        {
                            SubSet.Add(line);
                        }

                        if (line.IndexOf("</Root>") > -1)
                        {
                            // End of document: store the tuples of the last rule.
                            FirstFileAsSubsets.Add((ArrayList)SubSet.Clone());
                            SubSet.Clear();
                        }
                    }
                }

                // Collect distinct rule lines and distinct tuple lines.
                int numberOfRules = FirstFileAsSubsetsClasses.Count;
                for (int i = 0; i < numberOfRules; i++)
                {
                    string ruleLine = FirstFileAsSubsetsClasses[i].ToString();
                    if (!FirstFileDs.Contains(ruleLine))
                    {
                        FirstFileDs.Add(ruleLine);
                    }

                    ArrayList ruleTuples = (ArrayList)FirstFileAsSubsets[i];
                    for (int j = 0; j < ruleTuples.Count; j++)
                    {
                        string tupleLine = ruleTuples[j].ToString();
                        if (!FirstFileFs.Contains(tupleLine))
                        {
                            FirstFileFs.Add(tupleLine);
                        }
                    }
                }

                // The feature name is the attribute name that follows "<Tuple ". It is located
                // relative to the tag itself, so indented tuple lines are handled as well; the
                // previous offset arithmetic was only correct for lines starting at column 0.
                const string tupleTag = "<Tuple ";
                ArrayList tmpFs = new ArrayList();
                foreach (string x in FirstFileFs)
                {
                    int nameStart = x.IndexOf("<Tuple") + tupleTag.Length;
                    int nameEnd   = x.IndexOf("=", nameStart);
                    string y = x.Substring(nameStart, nameEnd - nameStart);
                    if (!tmpFs.Contains(y))
                    {
                        tmpFs.Add(y);
                    }
                }

                foreach (string s in tmpFs)
                {
                    FeaturesCharacteristics fc = new FeaturesCharacteristics();
                    fc.FeatureName = s;

                    string attributeMarker = " " + s + "=";
                    for (int i = 0; i < numberOfRules; i++)
                    {
                        ArrayList ruleTuples = (ArrayList)FirstFileAsSubsets[i];
                        for (int j = 0; j < ruleTuples.Count; j++)
                        {
                            string cc = ruleTuples[j].ToString();
                            if (cc.Contains(attributeMarker))
                            {
                                // The value is trimmed before the duplicate check so that
                                // padded variants of one value are stored only once.
                                string h = ExtractTupleValue(cc);
                                if (!fc.FeatureValues.Contains(h))
                                {
                                    fc.FeatureValues.Add(h);
                                }
                            }
                        }
                    }

                    // A comparison operator inside any value marks the feature as continuous.
                    foreach (string str in fc.FeatureValues)
                    {
                        if (str.Contains("=") || str.Contains("<") || str.Contains(">"))
                        {
                            fc.isContinuous = true;
                            break;
                        }
                    }
                    TrainingSetMetaData.Add(fc);
                }

                // Encode each tuple group as a list of "featureIndex,valueIndex" items.
                for (int i = 0; i < FirstFileAsSubsets.Count; i++)
                {
                    ArrayList itemset = new ArrayList();
                    foreach (string tuple in (ArrayList)FirstFileAsSubsets[i])
                    {
                        int featureIndex = 0;
                        foreach (FeaturesCharacteristics fc in TrainingSetMetaData)
                        {
                            if (tuple.Contains(" " + fc.FeatureName + "="))
                            {
                                // Compare the tuple's actual value rather than searching the
                                // whole line: a substring search also hits longer values
                                // ("1" inside "10") and text inside the feature name.
                                string tupleValue = ExtractTupleValue(tuple);
                                int valueIndex = 0;
                                foreach (string val in fc.FeatureValues)
                                {
                                    if (val == tupleValue)
                                    {
                                        itemset.Add(featureIndex.ToString() + "," + valueIndex.ToString());
                                        break;
                                    }
                                    valueIndex++;
                                }
                            }
                            featureIndex++;
                        }
                    }
                    TrainingSubsets.Add(itemset);
                }
            }
            catch (Exception ex)
            {
                // Loading stays best-effort (nothing is rethrown), but the failure is no
                // longer invisible.
                System.Diagnostics.Trace.TraceError("LoadTrainingFile failed for '" + path + "': " + ex);
            }
        }

        /// <summary>
        /// Returns the trimmed text between the quote that follows the first '=' of
        /// <paramref name="tuple"/> and the last double quote on the line.
        /// </summary>
        /// <param name="tuple">A normalised tuple line of the form name="value".</param>
        /// <returns>The attribute value without surrounding white space.</returns>
        private static string ExtractTupleValue(string tuple)
        {
            int equalsIndex = tuple.IndexOf("=");
            // +2 skips the '=' and the opening quote.
            return tuple.Substring(equalsIndex + 2, tuple.LastIndexOf("\"") - equalsIndex - 2).Trim();
        }