Exemplo n.º 1
0
        /// <summary>
        /// Appends one ARFF data row per element of <paramref name="_list"/> to
        /// <paramref name="filename"/>. When the file does not exist yet it is created
        /// and the ARFF header (relation, attributes, data marker) is written first.
        /// </summary>
        /// <param name="filename">Path of the ARFF file to create or append to.</param>
        /// <param name="_list">Elements to serialize, one data row each.</param>
        /// <param name="template_id">
        /// Identifier passed to <c>rules.select_rules_comments</c>; the result is stored in
        /// <c>_ht_res</c> and used to decide the class label of every row.
        /// </param>
        public void create_arff_file(string filename, List <element> _list, string template_id)
        {
            rules _r = new rules();

            _ht_res = _r.select_rules_comments(template_id);

            // Decide before opening: opening in append mode creates the file if it is missing.
            bool write_header = !File.Exists(filename);

            // A single writer inside 'using' guarantees the handle is released even when
            // writing a row throws part-way through.
            using (StreamWriter file = new StreamWriter(filename, true))
            {
                if (write_header)
                {
                    write_arff_header(file);
                }

                foreach (element _e in _list)
                {
                    // Class label is "1" when the element's original tag name is a key of _ht_res.
                    string _cls = _ht_res.ContainsKey(_e.tagName_Orginal) ? "1" : "0";

                    file.WriteLine(build_arff_row(_e) + "," + _cls);
                }
            }
        }

        /// <summary>
        /// Writes the ARFF header: the relation name, one real attribute per entry of
        /// <c>deg_real</c>, the three trailing attributes, the class attribute and the data marker.
        /// </summary>
        private void write_arff_header(StreamWriter file)
        {
            file.WriteLine("@relation class_relation");
            foreach (string item in deg_real)
            {
                file.WriteLine("@attribute '" + item + "' real");
            }
            file.WriteLine("@attribute 'tagName' {div, td, li}");
            file.WriteLine("@attribute 'hastagORID' real");
            file.WriteLine("@attribute 'repeat_tag_count' real");
            file.WriteLine("@ATTRIBUTE class {0,1}");
            file.WriteLine("@data");
        }

        /// <summary>
        /// Builds the comma-separated feature part of a data row (everything except the class label).
        /// </summary>
        private static string build_arff_row(element _e)
        {
            // NOTE(review): the column order is kept exactly as it was. It presumably has to match
            // the order of deg_real used for the header; in the *_AE group dot_count_AE comes before
            // input_count_AE, unlike the first group - confirm against deg_real.
            string[] fields =
            {
                arff_value(_e.wordCount), arff_value(_e.DensityinHTML), arff_value(_e.LinkCount),
                arff_value(_e.wordCountinLink), arff_value(_e.meanofWordinLinks), arff_value(_e.meanofWordinLinksAllWords),
                arff_value(_e.h1_count), arff_value(_e.h2_count), arff_value(_e.h3_count),
                arff_value(_e.h4_count), arff_value(_e.h5_count), arff_value(_e.h6_count),
                arff_value(_e.p_count), arff_value(_e.br_count), arff_value(_e.span_count),
                arff_value(_e.object_count), arff_value(_e.ul_count), arff_value(_e.li_count),
                arff_value(_e.input_count), arff_value(_e.div_count), arff_value(_e.td_count),
                arff_value(_e.dot_count),
                arff_value(_e.wordCount_AE), arff_value(_e.DensityinHTML_AE), arff_value(_e.LinkCount_AE),
                arff_value(_e.wordCountinLink_AE), arff_value(_e.meanofWordinLinks_AE), arff_value(_e.meanofWordinLinksAllWords_AE),
                arff_value(_e.h1_count_AE), arff_value(_e.h2_count_AE), arff_value(_e.h3_count_AE),
                arff_value(_e.h4_count_AE), arff_value(_e.h5_count_AE), arff_value(_e.h6_count_AE),
                arff_value(_e.p_count_AE), arff_value(_e.br_count_AE), arff_value(_e.span_count_AE),
                arff_value(_e.object_count_AE), arff_value(_e.ul_count_AE), arff_value(_e.li_count_AE),
                arff_value(_e.dot_count_AE),
                arff_value(_e.input_count_AE), arff_value(_e.div_count_AE), arff_value(_e.td_count_AE),
                arff_value(_e.tagName), arff_value(_e.tag_idORclass), arff_value(_e.repeat_tag_count)
            };

            return string.Join(",", fields);
        }

        /// <summary>
        /// Formats a single value with the invariant culture so that numbers never use a
        /// comma as decimal separator, which would split one value into two columns.
        /// A null value becomes an empty string.
        /// </summary>
        private static string arff_value(object value)
        {
            return System.Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture);
        }