예제 #1
0
 /// <summary>
 /// Generates a compact dictionary using the TF-IDF measure.
 /// </summary>
 /// <remarks>
 /// Calls <c>InitDict()</c> first, then builds a <c>BoWModelDB</c> over
 /// <c>wordDict</c>, loads it from the database, and delegates the actual
 /// dictionary generation to the document database.
 /// </remarks>
 static void GenerateTFIDFDictionary()
 {
     InitDict();

     var corpus = new BoWModelDB(wordDict);
     corpus.LoadFromDB();
     corpus.GenerateTFIDFDictionary();
 }
예제 #2
0
        /// <summary>
        /// Writes the IDs of all documents that carry at least one "frequent"
        /// class label to the file <c>doc_set_cls_1000</c>, one ID per line.
        /// </summary>
        /// <remarks>
        /// A class label is frequent when it occurs on at least 1000 documents
        /// of the loaded database. Documents whose <c>ClassLabels</c> is null
        /// are ignored both when counting and when selecting.
        /// The output file is created (or overwritten) in the current directory.
        /// </remarks>
        static void CompileDataSet()
        {
            // Minimum number of documents a class label must appear on
            // for its documents to be selected.
            const int minClassCount = 1000;

            InitTFIDFDict();
            BoWModelDB docDB = new BoWModelDB(tfidfDict);
            docDB.LoadFromDB();

            // Pass 1: count how many times each class label occurs.
            Dictionary<int, int> clsCounts = new Dictionary<int, int>();
            for (int i = 0; i < docDB.Count; i++)
            {
                BoWModel doc = (BoWModel)docDB[i];
                if (doc.ClassLabels == null)
                {
                    continue;
                }

                foreach (int cls in doc.ClassLabels)
                {
                    // TryGetValue leaves count at 0 when the key is missing,
                    // so a single assignment covers both insert and increment.
                    int count;
                    clsCounts.TryGetValue(cls, out count);
                    clsCounts[cls] = count + 1;
                }
            }

            // Pass 2: emit the ID of every document with a frequent label.
            // The using block guarantees the writer (and the underlying
            // stream) is flushed and closed even if an exception is thrown.
            using (StreamWriter writer = new StreamWriter(new FileStream("doc_set_cls_1000", FileMode.Create)))
            {
                for (int i = 0; i < docDB.Count; i++)
                {
                    BoWModel doc = (BoWModel)docDB[i];
                    if (doc.ClassLabels == null)
                    {
                        continue;
                    }

                    bool selected = false;
                    foreach (int cls in doc.ClassLabels)
                    {
                        // Every label seen here was counted in pass 1,
                        // so the indexer lookup cannot miss.
                        if (clsCounts[cls] >= minClassCount)
                        {
                            selected = true;
                            break;
                        }
                    }

                    if (selected)
                    {
                        writer.WriteLine(docDB[i].DocID);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Program entry point: loads the dictionaries and the document set,
        /// runs precedence discovery, and then runs the test query over the
        /// loaded documents.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load the three dictionaries from the database.
            var labels = new ClassLabelDictionary();
            labels.LoadFromDB();

            var words = new WordDictionary();
            words.LoadFromDB();

            var tfidf = new TFIDFDictionary();
            tfidf.LoadFromDB();

            // Load the documents belonging to the named data set.
            var documents = new BoWModelDB(words);
            documents.LoadFromDBByDataSet("doc_set_cls_1000");

            // Build the precedence model and run discovery on it.
            var precedence = new PModel.PrecedenceModel(tfidf, labels);
            precedence.DiscoverPrecedence();

            // Run the test query against the loaded documents.
            var query = new PrecedenceQuery(precedence, words, labels);
            query.TestQuery(documents);
        }