// Generate a compact dictionary using the TF-IDF measure.
//
// Steps: run InitDict(), build a BoWModelDB over the shared wordDict,
// load its contents from the database, then ask it to generate the
// TF-IDF dictionary. Nothing is returned to the caller.
// NOTE(review): presumably InitDict() is what populates wordDict, and
// the generated dictionary is persisted by BoWModelDB itself - confirm.
static void GenerateTFIDFDictionary()
{
    InitDict();

    BoWModelDB corpus = new BoWModelDB(wordDict);
    corpus.LoadFromDB();
    corpus.GenerateTFIDFDictionary();
}
// Compile the list of documents that belong to at least one "large" class.
//
// Runs InitTFIDFDict(), loads all documents through a BoWModelDB built over
// tfidfDict, and then makes two passes over them:
//   1. count how many times each class label occurs across all documents;
//   2. write the DocID of every document that has at least one label whose
//      count reaches the threshold, one DocID per line.
// Documents whose ClassLabels is null are ignored in both passes.
// The output file "doc_set_cls_1000" is created, or truncated if it exists.
static void CompileDataSet()
{
    // Minimum number of occurrences a class label needs for its documents
    // to be selected (the "1000" in the output file name).
    const int minClassCount = 1000;

    InitTFIDFDict();
    BoWModelDB docDB = new BoWModelDB(tfidfDict);
    docDB.LoadFromDB();

    // Pass 1: occurrences of each class label over the whole document set.
    Dictionary<int, int> clsCounts = new Dictionary<int, int>();
    for (int i = 0; i < docDB.Count; i++)
    {
        BoWModel doc = (BoWModel)docDB[i];
        if (doc.ClassLabels == null)
        {
            continue;
        }

        foreach (int cls in doc.ClassLabels)
        {
            // TryGetValue leaves count at 0 for an unseen label, so a single
            // indexer assignment covers both the insert and the increment.
            int count;
            clsCounts.TryGetValue(cls, out count);
            clsCounts[cls] = count + 1;
        }
    }

    // Pass 2: emit the selected DocIDs. The using statement guarantees the
    // writer (and the underlying file stream) is flushed and released even
    // if an exception is thrown while writing.
    using (StreamWriter writer = new StreamWriter(new FileStream("doc_set_cls_1000", FileMode.Create)))
    {
        for (int i = 0; i < docDB.Count; i++)
        {
            BoWModel doc = (BoWModel)docDB[i];
            if (doc.ClassLabels == null)
            {
                continue;
            }

            bool selected = false;
            foreach (int cls in doc.ClassLabels)
            {
                // Every label seen here was counted in pass 1, so the
                // indexer lookup cannot miss.
                if (clsCounts[cls] >= minClassCount)
                {
                    selected = true;
                    break;
                }
            }

            if (selected)
            {
                writer.WriteLine(docDB[i].DocID);
            }
        }
    }
}
// Program entry point: loads the dictionaries and the "doc_set_cls_1000"
// document set, discovers the precedence model, and runs the test query
// against the loaded documents. The args parameter is not used.
static void Main(string[] args)
{
    // Load the three dictionaries from the database.
    ClassLabelDictionary labels = new ClassLabelDictionary();
    labels.LoadFromDB();

    WordDictionary words = new WordDictionary();
    words.LoadFromDB();

    TFIDFDictionary tfidfTerms = new TFIDFDictionary();
    tfidfTerms.LoadFromDB();

    // The document set is built over the full word dictionary, while the
    // precedence model below is built over the TF-IDF dictionary.
    BoWModelDB documents = new BoWModelDB(words);
    documents.LoadFromDBByDataSet("doc_set_cls_1000");

    PModel.PrecedenceModel precedence = new PModel.PrecedenceModel(tfidfTerms, labels);
    precedence.DiscoverPrecedence();

    // Run the test query over the loaded document set.
    PrecedenceQuery query = new PrecedenceQuery(precedence, words, labels);
    query.TestQuery(documents);
}