/// <summary>
/// Loads every article from the "patientcare" collection of the "pubmed"
/// MongoDB database, converts each one into a <c>BoWModel</c>, and stores the
/// result through <c>docModelDB</c>. After all articles are processed, the
/// class-label and word dictionaries are stored as well.
/// </summary>
/// <remarks>
/// For each article the title and every abstract text value are tokenized with
/// <c>SplitWords</c>; each MeSH heading has its '.' characters removed and is
/// mapped to a class label via <c>classLabelDict</c>.
/// NOTE(review): fields are read with the BsonDocument indexer, so a document
/// that lacks one of these elements entirely (as opposed to holding a BSON
/// null) presumably throws - confirm against the driver version in use.
/// </remarks>
static void LoadFromDB()
{
    MongoServer server = MongoServer.Create();
    try
    {
        MongoDatabase db = server.GetDatabase("pubmed");
        MongoCollection<BsonDocument> coll = db.GetCollection<BsonDocument>("patientcare");

        int count = 0;
        foreach (BsonDocument article in coll.FindAll())
        {
            BoWModel bowDoc = new BoWModel();
            bowDoc.DocID = article["ArticleId"].AsString;

            // Title words go into the bag of words.
            if (!article["Title"].IsBsonNull)
            {
                SplitWords(article["Title"].AsString, bowDoc);
            }

            // Each MeSH heading becomes a class label; dots are stripped before lookup.
            if (!article["MeshHeadings"].IsBsonNull)
            {
                foreach (BsonValue heading in article["MeshHeadings"].AsBsonArray)
                {
                    string headingKey = heading.AsString.Replace(".", "");
                    int classLabel = classLabelDict.GetValue(headingKey);
                    bowDoc.AddClassLabel(classLabel);
                }
            }

            // Abstract sections are sub-documents whose "Value" element holds the text.
            if (!article["AbstractTexts"].IsBsonNull)
            {
                foreach (BsonDocument section in article["AbstractTexts"].AsBsonArray)
                {
                    if (!section["Value"].IsBsonNull)
                    {
                        SplitWords(section["Value"].AsString, bowDoc);
                    }
                }
            }

            docModelDB.StoreToDB(bowDoc.StoreToDB());

            count++;
            if (count % 1000 == 0)
            {
                Console.WriteLine("Loading {0} records", count);
            }
        }
    }
    finally
    {
        // Release the connection even when reading or converting an article fails.
        server.Disconnect();
    }

    // Dictionaries are stored only after a complete, successful pass.
    classLabelDict.StoreToDB();
    wordDict.StoreToDB();
}
/// <summary>
/// Tokenizes <paramref name="s"/> and adds the id of every token to
/// <paramref name="docModel"/>.
/// </summary>
/// <remarks>
/// The text is lower-cased, then every run of ASCII letters optionally
/// followed by digits is taken as a token. Each token is passed through
/// <c>Detachment.Instance.Detach</c> (presumably a stemmer/normalizer - confirm),
/// looked up in <c>wordDict</c>, and the resulting id is added to the model.
/// </remarks>
/// <param name="s">Text to tokenize; null or empty text adds nothing.</param>
/// <param name="docModel">Model that receives the word ids.</param>
static void SplitWords(string s, BoWModel docModel)
{
    if (string.IsNullOrEmpty(s))
    {
        return;
    }

    // Invariant lower-casing keeps tokenization independent of the current
    // culture (e.g. Turkish maps 'I' to a dotless i that [a-z] would not match).
    string lowered = s.ToLowerInvariant();

    // The static Regex API reuses the framework's cached pattern instead of
    // constructing a new Regex object on every call.
    foreach (Match token in Regex.Matches(lowered, @"[a-zA-Z]+[0-9]*"))
    {
        int word = wordDict.GetValue(Detachment.Instance.Detach(token.Value));
        docModel.AddWord(word);
    }
}
/// <summary>
/// Creates a new <c>BoWModel</c> and returns the result of its
/// <c>LoadFromDB</c> call for the given document.
/// </summary>
/// <param name="bsonDoc">BSON document passed through to <c>BoWModel.LoadFromDB</c>.</param>
/// <returns>The value returned by <c>BoWModel.LoadFromDB</c> for <paramref name="bsonDoc"/> and <c>wordDict</c>.</returns>
public override DocModel LoadFromDB(BsonDocument bsonDoc)
{
    // A fresh model instance is the receiver; wordDict is supplied alongside the document.
    return new BoWModel().LoadFromDB(bsonDoc, wordDict);
}