Пример #1
0
        /// <summary>
        /// Reads every document of the "patientcare" collection in the "pubmed" MongoDB
        /// database, converts each one into a <see cref="BoWModel"/>, hands it to
        /// <c>docModelDB</c> for storage, and finally persists the class-label and word
        /// dictionaries.
        /// </summary>
        /// <remarks>
        /// For each article the fields are consumed in this order: "Title" (tokenized via
        /// <c>SplitWords</c>), "MeshHeadings" (each entry has its periods removed and is
        /// looked up in <c>classLabelDict</c>), then the "Value" of every "AbstractTexts"
        /// entry (tokenized via <c>SplitWords</c>). Fields holding BSON null are skipped.
        /// Progress is written to the console every 1000 articles.
        /// </remarks>
        static void LoadFromDB()
        {
            MongoServer server = MongoServer.Create();
            try
            {
                MongoDatabase db = server.GetDatabase("pubmed");
                MongoCollection<BsonDocument> coll = db.GetCollection<BsonDocument>("patientcare");
                MongoCursor cursor = coll.FindAll();

                int count = 0;
                foreach (BsonDocument article in cursor)
                {
                    BoWModel bowDoc = new BoWModel();
                    bowDoc.DocID = article["ArticleId"].AsString;

                    if (!article["Title"].IsBsonNull)
                    {
                        SplitWords(article["Title"].AsString, bowDoc);
                    }

                    if (!article["MeshHeadings"].IsBsonNull)
                    {
                        foreach (BsonValue heading in article["MeshHeadings"].AsBsonArray)
                        {
                            // Periods are stripped from the heading before it is looked up.
                            string headingKey = heading.AsString.Replace(".", "");
                            int classLabel = classLabelDict.GetValue(headingKey);
                            bowDoc.AddClassLabel(classLabel);
                        }
                    }

                    if (!article["AbstractTexts"].IsBsonNull)
                    {
                        foreach (BsonDocument abstractText in article["AbstractTexts"].AsBsonArray)
                        {
                            if (!abstractText["Value"].IsBsonNull)
                            {
                                SplitWords(abstractText["Value"].AsString, bowDoc);
                            }
                        }
                    }

                    docModelDB.StoreToDB(bowDoc.StoreToDB());

                    count++;
                    if (count % 1000 == 0)
                    {
                        Console.WriteLine("Loading {0} records", count);
                    }
                }
            }
            finally
            {
                // Disconnect even when an article makes the loop throw (e.g. a missing
                // field or an unexpected BSON type), so the connection is never leaked.
                server.Disconnect();
            }

            // Dictionaries are persisted only after every article was processed successfully.
            classLabelDict.StoreToDB();
            wordDict.StoreToDB();
        }
Пример #2
0
 // Matches a run of ASCII letters optionally followed by digits. Built once and
 // reused, because the pattern never changes between calls.
 private static readonly Regex wordPattern = new Regex(@"[a-zA-Z]+[0-9]*");

 /// <summary>
 /// Lower-cases <paramref name="s"/>, extracts every token matching
 /// <c>[a-zA-Z]+[0-9]*</c>, passes each token through
 /// <c>Detachment.Instance.Detach</c>, looks the result up in <c>wordDict</c>
 /// and adds the returned value to <paramref name="docModel"/>.
 /// </summary>
 /// <param name="s">Text to tokenize; null or empty text adds nothing.</param>
 /// <param name="docModel">Model that receives one <c>AddWord</c> call per token.</param>
 static void SplitWords(string s, BoWModel docModel)
 {
     if (string.IsNullOrEmpty(s))
     {
         return;
     }

     // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a dotless
     // 'ı' under Turkish/Azeri cultures, which the ASCII-only pattern would not match.
     string lowered = s.ToLowerInvariant();
     for (Match m = wordPattern.Match(lowered); m.Success; m = m.NextMatch())
     {
         int word = wordDict.GetValue(Detachment.Instance.Detach(m.Value));
         docModel.AddWord(word);
     }
 }
Пример #3
0
 /// <summary>
 /// Creates a fresh <see cref="BoWModel"/> and returns the result of its
 /// <c>LoadFromDB</c> call for <paramref name="bsonDoc"/> and <c>wordDict</c>.
 /// </summary>
 /// <param name="bsonDoc">BSON document forwarded unchanged to <c>BoWModel.LoadFromDB</c>.</param>
 /// <returns>Whatever <c>BoWModel.LoadFromDB</c> returns for the given document.</returns>
 public override DocModel LoadFromDB(BsonDocument bsonDoc)
 {
     return new BoWModel().LoadFromDB(bsonDoc, wordDict);
 }