/// <summary>
/// Builds the forward index: for every crawled URL, segments the stored page
/// content into words, inserts one <c>ForwardIndexItem</c> per page, and
/// finally saves the set of all accepted words as the word dictionary.
/// </summary>
/// <remarks>
/// URLs for which <c>GetWebPageByURL</c> returns null are skipped.
/// Words rejected by <c>_check</c> are left out of both the per-page document
/// and the global word set. A page whose words are all rejected is still
/// inserted, with an empty <c>words</c> document.
/// </remarks>
static void CreateForwordIndex()
{
    PanGu.Segment.Init();
    MongodbAccess mongo = new MongodbAccess();

    // Every distinct accepted word seen across all pages.
    HashSet<string> words = new HashSet<string>();

    HashSet<string> crawled_urls = mongo.GetCrawledURLs();
    foreach (string url in crawled_urls)
    {
        WebPage page = mongo.GetWebPageByURL(url);
        if (page == null)
        {
            continue;
        }

        // NOTE(review): presumably maps word -> occurrence count for this page;
        // confirm against ContetnWordSegment.
        Dictionary<string, int> dict = ContetnWordSegment(page.content);

        ForwardIndexItem forwarditem = new ForwardIndexItem();
        forwarditem.webpage_id = page._id;
        forwarditem.words = new MongoDB.Bson.BsonDocument();

        // Enumerate key/value pairs directly instead of Keys + indexer,
        // which would look every word up a second time.
        foreach (KeyValuePair<string, int> entry in dict)
        {
            if (!_check(entry.Key))
            {
                continue;
            }

            forwarditem.words.Add(new MongoDB.Bson.BsonElement(entry.Key, entry.Value));

            // HashSet.Add ignores duplicates, so no Contains pre-check is needed.
            words.Add(entry.Key);
        }

        mongo.InsertForwardIndexItem(forwarditem);
    }

    Util.log("total {0} words.", words.Count);
    mongo.SaveWordDict(words);
}
/// <summary>
/// Inserts a single forward-index entry into the "forwardindex" collection
/// of <c>mongo_database</c>.
/// </summary>
/// <param name="fitem">The forward-index item to store.</param>
public void InsertForwardIndexItem(ForwardIndexItem fitem)
{
    MongoCollection<ForwardIndexItem> collection =
        mongo_database.GetCollection<ForwardIndexItem>("forwardindex");

    // The value returned by Insert was previously stored in a local that was
    // never read; it is deliberately discarded here.
    collection.Insert<ForwardIndexItem>(fitem);
}
/// <summary>
/// Stores one forward-index entry in the "forwardindex" collection of
/// <c>mongo_database</c>.
/// </summary>
/// <param name="fitem">The forward-index item to insert.</param>
public void InsertForwardIndexItem(ForwardIndexItem fitem)
{
    MongoCollection<ForwardIndexItem> collection =
        mongo_database.GetCollection<ForwardIndexItem>("forwardindex");

    // The insert result is not inspected; the original kept it in an unused
    // local, which has been removed.
    collection.Insert<ForwardIndexItem>(fitem);
}