/// <summary>
/// Builds one nearest-neighbor document per occupation and inserts it into the
/// "nearest_neighbors_profession" collection.
/// For every occupation imported from the occupations file, all documents in
/// "edges_professions" whose OccupationAName or OccupationBName equals the
/// occupation name are loaded and deserialized. For each of the attribute types
/// Word2VecWIKI, Skill, Net, Knowledge and Ability the edges are ordered ascending
/// by getDistance(type) and the first <paramref name="n"/> are kept.
/// </summary>
/// <param name="n">Number of neighbors kept per attribute type (passed to Take).</param>
private static void PopulateAllProfessionNearestNeighbors(int n)
{
    var client = new MongoClient(MongoStrings.CONNECTION);
    IMongoDatabase graphDb = client.GetDatabase(MongoStrings.GRAPH_DB);
    var edges = graphDb.GetCollection<BsonDocument>("edges_professions");
    var neighborsOut = graphDb.GetCollection<BsonDocument>("nearest_neighbors_profession");

    var report = new ONETReport
    {
        MasterOccupationList = JSON_IO.Import_OccupationList(Helper.Publics.FILENAMES.OCCUPATIONS + ".txt")
    };

    Console.WriteLine("Iterating Through Occupations...");
    foreach (Occupation occupation in report.MasterOccupationList)
    {
        Console.WriteLine(" ..." + occupation.Name);

        // An edge may name this occupation on either end, so query both fields.
        var docsAsA = edges
            .Find(Builders<BsonDocument>.Filter.Eq("OccupationAName", occupation.Name))
            .ToList();
        var docsAsB = edges
            .Find(Builders<BsonDocument>.Filter.Eq("OccupationBName", occupation.Name))
            .ToList();

        // A-side matches first, then B-side matches; the order matters because
        // the sort below is stable and ties keep their insertion order.
        List<SimpleOccupationEdge> incidentEdges = docsAsA
            .Concat(docsAsB)
            .Select(doc => BsonSerializer.Deserialize<SimpleOccupationEdge>(doc))
            .ToList();

        var neighbors = new ProfessionNearestNeighbors(occupation.Name);

        foreach (AttributeType attribute in Enum.GetValues(typeof(AttributeType)))
        {
            // Only these five attribute types get a ranking; skip the rest.
            if (attribute != AttributeType.Word2VecWIKI
                && attribute != AttributeType.Skill
                && attribute != AttributeType.Net
                && attribute != AttributeType.Knowledge
                && attribute != AttributeType.Ability)
            {
                continue;
            }

            List<SimpleOccupationEdge> ranked = incidentEdges
                .OrderBy(edge => edge.getDistance(attribute))
                .ToList();

            // getOtherName presumably yields the occupation on the far end of the edge — confirm.
            List<string> closestNames = ranked
                .Take(n)
                .Select(edge => edge.getOtherName(occupation.Name))
                .ToList();

            neighbors.NearestNeighbors.Add(Tuple.Create(attribute, closestNames));
        }

        neighborsOut.InsertOne(neighbors.ToBsonDocument());
    }
}
/// <summary>
/// ETL of the ONET data from the original ONET downloadable DB format into MongoDB.
/// Imports the occupation, skill, ability and knowledge lists from their text files
/// and inserts the skills, knowledge items and abilities (in that order) into the
/// "node_skill", "node_knowledge" and "node_ability" collections.
/// Occupations are not inserted here; ONET_ETL_Profession writes "node_profession".
/// </summary>
private static void ONET_ETL_NoProfession()
{
    var ForkReport = new ONETReport();
    // NOTE(review): the occupation list is not read below; the load is kept because
    // ONETReport is not visible here — confirm it can be dropped.
    ForkReport.MasterOccupationList = JSON_IO.Import_OccupationList(Helper.Publics.FILENAMES.OCCUPATIONS + ".txt");
    ForkReport.MasterSkillList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.SKILLS + ".txt");
    ForkReport.MasterAbilityList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.ABILITIES + ".txt");
    ForkReport.MasterKnowledgeList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.KNOWLEDGE + ".txt");

    MongoClient dbClient = new MongoClient(MongoStrings.CONNECTION);
    IMongoDatabase database = dbClient.GetDatabase(MongoStrings.GRAPH_DB);
    var skill_collection = database.GetCollection<BsonDocument>("node_skill");
    var ability_collection = database.GetCollection<BsonDocument>("node_ability");
    var knowledge_collection = database.GetCollection<BsonDocument>("node_knowledge");

    // Save to DB.
    InsertAttributeNodes(skill_collection, ForkReport.MasterSkillList);
    InsertAttributeNodes(knowledge_collection, ForkReport.MasterKnowledgeList);
    InsertAttributeNodes(ability_collection, ForkReport.MasterAbilityList);
}

/// <summary>
/// Converts each attribute to a BsonDocument and inserts them all into
/// <paramref name="collection"/> with a single InsertMany call.
/// </summary>
/// <param name="collection">Destination collection.</param>
/// <param name="attributes">Attributes to store.</param>
private static void InsertAttributeNodes(IMongoCollection<BsonDocument> collection, IEnumerable<Attribute> attributes)
{
    var documents = new List<BsonDocument>();
    foreach (Attribute attribute in attributes)
    {
        documents.Add(attribute.ToBsonDocument());
    }

    // InsertMany rejects an empty batch with an ArgumentException; an empty input
    // list should not abort the rest of the ETL, so report it and move on.
    if (documents.Count == 0)
    {
        Console.WriteLine("Skipping " + collection.CollectionNamespace.CollectionName + ": no documents to insert.");
        return;
    }

    collection.InsertMany(documents);
}
/// <summary>
/// ETL of the profession info: imports the occupation list from its text file and
/// inserts every occupation as a document into the "node_profession" collection.
/// </summary>
private static void ONET_ETL_Profession()
{
    var ForkReport = new ONETReport();
    ForkReport.MasterOccupationList = JSON_IO.Import_OccupationList(Helper.Publics.FILENAMES.OCCUPATIONS + ".txt");
    // NOTE(review): the three attribute lists are not read below; the loads are kept
    // because ONETReport is not visible here — confirm they can be dropped.
    ForkReport.MasterSkillList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.SKILLS + ".txt");
    ForkReport.MasterAbilityList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.ABILITIES + ".txt");
    ForkReport.MasterKnowledgeList = JSON_IO.Import_AttributeList(Helper.Publics.FILENAMES.KNOWLEDGE + ".txt");

    MongoClient dbClient = new MongoClient(MongoStrings.CONNECTION);
    IMongoDatabase database = dbClient.GetDatabase(MongoStrings.GRAPH_DB);
    var profession_collection = database.GetCollection<BsonDocument>("node_profession");

    // The per-occupation "top adjacencies" ranking that used to run here was removed.
    // It queried "edges_professions" twice per occupation, deserialized and sorted the
    // edges, and then discarded the result, because top adjacencies are no longer
    // stored on the profession node.

    // Save to DB.
    var professionDocuments = new List<BsonDocument>();
    foreach (Occupation occupation in ForkReport.MasterOccupationList)
    {
        professionDocuments.Add(occupation.ToBsonDocument());
    }

    // InsertMany rejects an empty batch with an ArgumentException, so an empty
    // occupation list is treated as "nothing to insert".
    if (professionDocuments.Count == 0)
    {
        return;
    }

    profession_collection.InsertMany(professionDocuments);
}