Beispiel #1
0
        static void Main(string[] args)
        {
            bool import   = args.Length > 0 && args[0].ToLower() == "-import";
            bool dirExist = Directory.Exists(s_systemDir);

            SessionBase.ClearAllCachedObjectsWhenDetectingUpdatedDatabase = false;
            if (import || !dirExist)
            {
                if (dirExist)
                {
                    Directory.Delete(s_systemDir, true); // remove systemDir from prior runs and all its databases.
                }
                Directory.CreateDirectory(s_systemDir);
                using (SessionNoServer session = new SessionNoServer(s_systemDir))
                {
                    DataCache.MaximumMemoryUse = 12000000000;        // 12 GB, set this to what fits your case
                    SessionBase.BTreeAddFastTransientBatchSize = 10; // reduces memory usage
                    Vertex[] ratingVertices = new Vertex[10];
                    session.BeginUpdate();
                    session.DefaultDatabaseLocation().CompressPages = PageInfo.compressionKind.LZ4;
                    Graph g = new Graph(session);

                    // SCHEMA
                    VertexType   userType   = g.NewVertexType("User");
                    PropertyType genderType = userType.NewProperty("Gender", DataType.Integer, PropertyKind.Indexed);

                    VertexType   ratingType = g.NewVertexType("Rating");
                    PropertyType ratingValuePropertyType = ratingType.NewProperty("RatingValue", DataType.Integer, PropertyKind.Indexed);

                    EdgeType ratingEdgeType = g.NewEdgeType("UserToRating", true, userType, ratingType);

                    EdgeType     ratingOfType           = g.NewEdgeType("RatingOf", false, userType, userType);
                    PropertyType ratingEdgePropertyType = ratingOfType.NewProperty("Rating", DataType.Integer, PropertyKind.Indexed);

                    // DATA
                    using (FileStream stream = File.OpenRead(System.IO.Path.Combine(s_inputDataDir, "gender.dat")))
                    {
                        using (StreamReader file = new System.IO.StreamReader(stream))
                        {
                            string line;
                            int    lineNumber = 0;
                            while ((line = file.ReadLine()) != null)
                            {
                                lineNumber++;
                                string[] fields = line.Split(',');
                                Vertex   aUser  = userType.NewVertex();
                                aUser.SetProperty(genderType, (int)fields[1][0] == 'M' ? Gender.Male : fields[1][0] == 'F' ? Gender.Female : Gender.Unknown);
                            }
                            Console.WriteLine("Done importing " + lineNumber + " users");
                        }
                    }

                    using (FileStream stream = File.OpenRead(System.IO.Path.Combine(s_inputDataDir, "ratings.dat")))
                    {
                        using (StreamReader file = new System.IO.StreamReader(stream))
                        {
                            string line;
                            int    lineNumber = 0;
                            Vertex rater      = null;
                            int    raterId;
                            int    priorRaterId = -1;

                            while ((line = file.ReadLine()) != null)
                            {
                                lineNumber++;
                                if (lineNumber % 1000000 == 0)
                                {
                                    Console.WriteLine("Parsing rating # " + lineNumber);
                                }
                                string[] fields = line.Split(',');
                                raterId = int.Parse(fields[0]);
                                if (raterId != priorRaterId)
                                {
                                    rater = userType.GetVertex(raterId);
                                }
                                priorRaterId = raterId;
                                int    ratedId      = int.Parse(fields[1]);
                                int    rating       = int.Parse(fields[2]);
                                Vertex ratingVertex = ratingVertices[rating - 1];
                                if (ratingVertex == null)
                                {
                                    ratingVertex = ratingType.NewVertex();
                                    ratingVertex.SetProperty(ratingValuePropertyType, rating);
                                    ratingVertices[rating - 1] = ratingVertex;
                                }
                                Vertex rated     = userType.GetVertex(ratedId);
                                Edge   aRatingOf = ratingOfType.NewEdge(rater, rated);
                                aRatingOf.SetProperty(ratingEdgePropertyType, rating);
                                Edge userRating = ratingEdgeType.NewEdge(rated, ratingVertex);
                            }
                            Console.WriteLine("Done importing " + lineNumber + " ratings");
                        }
                    }
                    session.Commit();
                }
            }
            // Query
            using (SessionNoServer session = new SessionNoServer(s_systemDir))
            {
                session.BeginRead();
                Graph        g          = Graph.Open(session);
                VertexType   userType   = g.FindVertexType("User");
                PropertyType genderType = userType.FindProperty("Gender");

                VertexType   ratingType = g.FindVertexType("Rating");
                PropertyType ratingValuePropertyType = ratingType.FindProperty("RatingValue");

                EdgeType ratingEdgeType = g.FindEdgeType("UserToRating");

                EdgeType     ratingOfType           = g.FindEdgeType("RatingOf");
                PropertyType ratingEdgePropertyType = ratingOfType.FindProperty("Rating");
                // Complex queries
                int ct = 0;

// Given a user id, and based on the rated profiles, find other users who have rated similarly on the same profiles, and find other profiles to recommend based on what those other users have rated
                Vertex someUser       = userType.GetVertex(1);
                var    similarRatings = (from Edge e in someUser.GetEdges(ratingOfType, Direction.Out)
                                         from Edge edge in e.Head.GetEdges(ratingOfType, Direction.Out)
                                         where someUser != edge.Head
                                         where ((int)e.GetProperty(ratingEdgePropertyType) == (int)edge.GetProperty(ratingEdgePropertyType))
                                         select edge.Tail).Distinct();

                var someUserRated = from Edge e in someUser.GetEdges(ratingOfType, Direction.Out)
                                    select e.Head;

                var recommendedProfles = from v in similarRatings
                                         from Edge e in v.GetEdges(ratingOfType, Direction.Out)
                                         where someUserRated.Contains(e.Head) == false
                                         select e.Head;

                Console.WriteLine("Some user's rated profiles");
                ct = 0;
                foreach (Vertex v in someUserRated)
                {
                    if (ct++ % 50 == 0) // don't print them all !
                    {
                        Console.WriteLine("User id: " + v.VertexId);
                    }
                }
                Console.WriteLine("Number of some user's rated profiles: " + ct);

                Console.WriteLine("Given a user id, and based on the rated profiles, find other users who have rated similarly on the same profiles");
                ct = 0;
                foreach (Vertex v in similarRatings)
                {
                    if (ct++ % 50 == 0) // don't print them all !
                    {
                        Console.WriteLine("User id: " + v.VertexId);
                    }
                }
                Console.WriteLine("Number of matching profiles: " + ct);

                Console.WriteLine("Given a user id, and based on the rated profiles, find other users who have rated similarly on the same profiles, and find other profiles to recommend based on what those other users have rated");
                ct = 0;
                foreach (Vertex v in recommendedProfles)
                {
                    if (ct++ % 5000 == 0) // don't print them all !
                    {
                        Console.WriteLine("User id: " + v.VertexId);
                    }
                }
                Console.WriteLine("Number of matching profiles: " + ct);

// Get all female users with less than 50 ratings
                Console.WriteLine();
                var females = from u in userType.GetVertices()
                              where ((Gender)u.GetProperty(genderType)) == Gender.Female
                              select u;
                var femalesLess50Ratings = from f in females
                                           where f.GetNumberOfEdges(ratingEdgeType, Direction.Out) < 50
                                           select f;
                Console.WriteLine("Female users with less than 50 ratings");
                ct = 0;
                foreach (Vertex f in femalesLess50Ratings)
                {
                    long count = f.GetNumberOfEdges(ratingEdgeType, Direction.Out);
                    if (ct++ % 5000 == 0) // don't print them all !
                    {
                        Console.WriteLine("User id: " + f.VertexId + "\tnumber of ratings: " + count);
                    }
                }
                Console.WriteLine("Number of females with fewer than 50 ratings: " + ct);
                Console.WriteLine();

// Get all male users with at least one 10 rating
                Console.WriteLine();
                var rated10vertex = (from v in ratingType.GetVertices()
                                     where ((int)v.GetProperty(ratingValuePropertyType)) == 10
                                     select v).First();

                var rated10 = (from e in rated10vertex.GetEdges(ratingEdgeType, Direction.In)
                               select e.GetVertex(Direction.Out)).Distinct();

                var rated10male = from Vertex v in rated10
                                  where ((Gender)v.GetProperty(genderType)) == Gender.Male
                                  select v;

                Console.WriteLine("Males with at least one 10 rating");
                ct = 0;
                foreach (Vertex v in rated10male)
                {
                    if (ct++ % 5000 == 0) // don't print them all !
                    {
                        Console.WriteLine("User id: " + v.VertexId);
                    }
                }
                Console.WriteLine("Number of males with at least one 10 rating: " + ct);
                Console.WriteLine();

// Get the first 10 male users who have rated at least 3 of the same profiles as the given user.
                Console.WriteLine("10 male users who have rated at least 3 of the same profiles as the given user");
                var males = from u in userType.GetVertices()
                            where ((Gender)u.GetProperty(genderType)) == Gender.Male
                            select u;

                var someUserHasRated = from Edge o in someUser.GetEdges(ratingOfType, Direction.Out)
                                       select o.Head;

                var first10withSame3ratedAs = from m in males
                                              where (from Edge r in m.GetEdges(ratingOfType, Direction.Out) select r.Head).Intersect(someUserHasRated).ToArray().Length >= 3
                                              select m;
                ct = 0;
                foreach (Vertex v in first10withSame3ratedAs)
                {
                    if (++ct > 10)
                    {
                        break;
                    }
                    Console.WriteLine("User id: " + v.VertexId);
                }
                Console.WriteLine();

                // Statistical queries
// Get the 20 profiles with the most ratings
                var top20mostRatings = (from v in userType.GetVertices()
                                        orderby v.GetNumberOfEdges(ratingEdgeType, Direction.Out) descending
                                        select v).Take(20);
                Console.WriteLine("20 profiles with the most ratings");
                ct = 0;
                foreach (Vertex v in top20mostRatings)
                {
                    int count = (int)v.GetNumberOfEdges(ratingEdgeType, Direction.Out);
                    Console.WriteLine("User id: " + v.VertexId + "\tnumber of ratings: " + count);
                }
                Console.WriteLine();

                var ratingsVertexEnum = from v in ratingType.GetVertices() orderby v.GetProperty(ratingValuePropertyType) descending select v;

                Vertex rating10Vertex = ratingsVertexEnum.First();
// Get the 20 best rated profiles regardless of gender
                var top = from u in userType.GetVertices()
                          let edgeCt = u.GetNumberOfEdges(ratingEdgeType, rating10Vertex, Direction.Out)
                                       orderby edgeCt descending
                                       select new { u, edgeCt };

                Console.WriteLine("20 best rated profiles regardless of gender");
                ct = 0;
                foreach (var v in top)
                {
                    if (++ct > 20)
                    {
                        break;
                    }
                    Console.WriteLine("User id: " + v.u.VertexId + "\t10 ratings: " + v.edgeCt);
                }
                Console.WriteLine();

// Get the 20 best rated males
                Console.WriteLine("20 best rated male profiles");
                var top20males = from u in userType.GetVertices()
                                 where ((Gender)u.GetProperty(genderType)) == Gender.Male
                                 let edgeCt = u.GetNumberOfEdges(ratingEdgeType, rating10Vertex, Direction.Out)
                                              orderby edgeCt descending
                                              select new { u, edgeCt };

                ct = 0;
                foreach (var v in top20males)
                {
                    if (++ct > 20)
                    {
                        break;
                    }
                    Console.WriteLine("Male User id: " + v.u.VertexId + " \t10 ratings: " + v.edgeCt);
                }
                Console.WriteLine();

// Get the 20 best rated females
                Console.WriteLine("20 best rated female profiles");
                var top20females = from u in userType.GetVertices()
                                   where ((Gender)u.GetProperty(genderType)) == Gender.Female
                                   let edgeCt = u.GetNumberOfEdges(ratingEdgeType, rating10Vertex, Direction.Out)
                                                orderby edgeCt descending
                                                select new { u, edgeCt };
                ct = 0;
                foreach (var v in top20females)
                {
                    if (++ct > 20)
                    {
                        break;
                    }
                    Console.WriteLine("Female User id: " + v.u.VertexId + "\t10 ratings: " + v.edgeCt);
                }
                session.Commit();
            }
        }
Beispiel #2
0
 public void CreateEdges()
 {
     Create1Vertices(true);
     using (SessionNoServer session = new SessionNoServer(systemDir, 5000, false, true))
     {
         session.BeginUpdate();
         Graph      g                    = Graph.Open(session); // it takes a while to open graph fresh from databases
         VertexType userType             = g.FindVertexType("User");
         VertexType locationType         = g.FindVertexType("Location");
         EdgeType   friendEdgeType       = g.FindEdgeType("Friend");
         EdgeType   userLocationEdgeType = g.FindEdgeType("UserLocation");
         int        lineNumber           = 0;
         long       fiendsCt             = 0;
         foreach (string line in File.ReadLines(inputData))
         {
             string[] fields     = line.Split(' ');
             Vertex   aUser      = new Vertex(g, userType, ++lineNumber);
             int      locationCt = 1;
             foreach (string s in fields)
             {
                 if (s.Length > 0)
                 {
                     ++fiendsCt;
                     Vertex aFriend   = new Vertex(g, userType, int.Parse(s));
                     Vertex aLocation = new Vertex(g, locationType, locationCt++);
                     friendEdgeType.NewEdge(aUser, aFriend);
                     userLocationEdgeType.NewEdge(aUser, aLocation);
                 }
             }
             if (lineNumber >= 5000)
             {
                 break;
             }
         }
         Console.WriteLine("Done importing " + lineNumber + " users with " + fiendsCt + " friends");
         session.Commit();
         session.BeginRead();
         foreach (var x in session.AllObjects <BTreeSet <Range <VertexId> > >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeSet <EdgeType> >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeSet <EdgeIdVertexId> >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <EdgeId, VelocityDbList <ElementId> > >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <string, PropertyType> >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <string, EdgeType> >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <string, VertexType> >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <VertexId, BTreeSet <EdgeIdVertexId> > >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <VertexType, BTreeMap <VertexId, BTreeSet <EdgeIdVertexId> > > >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         foreach (var x in session.AllObjects <BTreeMap <EdgeType, BTreeMap <VertexType, BTreeMap <VertexId, BTreeSet <EdgeIdVertexId> > > > >(false, true))
         {
             Assert.True(x.ToDoBatchAddCount == 0);
         }
         session.Commit();
         Validate();
     }
 }
        static readonly string systemDir = "QuickStartVelocityGraph"; // appended to SessionBase.BaseDatabasePath

        static void CreateGraph()
        {
            using (SessionNoServer session = new SessionNoServer(systemDir))
            {
                if (Directory.Exists(session.SystemDirectory))
                {
                    Directory.Delete(session.SystemDirectory, true); // remove systemDir from prior runs and all its databases.
                }
                // Start an update transaction
                session.BeginUpdate();
                Graph g = new Graph(session);
                session.Persist(g);

                // Add a node type for the movies, with a unique identifier and two indexed Propertys
                VertexType   movieType      = g.NewVertexType("Movie");
                PropertyType movieTitleType = g.NewVertexProperty(movieType, "title", DataType.String, PropertyKind.Indexed);
                PropertyType movieYearType  = g.NewVertexProperty(movieType, "year", DataType.Integer, PropertyKind.Indexed);

                // Add a node type for the actor
                VertexType   actorType     = g.NewVertexType("Actor");
                PropertyType actorNameType = g.NewVertexProperty(actorType, "name", DataType.String, PropertyKind.Indexed);

                // Add a directed edge type with a Property for the cast of a movie
                EdgeType     castType          = g.NewEdgeType("ACTS_IN", false);
                PropertyType castCharacterType = g.NewEdgeProperty(castType, "role", DataType.String, PropertyKind.Indexed);

                // Add some Movies
                Vertex matrix1 = movieType.NewVertex();
                matrix1.SetProperty(movieTitleType, "The Matrix");
                matrix1.SetProperty(movieYearType, (int)1999);

                Vertex matrix2 = movieType.NewVertex();
                matrix2.SetProperty(movieTitleType, "The Matrix Reloaded");
                matrix2.SetProperty(movieYearType, (int)2003);

                Vertex matrix3 = movieType.NewVertex();
                matrix3.SetProperty(movieTitleType, "The Matrix  Revolutions");
                matrix3.SetProperty(movieYearType, (int)2003);

                // Add some Actors
                Vertex keanu = actorType.NewVertex();
                keanu.SetProperty(actorNameType, "Keanu Reeves");

                Vertex laurence = actorType.NewVertex();
                laurence.SetProperty(actorNameType, "Laurence Fishburne");

                Vertex carrieanne = actorType.NewVertex();
                carrieanne.SetProperty(actorNameType, "Carrie-Anne Moss");

                // Add some edges
                Edge keanuAsNeo = castType.NewEdge(keanu, matrix1);
                keanuAsNeo.SetProperty(castCharacterType, "Neo");
                keanuAsNeo = castType.NewEdge(keanu, matrix2);
                keanuAsNeo.SetProperty(castCharacterType, "Neo");
                keanuAsNeo = castType.NewEdge(keanu, matrix3);
                keanuAsNeo.SetProperty(castCharacterType, "Neo");

                Edge laurenceAsMorpheus = castType.NewEdge(laurence, matrix1);
                laurenceAsMorpheus.SetProperty(castCharacterType, "Morpheus");
                laurenceAsMorpheus = castType.NewEdge(laurence, matrix2);
                laurenceAsMorpheus.SetProperty(castCharacterType, "Morpheus");
                laurenceAsMorpheus = castType.NewEdge(laurence, matrix3);
                laurenceAsMorpheus.SetProperty(castCharacterType, "Morpheus");

                Edge carrieanneAsTrinity = castType.NewEdge(carrieanne, matrix1);
                carrieanneAsTrinity.SetProperty(castCharacterType, "Trinity");
                carrieanneAsTrinity = castType.NewEdge(carrieanne, matrix2);
                carrieanneAsTrinity.SetProperty(castCharacterType, "Trinity");
                carrieanneAsTrinity = castType.NewEdge(carrieanne, matrix3);
                carrieanneAsTrinity.SetProperty(castCharacterType, "Trinity");

                // Commit the transaction
                session.Commit();
            }
        }
Beispiel #4
0
        public void ingestData()
        {
            if (Directory.Exists(Path.Combine(SessionBase.BaseDatabasePath, s_systemDir)))
            {
                Directory.Delete(Path.Combine(SessionBase.BaseDatabasePath, s_systemDir), true); // remove systemDir from prior runs and all its databases.
            }
            Directory.CreateDirectory(Path.Combine(SessionBase.BaseDatabasePath, s_systemDir));

            using (SessionNoServer session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No))
            {
                session.BeginUpdate();
                session.DefaultDatabaseLocation().CompressPages = PageInfo.compressionKind.LZ4;
                Graph g = new Graph(session);

                // SCHEMA
                VertexType userType = g.NewVertexType("User");

                EdgeType friendEdgeType = g.NewEdgeType("Friend", true, userType, userType);

                PropertyType countryProperty = userType.NewProperty("country", DataType.String, PropertyKind.NotIndexed);

                PropertyType incomeProperty = userType.NewProperty("income", DataType.Long, PropertyKind.NotIndexed);

                PropertyType friendshipStartProperty = friendEdgeType.NewProperty("start", DataType.DateTime, PropertyKind.NotIndexed);

                // DATA
                int  lineNumber = 0;
                long fiendsCt   = 0;
                int  stop       = (int)Math.Pow(2, 26); // make sure to create enough of these
                for (int i = 1; i < stop; i++)
                {
                    g.NewVertex(userType);
                }
                session.Commit();
                session.BeginUpdate();
                foreach (string line in File.ReadLines(s_inputData))
                {
                    if (++lineNumber % 10000 == 0)
                    {
                        Console.WriteLine("Parsing user " + lineNumber + ", friends total: " + fiendsCt + " at " + DateTime.Now);
                    }
                    string[] fields = line.Split(' ');
                    Vertex   aUser  = null;
                    foreach (string s in fields)
                    {
                        if (s.Length > 0)
                        {
                            if (aUser == null)
                            {
                                aUser = new Vertex(g, userType, int.Parse(s));
                            }
                            else
                            {
                                ++fiendsCt;
                                Vertex aFriend = new Vertex(g, userType, int.Parse(s));
                                if (fiendsCt % 2 == 0)
                                {
                                    aFriend.SetProperty(countryProperty, "Sweden"); // just some random stuff
                                }
                                else
                                {
                                    aFriend.SetProperty(incomeProperty, fiendsCt); // just some random stuff
                                }
                                Edge edge = friendEdgeType.NewEdge(aUser, aFriend);
                                if (fiendsCt % 2 == 0)
                                {
                                    edge.SetProperty(friendshipStartProperty, DateTime.Now);
                                }
                            }
                        }
                    }
                    if (DataCache.MaximumMemoryUse <= 27000000000)
                    {
                        if (lineNumber >= 100000) // remove this condition if you have time to wait a long while...
                        {
                            break;
                        }
                    }
                }
                Console.WriteLine("Done importing " + lineNumber + " users with " + fiendsCt + " friends");
                session.Commit();
            }
        }