コード例 #1
0
        /// <summary>
        /// Stores Wikipedia documents read from a file and writes an index of their
        /// indexable fields, one batch of documents at a time.
        /// </summary>
        /// <param name="args">
        /// Required keys: "dataDirectory", "fileName", "collection".
        /// Optional integer keys: "skip" (default 0), "take" (absent or 0 means int.MaxValue),
        /// "sampleSize" (default 1000) and "pageSize" (default 100000).
        /// </param>
        /// <param name="logger">Logger passed to the session factory, the index stream and the debugger.</param>
        public void Run(IDictionary <string, string> args, ILogger logger)
        {
            var dataDirectory = args["dataDirectory"];
            var fileName      = args["fileName"];
            var collection    = args["collection"];

            // Optional numeric arguments keep their default when the key is absent.
            int skip = 0;
            if (args.ContainsKey("skip"))
            {
                skip = int.Parse(args["skip"]);
            }

            int take = int.MaxValue;
            if (args.ContainsKey("take"))
            {
                take = int.Parse(args["take"]);
            }

            int sampleSize = 1000;
            if (args.ContainsKey("sampleSize"))
            {
                sampleSize = int.Parse(args["sampleSize"]);
            }

            int pageSize = 100000;
            if (args.ContainsKey("pageSize"))
            {
                pageSize = int.Parse(args["pageSize"]);
            }

            var collectionId = collection.ToHash();

            // An explicit take of 0 is treated the same as "no limit".
            if (take == 0)
            {
                take = int.MaxValue;
            }

            var storedFields  = new HashSet<string> { "language", "wikibase_item", "title", "text", "url" };
            var indexedFields = new HashSet<string> { "title", "text" };

            var model     = new BagOfCharsModel();
            var documents = WikipediaHelper.ReadWP(fileName, skip, take, storedFields, indexedFields);

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            {
                var debugger = new IndexDebugger(logger, sampleSize);

                using (var writeSession = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
                {
                    foreach (var batch in documents.Batch(pageSize))
                    {
                        // Each batch gets its own index stream and in-memory index session.
                        using (var indexStream = new WritableIndexStream(collectionId, sessionFactory, logger: logger))
                        using (var indexSession = new IndexSession<string>(model, model))
                        {
                            foreach (var doc in batch)
                            {
                                writeSession.Put(doc);

                                foreach (var indexable in doc.IndexableFields)
                                {
                                    indexSession.Put(doc.Id, indexable.KeyId, (string)indexable.Value);
                                }

                                debugger.Step(indexSession);
                            }

                            // Write the batch's in-memory index before the session is disposed.
                            indexStream.Write(indexSession.GetInMemoryIndex());
                        }
                    }
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Reads documents through <c>ReadWatFile</c>, wraps them in a <c>WriteJob</c> and hands
        /// the job to <c>sessionFactory.CreateWordEmbeddings</c>, then logs the elapsed time.
        /// </summary>
        /// <param name="sessionFactory">Factory on which <c>CreateWordEmbeddings</c> is invoked.</param>
        /// <param name="writeSession">Write session forwarded to <c>CreateWordEmbeddings</c>.</param>
        /// <param name="indexSession">Embeddings session forwarded to <c>CreateWordEmbeddings</c>.</param>
        /// <param name="fileName">File passed to <c>ReadWatFile</c>; also named in the log message.</param>
        /// <param name="collection">Collection name; hashed into the collection id.</param>
        /// <param name="model">Model given to the write job.</param>
        /// <param name="logger">Receives the completion message.</param>
        /// <param name="refFileName">Second file name passed to <c>ReadWatFile</c>.</param>
        private static void WriteEmbeddings(
            SessionFactory sessionFactory,
            WriteSession writeSession,
            WordEmbeddingsSession indexSession,
            string fileName,
            string collection,
            IStringModel model,
            ILogger logger,
            string refFileName)
        {
            var watDocuments = ReadWatFile(fileName, refFileName);
            var collectionId = collection.ToHash();
            var timer        = Stopwatch.StartNew();

            // The stored-field set is deliberately left empty; three fields are flagged for indexing.
            var job = new WriteJob(
                collectionId,
                watDocuments,
                model,
                new HashSet<string>(),
                new HashSet<string> { "title", "description", "url" });

            sessionFactory.CreateWordEmbeddings(job, writeSession, indexSession, reportSize: 1000);

            logger.LogInformation($"indexed {fileName} in {timer.Elapsed}");
        }
コード例 #3
0
 /// <summary>
 /// Disposes the current write session, if there is one, and clears the field.
 /// Does nothing when no session is open.
 /// </summary>
 public void FinalizeWriteSession()
 {
     if (m_WriteSession == null)
     {
         return;
     }

     m_WriteSession.Dispose();
     m_WriteSession = null;
 }
コード例 #4
0
        /// <summary>
        /// Sends the credentials to the login service, persists the returned session data on
        /// success and opens the next window; on failure shows the error returned by the service.
        /// </summary>
        /// <param name="_credentialValue">Login request; its username is stored as the session's user_login.</param>
        /// <param name="sender">Passed through to <c>ShowDepositWindow</c> / <c>ShowMainWindow</c>.</param>
        /// <remarks>
        /// NOTE(review): this method is <c>async void</c>, so callers cannot await it or observe
        /// exceptions thrown after the first await.
        /// </remarks>
        private async void GetLoginData(LoginRequest _credentialValue, object sender)
        {
            SetDialogHostStatus("True");
            SetStatusUpdate("Logging in...");
            for (int step = 0; step < 50; step++)
            {
                SetProgressBarValue(step);
            }

            SetStatusUpdate("Checking Permisson...");
            readLoginResponse = new ReadLoginResponse();
            LoginResponse loginResponse = await readLoginResponse.GetLoginAsync(_credentialValue);

            // Decide the outcome first, then run the second half of the progress bar,
            // which is shown regardless of the outcome.
            bool loginSucceeded = loginResponse.error == null;
            for (int step = 50; step < 100; step++)
            {
                SetProgressBarValue(step);
            }

            if (!loginSucceeded)
            {
                MaterialMessageBox.ShowDialog(loginResponse.error.message, loginResponse.error.code.ToString(), MessageBoxButton.OK, PackIconKind.Error, PrimaryColor.LightBlue);
                UsernameValue = "";
                SetDialogHostStatus("False");
                SetProgressBarValue(0);
                return;
            }

            // Persist the session returned by the login service.
            writeSession = new WriteSession();
            Session session = new Session
            {
                user_id       = loginResponse.result.res_user.id.ToString(),
                user_name     = loginResponse.result.res_user.name,
                user_login    = _credentialValue.username,
                token         = loginResponse.result.token,
                refresh_token = loginResponse.result.refresh_token,
                token_live    = loginResponse.result.token_live.ToString()
            };
            writeSession.UpdateSession(session);
            SetStatusUpdate("Logged in.");

            ConfigList     = readConfig.GetAllConfigs();
            IpAddressValue = ConfigList[0].current_ip;
            SetDialogHostStatus("False");
            SetProgressBarValue(0);
            UsernameValue = "";

            // No POS session for the configured address leads to the deposit window.
            var posSession = await readPosSession.GetPosSessionAsync(IpAddressValue);
            if (posSession == null)
            {
                ShowDepositWindow(sender);
            }
            else
            {
                ShowMainWindow(sender);
            }
        }
コード例 #5
0
 /// <summary>
 /// Opens or creates the shape file and starts a new write session on <c>FeatureClass</c>.
 /// A session that is still open is finalized first.
 /// </summary>
 public void InitializeWriteSession()
 {
     // Close the session left over from an earlier call before replacing it.
     if (m_WriteSession != null)
     {
         FinalizeWriteSession();
     }

     OpenOrCreateShapeFile();
     m_WriteSession = new WriteSession(FeatureClass);
 }
コード例 #6
0
 /// <summary>
 /// Opens or creates the results shape file and starts a new write session on
 /// <c>ResultsFeatureClass</c>. A session that is still open is finalized first.
 /// </summary>
 /// <param name="a_Overwrite">Forwarded unchanged to the <c>WriteSession</c> constructor.</param>
 public void InitializeWriteSession(bool a_Overwrite)
 {
     // Close the session left over from an earlier call before replacing it.
     if (m_WriteSession != null)
     {
         FinalizeWriteSession();
     }

     OpenOrCreateResultsShapeFile();
     m_WriteSession = new WriteSession(ResultsFeatureClass, a_Overwrite);
 }
コード例 #7
0
        /// <summary>
        /// Writes every entry of <c>_data</c> as its own document, writing a separate in-memory
        /// index to the stream for each one, then verifies that searching for each entry returns
        /// a top hit whose score is at least <c>model.IdenticalAngle</c>.
        /// </summary>
        public void Can_search_filestreamed_with_multiple_pages()
        {
            var          model        = new BagOfCharsModel();
            // Named after this test so it does not truncate and write the collection of the
            // test this constant was originally copied from.
            const string collection   = nameof(Can_search_filestreamed_with_multiple_pages);
            var          collectionId = collection.ToHash();
            const string fieldName    = "description";

            // Start from an empty collection so earlier runs cannot influence the result.
            _sessionFactory.Truncate(collectionId);

            using (var stream = new WritableIndexStream(collectionId, _sessionFactory))
                using (var writeSession = new WriteSession(new DocumentWriter(collectionId, _sessionFactory)))
                {
                    var keyId = writeSession.EnsureKeyExists(fieldName);

                    foreach (var data in _data)
                    {
                        // A fresh index session per entry, written to the stream straight away.
                        using (var indexSession = new IndexSession <string>(model, model))
                        {
                            var doc = new Document(new Field[] { new Field(fieldName, data, index: true, store: true) });

                            writeSession.Put(doc);
                            indexSession.Put(doc.Id, keyId, data);
                            stream.Write(indexSession.GetInMemoryIndex());
                        }
                    }
                }

            var queryParser = new QueryParser <string>(_sessionFactory, model);

            using (var searchSession = new SearchSession(_sessionFactory, model, new PostingsReader(_sessionFactory)))
            {
                Assert.DoesNotThrow(() =>
                {
                    foreach (var word in _data)
                    {
                        var query    = queryParser.Parse(collection, word, fieldName, fieldName, and: true, or: false);
                        var result   = searchSession.Search(query, 0, 1);
                        var document = result.Documents.FirstOrDefault();

                        if (document == null)
                        {
                            throw new Exception($"unable to find {word}.");
                        }

                        if (document.Score < model.IdenticalAngle)
                        {
                            throw new Exception($"unable to score {word}.");
                        }

                        Debug.WriteLine($"{word} matched with {document.Score * 100}% certainty.");
                    }
                });
            }
        }
コード例 #8
0
        /// <summary>
        /// Console benchmark that times three phases and prints each duration in milliseconds:
        /// creating a world with <paramref name="entityCount"/> entities, a full write into a
        /// new target root, and a changes-only write after modifying a tenth of the entities.
        /// </summary>
        /// <param name="entityCount">Number of entities added to the world.</param>
        private static void RunMonoSyncTest(int entityCount)
        {
            Console.WriteLine();
            Console.WriteLine($"Benchmark with {entityCount} entities");

            // Initialization
            Console.Write("Initializing: ");
            Stopwatch stopwatch = Stopwatch.StartNew();

            var world          = new MonoSyncWorld();
            var syncSourceRoot = new SourceSynchronizerRoot(world);

            using (world.Entities.BeginMassUpdate())
            {
                for (int i = 0; i < entityCount; i++)
                {
                    world.Entities.Add(i, new Entity());
                }
            }
            stopwatch.Stop();
            Console.WriteLine(stopwatch.ElapsedMilliseconds + "MS");

            // Full write
            Console.Write("Full write: ");
            stopwatch.Restart();

            TargetSynchronizerRoot<MonoSyncWorld> syncTargetRoot;

            // The using block disposes the session even when WriteFull or the target
            // constructor throws; the previous manual Dispose call was skipped in that case.
            using (WriteSession fullWriteSession = syncSourceRoot.BeginWrite())
            {
                syncTargetRoot = new TargetSynchronizerRoot<MonoSyncWorld>(fullWriteSession.WriteFull());
            }

            stopwatch.Stop();
            Console.WriteLine(stopwatch.ElapsedMilliseconds + "MS");

            // Changes write: the timing covers both the modifications and the write/read.
            int changes = entityCount / 10;

            Console.Write($"{changes} changes write: ");
            stopwatch.Restart();

            for (int i = 0; i < changes; i++)
            {
                world.Entities[i].XPos = 2;
            }
            using (WriteSession writeSession = syncSourceRoot.BeginWrite())
            {
                var data = writeSession.WriteChanges().SetTick(TimeSpan.Zero);
                syncTargetRoot.Read(data);
            }
            stopwatch.Stop();
            Console.WriteLine(stopwatch.ElapsedMilliseconds + "MS");
        }
コード例 #9
0
        /// <summary>
        /// Stores Wikipedia documents read from a file and tokenizes their indexable fields,
        /// counting one debugger step per token, then logs the step count and time.
        /// No index is written.
        /// </summary>
        /// <param name="args">
        /// Required keys: "dataDirectory", "fileName", "collection".
        /// Optional integer keys: "skip" (default 0), "take" (absent or 0 means int.MaxValue),
        /// "sampleSize" (default 1000) and "pageSize" (default 100000).
        /// </param>
        /// <param name="logger">Logger passed to the session factory and the debugger; also receives the summary.</param>
        public void Run(IDictionary <string, string> args, ILogger logger)
        {
            var dataDirectory = args["dataDirectory"];
            var fileName      = args["fileName"];
            var collection    = args["collection"];

            // Optional numeric arguments keep their default when the key is absent.
            int skip = 0;
            if (args.ContainsKey("skip"))
            {
                skip = int.Parse(args["skip"]);
            }

            int take = int.MaxValue;
            if (args.ContainsKey("take"))
            {
                take = int.Parse(args["take"]);
            }

            int sampleSize = 1000;
            if (args.ContainsKey("sampleSize"))
            {
                sampleSize = int.Parse(args["sampleSize"]);
            }

            int pageSize = 100000;
            if (args.ContainsKey("pageSize"))
            {
                pageSize = int.Parse(args["pageSize"]);
            }

            var collectionId = collection.ToHash();

            // An explicit take of 0 is treated the same as "no limit".
            if (take == 0)
            {
                take = int.MaxValue;
            }

            var storedFields  = new HashSet<string> { "language", "wikibase_item", "title", "text" };
            var indexedFields = new HashSet<string> { "language", "title", "text" };

            var model     = new BagOfCharsModel();
            var documents = WikipediaHelper.ReadWP(fileName, skip, take, storedFields, indexedFields);
            var debugger  = new BatchDebugger(logger, sampleSize);

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            using (var writeSession = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
            {
                foreach (var batch in documents.Batch(pageSize))
                {
                    // The index session is created and disposed per batch but nothing is put into it.
                    using (var indexSession = new IndexSession<string>(model, model))
                    {
                        foreach (var doc in batch)
                        {
                            writeSession.Put(doc);

                            foreach (var indexable in doc.IndexableFields)
                            {
                                // Tokens are only counted; their content is not used.
                                foreach (var token in model.Tokenize((string)indexable.Value))
                                {
                                    debugger.Step();
                                }
                            }
                        }
                    }
                }

                logger.LogInformation($"tokenized {debugger.StepCount} in {debugger.Time}.");
            }
        }
コード例 #10
0
ファイル: CCHelper.cs プロジェクト: theolivenbaum/resin
        /// <summary>
        /// Converts the records returned by <c>ReadWatFile</c> into documents, pushes them through
        /// a producer/consumer queue that writes each one via <c>sessionFactory.Write</c>, then
        /// writes the resulting in-memory index to the collection's index stream and logs the
        /// elapsed time.
        /// </summary>
        /// <param name="dataDirectory">Directory given to the session factory.</param>
        /// <param name="fileName">File passed to <c>ReadWatFile</c>; also named in the log message.</param>
        /// <param name="collection">Collection name; hashed into the collection id.</param>
        /// <param name="model">Model used for both arguments of the index session.</param>
        /// <param name="logger">Logger passed to the session factory and index stream; receives the completion message.</param>
        /// <param name="refFileName">Second file name passed to <c>ReadWatFile</c>.</param>
        public static void WriteWatSegment(
            string dataDirectory,
            string fileName,
            string collection,
            IModel <string> model,
            ILogger logger,
            string refFileName)
        {
            var timer        = Stopwatch.StartNew();
            var collectionId = collection.ToHash();
            var storedKeys   = new HashSet<string> { "title", "description", "url", "filename" };
            var indexedKeys  = new HashSet<string> { "title", "description", "url" };

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            using (var writeSession = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
            using (var indexSession = new IndexSession<string>(model, model))
            {
                // The queue is disposed before the index is read below.
                // NOTE(review): presumably disposal waits for queued documents to be consumed - confirm.
                using (var queue = new ProducerConsumerQueue<Document>(document =>
                {
                    sessionFactory.Write(document, writeSession, indexSession);
                }))
                {
                    foreach (var record in ReadWatFile(fileName, refFileName))
                    {
                        // Each key/value pair becomes a field, flagged by membership in the two key sets.
                        var fields = record
                            .Select(kvp => new Field(
                                kvp.Key,
                                kvp.Value,
                                index: indexedKeys.Contains(kvp.Key),
                                store: storedKeys.Contains(kvp.Key)))
                            .ToList();

                        queue.Enqueue(new Document(fields));
                    }
                }

                using (var indexStream = new WritableIndexStream(collectionId, sessionFactory, logger: logger))
                {
                    indexStream.Write(indexSession.GetInMemoryIndex());
                }
            }

            logger.LogInformation($"indexed {fileName} in {timer.Elapsed}");
        }
コード例 #11
0
        /// <summary>
        /// Reads MNIST images, stores each one as a document with an "image" and a "label" field,
        /// indexes the image, writes the index to the collection's index stream, logs the number
        /// of steps and elapsed time, and prints the index tree of the "image" key.
        /// </summary>
        /// <param name="args">
        /// Required keys: "dataDirectory", "collection", "imageFileName", "labelFileName".
        /// </param>
        /// <param name="logger">Logger passed to the session factory, index stream and debugger; receives the summary.</param>
        public void Run(IDictionary <string, string> args, ILogger logger)
        {
            var timer         = Stopwatch.StartNew();
            var dataDirectory = args["dataDirectory"];
            var collectionId  = args["collection"].ToHash();
            var images        = new MnistReader(args["imageFileName"], args["labelFileName"]).Read();
            var debugger      = new IndexDebugger(logger);
            var model         = new LinearClassifierImageModel();

            // Assigned inside the sessions, printed after they are disposed.
            VectorNode imageTree;

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            {
                // Start from an empty collection.
                sessionFactory.Truncate(collectionId);

                using (var writeSession = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
                using (var indexSession = new IndexSession<IImage>(model, model))
                {
                    var imageKeyId = writeSession.EnsureKeyExists("image");

                    foreach (var image in images)
                    {
                        var pixels = new Field("image", image.Pixels, index: true, store: true);
                        var label  = new Field("label", image.Label, index: false, store: true);
                        var doc    = new Document(new Field[] { pixels, label });

                        // The document is stored before its id and the field's key id are used for indexing.
                        writeSession.Put(doc);
                        indexSession.Put(doc.Id, pixels.KeyId, image);

                        debugger.Step(indexSession);
                    }

                    var index = indexSession.GetInMemoryIndex();

                    imageTree = index[imageKeyId];

                    using (var indexStream = new WritableIndexStream(collectionId, sessionFactory, logger: logger))
                    {
                        indexStream.Write(index);
                    }
                }
            }

            logger.LogInformation($"indexed {debugger.Steps} mnist images in {timer.Elapsed}");

            Print(imageTree);
        }
コード例 #12
0
        /// <summary>
        /// Truncates the collection and stores Wikipedia documents read from a file.
        /// The set of fields to index is empty, and no index is written.
        /// </summary>
        /// <param name="args">
        /// Required keys: "dataDirectory", "fileName", "collection".
        /// Optional integer keys: "skip" (default 0), "take" (absent or 0 means int.MaxValue)
        /// and "sampleSize" (default 1000).
        /// </param>
        /// <param name="logger">Logger passed to the session factory and the debugger.</param>
        public void Run(IDictionary <string, string> args, ILogger logger)
        {
            var dataDirectory = args["dataDirectory"];
            var fileName      = args["fileName"];
            var collection    = args["collection"];

            // Optional numeric arguments keep their default when the key is absent.
            int skip = 0;
            if (args.ContainsKey("skip"))
            {
                skip = int.Parse(args["skip"]);
            }

            int take = int.MaxValue;
            if (args.ContainsKey("take"))
            {
                take = int.Parse(args["take"]);
            }

            int sampleSize = 1000;
            if (args.ContainsKey("sampleSize"))
            {
                sampleSize = int.Parse(args["sampleSize"]);
            }

            var collectionId = collection.ToHash();

            // An explicit take of 0 is treated the same as "no limit".
            if (take == 0)
            {
                take = int.MaxValue;
            }

            var storedFields  = new HashSet<string> { "language", "wikibase_item", "title", "text", "url" };
            var indexedFields = new HashSet<string>();

            var documents = WikipediaHelper.ReadWP(fileName, skip, take, storedFields, indexedFields);

            using (var sessionFactory = new SessionFactory(dataDirectory, logger))
            {
                // Remove whatever the collection held before writing the new documents.
                sessionFactory.Truncate(collectionId);

                var debugger = new BatchDebugger(logger, sampleSize);

                using (var writeSession = new WriteSession(new DocumentWriter(collectionId, sessionFactory)))
                {
                    foreach (var doc in documents)
                    {
                        writeSession.Put(doc);

                        debugger.Step();
                    }
                }
            }
        }
コード例 #13
0
 /// <summary>
 /// Begins a write session on the root, returns the result of <c>WriteFull</c>
 /// and disposes the session before returning.
 /// </summary>
 /// <param name="SourceSynchronizerRoot">Root on which the write session is started.</param>
 /// <returns>The bytes returned by <c>WriteFull</c>.</returns>
 public static byte[] WriteFullAndDispose(this SourceSynchronizerRoot SourceSynchronizerRoot)
 {
     using (WriteSession fullWrite = SourceSynchronizerRoot.BeginWrite())
     {
         return fullWrite.WriteFull();
     }
 }
コード例 #14
0
 /// <summary>
 /// Begins a write session on the root, returns the result of <c>WriteChanges</c>
 /// and disposes the session before returning.
 /// </summary>
 /// <param name="SourceSynchronizerRoot">Root on which the write session is started.</param>
 /// <returns>The packet returned by <c>WriteChanges</c>.</returns>
 public static SynchronizationPacket WriteChangesAndDispose(this SourceSynchronizerRoot SourceSynchronizerRoot)
 {
     using (WriteSession changeWrite = SourceSynchronizerRoot.BeginWrite())
     {
         return changeWrite.WriteChanges();
     }
 }
コード例 #15
0
 /// <summary>
 /// Begins a write session on the root, returns the result of <c>WriteFull</c>
 /// and disposes the session before returning.
 /// </summary>
 /// <param name="syncSourceRoot">Root on which the write session is started.</param>
 /// <returns>The bytes returned by <c>WriteFull</c>.</returns>
 public static byte[] WriteFullAndDispose(this SyncSourceRoot syncSourceRoot)
 {
     using (WriteSession fullWrite = syncSourceRoot.BeginWrite())
     {
         return fullWrite.WriteFull();
     }
 }