Exemple #1
0
        /// <summary>
        /// Queues a tokenization assignment onto <c>nodeMessages</c>.
        /// </summary>
        /// <param name="assignment">The assignment to enqueue; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="assignment"/> is null.
        /// </exception>
        public void PostTokenizationAssignment(TokenizationAssignment assignment)
        {
            // Reject null before anything is placed on the queue.
            var message = assignment ?? throw new ArgumentNullException(nameof(assignment));

            nodeMessages.Enqueue(message);
        }
        /// <summary>
        /// Publishes a tokenization assignment by passing it to
        /// <c>assignmentsSubject.OnNext</c>.
        /// </summary>
        /// <param name="assignment">The assignment to publish; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="assignment"/> is null.
        /// </exception>
        public void PostTokenizationAssignment(TokenizationAssignment assignment)
        {
            // The null check runs as part of argument evaluation, before OnNext is invoked.
            assignmentsSubject.OnNext(
                assignment ?? throw new ArgumentNullException(nameof(assignment)));
        }
        /// <summary>
        /// Opens the file named by the assignment, runs it through
        /// <c>documentTokenizer</c>, and posts the resulting document to
        /// <c>messageSink</c>.
        /// </summary>
        /// <param name="assignment">Assignment whose <c>Filepath</c> names the file to tokenize.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="assignment"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The file named by the assignment does not exist.
        /// </exception>
        private void ExecuteTokenization(TokenizationAssignment assignment)
        {
            if (assignment == null)
            {
                throw new ArgumentNullException(nameof(assignment));
            }

            var sourcePath = assignment.Filepath;

            if (!File.Exists(sourcePath))
            {
                throw new InvalidOperationException("Cannot tokenize a file that does not exist: " + sourcePath);
            }

            Debug.WriteLine($"Node {Id} - Running tokenization on {sourcePath}");

            Document tokenizedDocument;

            // The stream is only needed while tokenizing; it is closed before the result is posted.
            using (var input = File.OpenRead(sourcePath))
            {
                // The tokenizer receives the bare file name, not the full path.
                tokenizedDocument = documentTokenizer.TokenizeStream(input, Path.GetFileName(sourcePath));
            }

            messageSink.PostTokenizedDocument(tokenizedDocument);
        }
Exemple #4
0
        /// <summary>
        /// Creates the worker nodes, hands out every assignment produced by the local
        /// <c>assignmentsStream</c> iterator to the node id currently selected by the local
        /// <c>nodesStream</c> iterator, and finally stops the worker nodes.
        /// </summary>
        /// <remarks>
        /// Each item of <c>assignmentsStream</c> is a function that receives a node id and
        /// returns a task producing <c>true</c> when the loop should move on to the next
        /// node id, or <c>false</c> when the same node id should be reused for the next item.
        /// The iterator is lazy, so each phase's condition is evaluated only after the
        /// previous item's task has been awaited.
        /// </remarks>
        /// <returns>A task that completes once the worker nodes have been stopped.</returns>
        private async Task WorkLoopAsync()
        {
            var nodes = await nodesManager.CreateWorkerNodesAsync();

            workerNodes = nodes.ToArray();

            // IEnumerator<int> is IDisposable; scope it so it is released even if an
            // assignment function throws.
            using (var nodesEnumerator = nodesStream().GetEnumerator())
            {
                // Position the enumerator on the first node id before any assignment is made.
                nodesEnumerator.MoveNext();

                var assignments = assignmentsStream();

                foreach (var assignmentFunction in assignments)
                {
                    var assignedNode = nodesEnumerator.Current;
                    var advanceNode  = await assignmentFunction(assignedNode);

                    if (advanceNode)
                    {
                        nodesEnumerator.MoveNext();
                    }
                }
            }

            await nodesManager.StopWorkerNodesAsync();

            // Produces the assignment functions in phase order: tokenization, a wait for
            // tokenization to finish, per-node normalization configuration, normalization.
            IEnumerable <Func <int, Task <bool> > > assignmentsStream()
            {
                // Phase 1: one tokenization assignment per queued file path.
                while (filepathsToTokenize.Any())
                {
                    var nextFilePath = filepathsToTokenize.Dequeue();

                    yield return((nodeId) => {
                        var assignment = new TokenizationAssignment(nodeId, nextFilePath);
                        messageSink.PostTokenizationAssignment(assignment);

                        return Task.FromResult(true);
                    });
                }

                // Barrier: wait for tokenizationDoneTCS to complete. Returning false keeps
                // the current node id selected, since nothing was assigned to it.
                yield return((nodeId) => tokenizationDoneTCS.Task.ContinueWith(_ => false));

                // Phase 2: one configuration assignment per entry in workerNodes. The entry
                // itself is unused; the target id comes from the node stream.
                foreach (var node in workerNodes)
                {
                    yield return((nodeId) => {
                        var assignment = new ConfigureNormalizationAssignment(nodeId, generatedDocuments.Count(), termDocumentAppearances);
                        messageSink.PostConfigureNormalizationAssignment(assignment);

                        return Task.FromResult(true);
                    });
                }

                // Phase 3: one normalization assignment per queued document.
                while (documentsToNormalize.Any())
                {
                    var nextDocument = documentsToNormalize.Dequeue();

                    yield return((nodeId) => {
                        var assignment = new NormalizationAssignment(nodeId, nextDocument);
                        messageSink.PostNormalizationAssignment(assignment);

                        return Task.FromResult(true);
                    });
                }

                yield break;
            }

            // Endless sequence of node ids: 1, 2, ..., workerNodes.Length, 0, then repeating.
            // NOTE(review): that is workerNodes.Length + 1 distinct ids per cycle, so the
            // phase-2 loop above (workerNodes.Length items) cannot cover every id in one
            // pass. Confirm whether id 0 is a valid assignment target or an off-by-one.
            IEnumerable <int> nodesStream()
            {
                var currentNodeId = 0;

                while (true)
                {
                    currentNodeId++;
                    if (currentNodeId > workerNodes.Length)
                    {
                        currentNodeId = 0;
                    }

                    yield return(currentNodeId);
                }
            }
        }