Example #1
0
        // Word-count sample: splits every input line on single spaces, groups identical
        // words, and emits one "word: count" LineRecord per distinct word.
        // Compile with the 'local' symbol to run on this machine; otherwise the job
        // is built against the named cluster and reads/writes Azure storage.
        public static void WordCountExample()
        {
#if local
            // Passing 1 selects the overload that runs on the local computer with one worker.
            var config = new DryadLinqContext(1);

            // FromEnumerable accepts any IEnumerable source; a one-line array is enough here.
            var input = config.FromEnumerable(
                new LineRecord[] { new LineRecord("This is a dummy line for a short job") });
#else
            string clusterName = "Replace with your HDInsight 3.0 cluster name";
            // The cluster's default storage account and container already include the
            // davinci.txt sample text, so choose those to use the example input below.
            string accountName = "Replace with a storage account name";
            string containerName = "Replace with a storage container name";

            // Passing the cluster name selects the Azure-based overload.
            var config = new DryadLinqContext(clusterName);
            config.JobFriendlyName = "DryadLINQ Sample Wordcount";

            // Plain text files are read as LineRecord.
            var inputUri = AzureUtils.ToAzureUri(accountName, containerName,
                                                 "example/data/gutenberg/davinci.txt");
            var input = config.FromStore<LineRecord>(inputUri);
#endif

            // words -> groups of equal words -> (word, count) pairs -> printable records
            var toOutput = input
                .SelectMany(record => record.Line.Split(' '))
                .GroupBy(word => word)
                .Select(bucket => new KeyValuePair<string, int>(bucket.Key, bucket.Count()))
                .Select(pair => new LineRecord(String.Format("{0}: {1}", pair.Key, pair.Value)));

#if local
            // Any collection the query computes can be pulled back to the client, not only
            // the final output. This is expensive for large collections.
            foreach (LineRecord resultLine in toOutput)
            {
                Console.WriteLine(resultLine.Line);
            }
#else
            // Passing 'true' to ToStore lets a re-run overwrite the previous output.
            var outputUri = AzureUtils.ToAzureUri(accountName, containerName, "wc-out.txt");
            var info = toOutput.ToStore(outputUri, true).SubmitAndWait();
#endif
        }
Example #2
0
        // Helper for IncrementalMeasureLinesAfterInsert, IncrementalMeasureLinesAfterDelete.
        // Formats line until we hit a synchronization point, a position where we know
        // following lines could not be affected by the change.
        //
        // range           - the dirty range; StartIndex, PositionsAdded and PositionsRemoved are read.
        // constraintWidth - passed to line.Format as both width arguments.
        // lineProperties  - passed through to line.Format.
        // line            - scratch TextBoxLine, re-formatted for every line visited.
        // endOfParagraph  - when true on entry the loop is skipped and only the trailing
        //                   cleanup at the bottom runs.
        // lineIndex       - index in _lineMetrics of the first line to examine.
        // lineOffset      - offset at which the line at lineIndex is expected to start.
        private void SyncLineMetrics(DirtyTextRange range, double constraintWidth, LineProperties lineProperties, TextBoxLine line,
            bool endOfParagraph, int lineIndex, int lineOffset)
        {
            // Matching start offsets are trusted as a sync point immediately only when the
            // change was a pure insert or a pure delete (one of the two counts is zero).
            bool offsetSyncOk = (range.PositionsAdded == 0 || range.PositionsRemoved == 0);
            // Offset just past the larger of the added/removed spans, measured from StartIndex.
            int lastCoveredCharOffset = range.StartIndex + Math.Max(range.PositionsAdded, range.PositionsRemoved);

            // Keep updating lines until we find a synchronized position: we stop at the end
            // of the paragraph, or once offset matching is allowed and the running lineOffset
            // equals the start offset of an existing metric.
            while (!endOfParagraph &&
                   (lineIndex == _lineMetrics.Count ||
                    !offsetSyncOk ||
                    lineOffset != _lineMetrics[lineIndex].Offset))
            {
                if (lineIndex < _lineMetrics.Count &&
                    lineOffset >= _lineMetrics[lineIndex].EndOffset)
                {
                    // If the current line offset starts past the current line metric offset,
                    // remove the metric.  This happens when the previous line
                    // frees up enough space to completely consume the following line.
                    // We can't simply replace the record without potentially missing our
                    // sync position.
                    _lineMetrics.RemoveAt(lineIndex);
                    RemoveLineVisualRange(lineIndex, 1);
                }
                else
                {
                    // NOTE(review): 'line' is disposed by this using block at the end of each
                    // iteration and then formatted again on the next one -- presumably
                    // TextBoxLine supports Format after Dispose; confirm.
                    using (line)
                    {
                        line.Format(lineOffset, constraintWidth, constraintWidth, lineProperties, _cache.TextRunCache, _cache.TextFormatter);

                        LineRecord record = new LineRecord(lineOffset, line);

                        if (lineIndex == _lineMetrics.Count ||
                            lineOffset + line.Length <= _lineMetrics[lineIndex].Offset)
                        {
                            // The new line precedes the old line (or there is no old line
                            // left at this index), insert a new record.
                            _lineMetrics.Insert(lineIndex, record);
                            AddLineVisualPlaceholder(lineIndex);
                        }
                        else
                        {
                            // We expect to be colliding with the old line directly.
                            // If we extend past it, we're in danger of needlessly
                            // re-formatting the entire doc (ie, we miss the real
                            // sync position and don't stop until EndOfParagraph).
                            Invariant.Assert(lineOffset < _lineMetrics[lineIndex].EndOffset);

                            _lineMetrics[lineIndex] = record;
                            ClearLineVisual(lineIndex);

                            // If this line ends past the invalidated region, and it
                            // has a hard line break, it's safe to synchronize on the next
                            // line metric with a matching start offset.
                            offsetSyncOk |= lastCoveredCharOffset <= record.EndOffset && line.HasLineBreak;
                        }

                        // Advance to the next line: it starts where this one ended.
                        lineIndex++;
                        lineOffset += line.Length;
                        endOfParagraph = line.EndOfParagraph;
                    }
                }
            }

            // Remove any trailing lines that got absorbed into the new last line.
            if (endOfParagraph && lineIndex < _lineMetrics.Count)
            {
                int count = _lineMetrics.Count - lineIndex;
                _lineMetrics.RemoveRange(lineIndex, count);
                RemoveLineVisualRange(lineIndex, count);
            }
        }
Example #3
0
        // Helper for IncrementalMeasureLinesAfterInsert, IncrementalMeasureLinesAfterDelete.
        // Formats the line preceding the first directly affected line after a TextContainer change.
        // In general this line might grow as content in the following line is absorbed.
        //
        // lineIndex       - index in _lineMetrics of the line to re-format.
        // constraintWidth - passed to line.Format as both width arguments.
        // line            - scratch TextBoxLine used for the formatting pass.
        // lineOffset      - receives the line's start offset plus its newly formatted length.
        // endOfParagraph  - receives line.EndOfParagraph from the formatting pass.
        private void FormatFirstIncrementalLine(int lineIndex, double constraintWidth, LineProperties lineProperties, TextBoxLine line,
            out int lineOffset, out bool endOfParagraph)
        {
            // Capture the existing record's extent before it is overwritten below.
            int startOffset = _lineMetrics[lineIndex].Offset;
            int previousEndOffset = _lineMetrics[lineIndex].EndOffset;
            lineOffset = startOffset;

            using (line)
            {
                line.Format(startOffset, constraintWidth, constraintWidth, lineProperties, _cache.TextRunCache, _cache.TextFormatter);

                _lineMetrics[lineIndex] = new LineRecord(startOffset, line);

                lineOffset = startOffset + line.Length;
                endOfParagraph = line.EndOfParagraph;
            }

            // An unchanged end offset means the cached Visual can be kept.
            if (previousEndOffset == _lineMetrics[lineIndex].EndOffset)
            {
                return;
            }

            ClearLineVisual(lineIndex);
        }
Example #4
0
        // Measures content invalidated due to a TextContainer change.
        // Handles the case where the change removed more positions than it added
        // (delta < 0 is asserted below).
        //
        // constraintWidth - passed to every line.Format call as both width arguments.
        // lineProperties  - passed through to line.Format.
        // range           - the dirty range; StartIndex, PositionsAdded and PositionsRemoved are read.
        // desiredSize     - overwritten with BruteForceCalculateDesiredSize() on normal
        //                   completion; left untouched by the early return below.
        private void IncrementalMeasureLinesAfterDelete(double constraintWidth, LineProperties lineProperties, DirtyTextRange range, ref Size desiredSize)
        {
            int delta = range.PositionsAdded - range.PositionsRemoved;
            Invariant.Assert(delta < 0);

            int firstLineIndex = GetLineIndexFromOffset(range.StartIndex);

            // Clip the scope of the affected lines to the region of the document
            // we've already inspected.  Clipping happens when background layout
            // has not yet completed but an incremental update happens.
            // endOffset is the last offset of the -delta positions that start at StartIndex.
            int endOffset = range.StartIndex + -delta - 1;
            if (endOffset > _lineMetrics[_lineMetrics.Count - 1].EndOffset)
            {
                Invariant.Assert(this.IsBackgroundLayoutPending);
                endOffset = _lineMetrics[_lineMetrics.Count - 1].EndOffset;
                if (range.StartIndex == endOffset)
                {
                    // Nothing left to do until background layout runs.
                    return;
                }
            }

            int lastLineIndex = GetLineIndexFromOffset(endOffset);

            // Shift the offsets of all lines after the last affected one by delta.
            // delta is negative here, so the offsets move down.
            for (int i = lastLineIndex + 1; i < _lineMetrics.Count; i++)
            {
                _lineMetrics[i].Offset += delta;
            }

            TextBoxLine line = new TextBoxLine(this);
            int lineIndex = firstLineIndex;
            int lineOffset;
            bool endOfParagraph;

            // We need to re-format the previous line, because if someone inserted
            // a hard break, the first directly affected line might now be shorter
            // and mergeable with its predecessor.
            // NOTE(review): this wording matches the insert path; on a delete the
            // predecessor presumably can also absorb the shortened content -- confirm.
            if (lineIndex > 0)
            {
                FormatFirstIncrementalLine(lineIndex - 1, constraintWidth, lineProperties, line, out lineOffset, out endOfParagraph);
            }
            else
            {
                // No predecessor: start from the first affected line's own offset.
                lineOffset = _lineMetrics[lineIndex].Offset;
                endOfParagraph = false;
            }

            // Update the first affected line.  If it's completely covered, remove it entirely below.
            // It counts as partially covered when the range starts after lineOffset or the
            // removed span ends before the line's recorded EndOffset.
            if (!endOfParagraph &&
                (range.StartIndex > lineOffset || range.StartIndex + -delta < _lineMetrics[lineIndex].EndOffset))
            {
                // Only part of the line is covered, reformat it.
                using (line)
                {
                    line.Format(lineOffset, constraintWidth, constraintWidth, lineProperties, _cache.TextRunCache, _cache.TextFormatter);

                    _lineMetrics[lineIndex] = new LineRecord(lineOffset, line);

                    lineOffset += line.Length;
                    endOfParagraph = line.EndOfParagraph;
                }
                ClearLineVisual(lineIndex);
                lineIndex++;
            }

            // Remove all the following lines that are completely covered.
            // The count is zero when lineIndex has already moved past lastLineIndex,
            // i.e. the only affected line was reformatted above.
            _lineMetrics.RemoveRange(lineIndex, lastLineIndex - lineIndex + 1);
            RemoveLineVisualRange(lineIndex, lastLineIndex - lineIndex + 1);

            // Recalc the following lines not directly affected as needed.
            SyncLineMetrics(range, constraintWidth, lineProperties, line, endOfParagraph, lineIndex, lineOffset);

            desiredSize = BruteForceCalculateDesiredSize();
        }
Example #5
0
        // Measures content invalidated due to a TextContainer change.
        // Handles the case where the change added at least as many positions as it
        // removed (delta >= 0 is asserted below).
        //
        // constraintWidth - passed to every line.Format call as both width arguments.
        // lineProperties  - passed through to line.Format.
        // range           - the dirty range; StartIndex, PositionsAdded and PositionsRemoved are read.
        // desiredSize     - overwritten with BruteForceCalculateDesiredSize() before returning.
        private void IncrementalMeasureLinesAfterInsert(double constraintWidth, LineProperties lineProperties, DirtyTextRange range, ref Size desiredSize)
        {
            int netAdded = range.PositionsAdded - range.PositionsRemoved;
            Invariant.Assert(netAdded >= 0);

            int currentIndex = GetLineIndexFromOffset(range.StartIndex, LogicalDirection.Forward);

            // Push every later line forward by the net number of positions added.
            if (netAdded > 0)
            {
                for (int following = currentIndex + 1; following < _lineMetrics.Count; following++)
                {
                    _lineMetrics[following].Offset += netAdded;
                }
            }

            TextBoxLine scratchLine = new TextBoxLine(this);
            int currentOffset;
            bool paragraphEnded;

            if (currentIndex <= 0)
            {
                // No predecessor to re-format: begin at the affected line's own offset.
                currentOffset = _lineMetrics[currentIndex].Offset;
                paragraphEnded = false;
            }
            else
            {
                // The predecessor must be re-formatted first: if a hard break was inserted,
                // the first directly affected line may now be shorter and mergeable with it.
                FormatFirstIncrementalLine(currentIndex - 1, constraintWidth, lineProperties, scratchLine, out currentOffset, out paragraphEnded);
            }

            // Format the directly affected line, unless the predecessor already reached
            // the end of the paragraph -- in that case the line was absorbed into it
            // (thinner new content, or a TextWrapping change).
            if (!paragraphEnded)
            {
                using (scratchLine)
                {
                    scratchLine.Format(currentOffset, constraintWidth, constraintWidth, lineProperties, _cache.TextRunCache, _cache.TextFormatter);

                    _lineMetrics[currentIndex] = new LineRecord(currentOffset, scratchLine);

                    currentOffset += scratchLine.Length;
                    paragraphEnded = scratchLine.EndOfParagraph;
                }
                ClearLineVisual(currentIndex);
                currentIndex++;
            }

            // Re-sync whatever follows, as far as the change propagates.
            SyncLineMetrics(range, constraintWidth, lineProperties, scratchLine, paragraphEnded, currentIndex, currentOffset);

            desiredSize = BruteForceCalculateDesiredSize();
        }
        /// <summary>
        /// Word-count sample: splits every input line on single spaces, groups identical
        /// words, and produces one "word: count" <c>LineRecord</c> per distinct word.
        /// </summary>
        /// <remarks>
        /// The target is chosen at compile time, in this priority order:
        /// <c>local</c> (single worker on this machine), then <c>azure</c> (HDInsight cluster
        /// with Azure storage input/output), otherwise a YARN cluster. Only the Azure
        /// configuration writes its result to storage; the other two print it to the console.
        /// </remarks>
        public static void WordCountExample()
        {
#if local
            // This overload runs the computation on your local computer using a single worker
            var config = new DryadLinqContext(1);

            var lines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") };
            // You can create inputs from any IEnumerable source using this method
            var input = config.FromEnumerable(lines);
#elif azure
            string clusterName = "Replace with your HDInsight 3.1 cluster name";
            // to use the davinci.txt example input below, select your cluster's default
            // storage account and container, which automatically includes the sample text
            string accountName = "Replace with a storage account name";
            string containerName = "Replace with a storage container name";

            // This overload creates an Azure-based computation
            var config = new DryadLinqContext(clusterName);
            config.JobFriendlyName = "DryadLINQ Sample Wordcount";

            // plain text files should be read as type LineRecord
            var input = config.FromStore<LineRecord>(Utils.ToAzureUri(accountName, containerName,
                                                     "example/data/gutenberg/davinci.txt"));
#else
            // to use a yarn cluster, fill in the username, resource node machine name and port,
            // and name node and hdfs port below (use -1 for the default hdfs port).
            string user = "Replace with your username";
            string resourceNode = "Replace with the name of the computer your resource node is running on";
            int rmPort = 8088;
            string nameNode = "Replace with the name of the computer your name node is running on";
            int hdfsPort = -1;
            // set the YARN queue to submit your job on below. Leave null to use the default queue
            string queue = null;
            // set the number of worker containers to start for the DryadLINQ job below
            int numberOfWorkers = 2;
            // set the amount of memory requested for the DryadLINQ job manager container below:
            // 8GB should be enough for even the largest jobs, and 2GB will normally suffice
            int amMemoryMB = 2000;
            // set the amount of memory requested for the DryadLINQ worker containers below.
            // The amount needed will depend on the code you are running
            int workerMemoryMB = 8000;
            // Describe the YARN cluster the job is submitted to, using the settings above
            var cluster = new DryadLinqYarnCluster(user, numberOfWorkers, amMemoryMB, workerMemoryMB, queue, resourceNode, rmPort, nameNode, hdfsPort);

            var config = new DryadLinqContext(cluster);

            var lines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") };
            // You can create inputs from any IEnumerable source using this method
            var input = config.FromEnumerable(lines);
#endif

            var words = input.SelectMany(x => x.Line.Split(' '));
            var groups = words.GroupBy(x => x);
            var counts = groups.Select(x => new KeyValuePair<string, int>(x.Key, x.Count()));
            var toOutput = counts.Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value)));

            // The storage output needs accountName/containerName, which are only declared
            // when the azure input branch above was compiled. 'local' takes priority over
            // 'azure' there, so it must do the same here.
#if azure && !local
            // the 'true' parameter to ToStore means the output will be over-written if you run
            // the job more than once
            var info = toOutput.ToStore(Utils.ToAzureUri(accountName, containerName,
                       "wc-out.txt"), true).SubmitAndWait();
#else
            // any collection computed by the query can be materialized back at the client,
            // not just the 'output' collection. For large collections this is expensive!
            foreach (LineRecord line in toOutput)
            {
                Console.WriteLine(line.Line);
            }
#endif
        }