Exemplo n.º 1
0
        /// <summary>
        ///  Split a block into logical rows, where a blank line (or a line holding
        ///  only a carriage return) marks the start of a new row. The line split
        ///  is kept in _blockLines (backed by _lineArray) for later use.
        /// </summary>
        /// <param name="block">Block of content to split</param>
        /// <param name="rowPositionArray">Array which is cleared and refilled with the row boundaries</param>
        /// <returns>String8Set over the block built from the boundaries in rowPositionArray</returns>
        private String8Set SplitRows(String8 block, PartialArray<int> rowPositionArray)
        {
            // Split into lines first, keeping the split so columns can be split from it later
            _blockLines = block.Split(UTF8.Newline, _lineArray);

            // The next row read begins with the first line again
            _nextLineIndex = 0;

            // The first row starts at the beginning of the block
            rowPositionArray.Clear();
            rowPositionArray.Add(0);

            // Examine every line except the last one
            for (int lineIndex = 0; lineIndex < _blockLines.Count - 1; lineIndex++)
            {
                String8 current = _blockLines[lineIndex];

                // An empty line (or \n\r\n) indicates a new logical row
                bool isRowSeparator = current.Length == 0 || (current.Length == 1 && current[0] == UTF8.CR);
                if (!isRowSeparator)
                {
                    continue;
                }

                // The new row begins where the line after the separator begins
                rowPositionArray.Add(_lineArray[lineIndex + 1]);
            }

            // Closing boundary, one past the end of the block
            rowPositionArray.Add(block.Length + 1);

            return new String8Set(block, 1, rowPositionArray);
        }
        /// <summary>
        ///  Split a string with AlphanumericSplitter and join the non-empty parts
        ///  whose first byte is alphanumeric, separated by '|'.
        /// </summary>
        /// <param name="value">String to split</param>
        /// <returns>The selected parts joined with '|'</returns>
        public static string SplitAndJoin(string value)
        {
            byte[] valueBuffer = new byte[String8.GetLength(value)];
            String8 value8 = String8.Convert(value, valueBuffer);

            PartialArray<int> boundaries = new PartialArray<int>();
            String8Set parts = AlphanumericSplitter.Split(value8, ref boundaries);

            StringBuilder joined = new StringBuilder();
            bool needsSeparator = false;

            using (StringWriter writer = new StringWriter(joined))
            {
                for (int partIndex = 0; partIndex < parts.Count; partIndex++)
                {
                    String8 current = parts[partIndex];

                    // Skip empty parts and parts which don't start with an alphanumeric byte
                    if (current.IsEmpty() || !AlphanumericSplitter.IsAlphaNumeric(current[0]))
                    {
                        continue;
                    }

                    // Every written part after the first is preceded by a separator
                    if (needsSeparator)
                    {
                        writer.Write("|");
                    }

                    needsSeparator = true;
                    current.WriteTo(writer);
                }
            }

            return joined.ToString();
        }
Exemplo n.º 3
0
        /// <summary>
        ///  Convert a path string to a String8, split it on the delimiter, and
        ///  look it up in the tree starting from index 0.
        /// </summary>
        /// <param name="tree">ItemTree to search</param>
        /// <param name="strings">StringStore passed through to the tree lookup</param>
        /// <param name="path">Path to find</param>
        /// <param name="delimiter">Character separating the path parts</param>
        /// <returns>Value returned by tree.FindByPath for the split path</returns>
        public static int FindByPath(ItemTree tree, StringStore strings, string path, char delimiter = '\\')
        {
            byte[] pathBuffer = new byte[String8.GetLength(path)];
            String8 path8 = String8.Convert(path, pathBuffer);

            int[] partPositions = new int[String8Set.GetLength(path8, delimiter)];
            String8Set pathParts = path8.Split(delimiter, partPositions);

            return tree.FindByPath(0, pathParts, strings);
        }
Exemplo n.º 4
0
        /// <summary>
        ///  Find the item at the given path under startIndex. If it isn't found,
        ///  throw with a message describing which path part could not be found
        ///  and the children of the last item which did match.
        /// </summary>
        /// <param name="startIndex">Index of the item under which to search</param>
        /// <param name="path">Path parts to find, in order</param>
        /// <param name="strings">StringStore used to look up the path part names</param>
        /// <returns>Index of the item matching the whole path</returns>
        /// <exception cref="TreeItemNotFoundException">No item matched the whole path</exception>
        public int FindByPath(int startIndex, String8Set path, StringStore strings)
        {
            int matchingIndex;

            if (TryFindByPath(startIndex, path, strings, out matchingIndex))
            {
                return matchingIndex;
            }

            // On failure, matchingIndex is the last item matched. TryFindByPath reports 0 when
            // nothing matched at all, even if the search started below the root; in that case the
            // depth difference is negative and can't be used to index into the path. Fall back to
            // reporting the first path part as missing under startIndex itself.
            int partsFound = this.GetDepth(matchingIndex) - this.GetDepth(startIndex);
            if (partsFound < 0)
            {
                matchingIndex = startIndex;
                partsFound = 0;
            }

            StringBuilder message = new StringBuilder();

            using (StringWriter writer = new StringWriter(message))
            {
                // First line: the whole path which was requested
                writer.Write("Could not find '");
                path.Value.WriteTo(writer);
                writer.WriteLine("' in tree.");

                // Then: the first part which wasn't found, where it was expected, and what is there instead
                writer.Write("Could not find '");
                path[partsFound].WriteTo(writer);
                writer.Write("' under '");
                this.GetPath(matchingIndex, strings, '|').WriteTo(writer);
                writer.WriteLine("' with children:");
                WriteChildrenOf(writer, strings, matchingIndex);
                writer.WriteLine();
            }

            throw new TreeItemNotFoundException(message.ToString());
        }
        /// <summary>
        ///  Performance test: repeatedly split AllCodeText.AllCode8 with
        ///  AlphanumericSplitter and walk every other part, counting those longer than two bytes.
        /// </summary>
        public void AlphanumericSplitter_Performance()
        {
            String8 code = AllCodeText.AllCode8;
            String8Set set = default(String8Set);
            PartialArray<int> matchContainer = new PartialArray<int>();

            int iterations = 10;
            int totalWordsSplit = 0;
            int validCount = 0;

            // Split Goal: 75k per millisecond [75 MB/sec]
            Verify.PerformanceByBytes(75 * LongExtensions.Megabyte, () =>
            {
                for (int iteration = 0; iteration < iterations; iteration++)
                {
                    set = AlphanumericSplitter.Split(code, ref matchContainer);
                    totalWordsSplit += set.Count;

                    // Nothing to walk if the split produced no parts
                    if (set.Count <= 0)
                    {
                        continue;
                    }

                    // Start on the first part if it begins with an alphanumeric byte, otherwise the second
                    int wordIndex;
                    if (AlphanumericSplitter.IsAlphaNumeric(set[0][0]))
                    {
                        wordIndex = 0;
                    }
                    else
                    {
                        wordIndex = 1;
                    }

                    // Visit every other part from there
                    while (wordIndex < set.Count)
                    {
                        String8 word = set[wordIndex];
                        if (word.Length > 2)
                        {
                            validCount++;
                        }

                        wordIndex += 2;
                    }
                }

                // Report the number of bytes processed
                return iterations * code.Length;
            });
        }
Exemplo n.º 6
0
        /// <summary>
        ///  Write ten rows through an ITabularWriter using each Write and WriteValuePart
        ///  overload, verifying RowCountWritten and BytesWritten as rows are written.
        /// </summary>
        /// <param name="stream">Stream the writer is built over</param>
        /// <param name="buildWriter">Factory which builds the writer under test for the stream</param>
        public void Writer_WriteValidUsingAllOverloads(Stream stream, Func<Stream, ITabularWriter> buildWriter)
        {
            String8 nameList = String8.Convert("Jeff,Bill,Todd,\\Barry\\", new byte[30]);
            String8Set names = String8Set.Split(nameList, UTF8.Comma, new int[5]);

            using (ITabularWriter writer = buildWriter(stream))
            {
                // No rows are counted before or after the columns are set
                Assert.AreEqual(0, writer.RowCountWritten);
                writer.SetColumns(new string[] { "ID", "IsEven", "Backslash", "Today", "Name", "Description" });
                Assert.AreEqual(0, writer.RowCountWritten);

                int rowIndex = 0;
                while (rowIndex < 10)
                {
                    // Single value overloads
                    writer.Write(rowIndex);
                    writer.Write(rowIndex % 2 == 0);
                    writer.Write(UTF8.Backslash);
                    writer.Write(new DateTime(2017, 05, 03, 0, 0, 0, DateTimeKind.Utc));
                    writer.Write(names[rowIndex % names.Count]);

                    // One value written in parts
                    writer.WriteValueStart();
                    writer.WriteValuePart(rowIndex + 1);
                    writer.WriteValuePart(rowIndex % 2 == 1);
                    writer.WriteValuePart(UTF8.Quote);
                    writer.WriteValuePart(new DateTime(2017, 05, 01, 0, 0, 0, DateTimeKind.Utc));
                    writer.WriteValuePart(names[rowIndex % names.Count]);
                    writer.WriteValueEnd();

                    // The row is only counted once NextRow is called
                    Assert.AreEqual(rowIndex, writer.RowCountWritten);
                    writer.NextRow();
                    Assert.AreEqual(rowIndex + 1, writer.RowCountWritten);

                    // The reported byte count matches the stream position
                    Assert.AreEqual(stream.Position, writer.BytesWritten);

                    rowIndex++;
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        ///  Split a string on semi-colons and shift each part back over the
        ///  delimiters and leading spaces before it, verifying each shifted part
        ///  equals a copy of the part taken before the shift.
        /// </summary>
        public void String8_ShiftBack()
        {
            String8Block copies = new String8Block();

            // Goal: Split on semi-colon, collapse semi-colon and spaces in-place
            String8 source = "One; Two;Three; Four".TestConvert();
            PartialArray<int> positions = new PartialArray<int>(5, false);
            String8Set pieces = source.Split(UTF8.Semicolon, positions);

            int shiftSoFar = 0;
            int pieceIndex = 0;

            while (pieceIndex < pieces.Count)
            {
                String8 piece = pieces[pieceIndex];
                pieceIndex++;

                // Each part is shifted one further back than the previous one...
                shiftSoFar++;

                // ...and one more if it has a leading space to drop
                if (piece.StartsWith(UTF8.Space))
                {
                    piece = piece.Substring(1);
                    shiftSoFar++;
                }

                // Copy the part before shifting so the comparison doesn't depend on the shared buffer
                String8 expected = copies.GetCopy(piece);
                String8 actual = piece.ShiftBack(shiftSoFar);
                Assert.AreEqual(expected, actual);
            }

            String8 result = source.Substring(0, source.Length - shiftSoFar);

            // NOTE(review): this asserts the result is *not* the collapsed text. Whether that is
            // intended depends on where TestConvert places the string in its buffer - confirm.
            Assert.AreNotEqual("OneTwoThreeFour", result.ToString());
        }
Exemplo n.º 8
0
        /// <summary>
        ///  NextBlock is called by NextRow before reading the last row in _currentBlock.
        ///  Since the file is read in blocks, the last row is usually incomplete.
        ///
        ///  If there's more file content, NextBlock should copy the last row to the start
        ///  of the buffer, read more content, and reset _currentBlock to the new split rows
        ///  and _nextRowIndexInBlock to zero (telling NextRow to read that row next).
        ///
        ///  If there's no more file, the last row is complete and NextRow can return it.
        ///
        ///  NextRow will call NextBlock *again* after the last row. No new content is
        ///  read then, so _nextRowIndexInBlock is not reset and NextRow sees nothing is left.
        ///
        ///  So, NextBlock must:
        ///   - Copy the last row to the start of the buffer (if not already there)
        ///   - Read more content to fill the buffer
        ///   - Split the buffer into rows
        ///   - Stop at end-of-file or when a full row was read
        ///   - Double the buffer until one of these conditions is met
        ///
        ///   - Reset _nextRowIndexInBlock *only if* a row was shifted or read
        /// </summary>
        private void NextBlock()
        {
            int bufferLengthFilledStart = 0;

            // Copy the last row to the start of the buffer (if not already there)
            if (_currentBlock.Count > 1)
            {
                String8 lastRow = _currentBlock[_currentBlock.Count - 1];
                lastRow.WriteTo(_buffer, 0);
                bufferLengthFilledStart = lastRow.Length;

                // Reset the next row to read (since we shifted a row)
                _nextRowIndexInBlock = 0;
            }

            int bufferLengthFilled = bufferLengthFilledStart;

            while (true)
            {
                // Read more content to fill the buffer. Stream.Read may return fewer bytes than
                // requested before the end of the stream, so keep reading until the buffer is
                // full or Read returns zero. Otherwise a short read would be mistaken for
                // end-of-file below and an incomplete row could be returned as complete.
                int bytesRead;
                do
                {
                    bytesRead = _stream.Read(_buffer, bufferLengthFilled, _buffer.Length - bufferLengthFilled);
                    bufferLengthFilled += bytesRead;
                }
                while (bytesRead > 0 && bufferLengthFilled < _buffer.Length);

                String8 block = new String8(_buffer, 0, bufferLengthFilled);

                // Strip leading UTF8 BOM, if found, on first block
                // (everything read from the stream so far is in the buffer)
                if (_stream.Position == bufferLengthFilled)
                {
                    if (block.Length >= 3 && block[0] == 0xEF && block[1] == 0xBB && block[2] == 0xBF)
                    {
                        block = block.Substring(3);
                    }
                }

                // Split the buffer into rows
                _currentBlock = _splitRows(block, _rowPositionArray);

                // Stop at end-of-file (reads couldn't fill the buffer)
                if (bufferLengthFilled < _buffer.Length)
                {
                    break;
                }

                // Stop when a full row was read (split found at least two parts)
                if (_currentBlock.Count > 1)
                {
                    break;
                }

                // Otherwise, double the buffer (until a full row or end of file)
                byte[] newBuffer = new byte[_buffer.Length * 2];
                _buffer.CopyTo(newBuffer, 0);
                _buffer = newBuffer;
            }

            // If we read new content, reset the next row to read
            if (bufferLengthFilled > bufferLengthFilledStart)
            {
                _nextRowIndexInBlock = 0;
            }
        }
Exemplo n.º 9
0
        /// <summary>
        ///  Try to find the item at the given path under startIndex.
        /// </summary>
        /// <param name="startIndex">Index of the item under which to search</param>
        /// <param name="path">Path parts to find, in order</param>
        /// <param name="strings">StringStore used to look up the path part names</param>
        /// <param name="matchingIndex">Starts at 0 and is updated by the search as parts are matched</param>
        /// <returns>True if the whole path was found, False otherwise</returns>
        public bool TryFindByPath(int startIndex, String8Set path, StringStore strings, out int matchingIndex)
        {
            // Report the root (0) as the last matching item if the search finds nothing
            matchingIndex = 0;

            // Search from the first path part
            bool wasFound = TryFindByPath(startIndex, 0, path, strings, ref matchingIndex);
            return wasFound;
        }
Exemplo n.º 10
0
        /// <summary>
        ///  Advance to the next row. Call this before reading the first row.
        /// </summary>
        /// <returns>True if another row exists, False if the TSV is out of content</returns>
        public virtual bool NextRow()
        {
            String8 nextRow = _reader.NextRow();

            // An empty row from the underlying reader means there is nothing left
            if (nextRow.IsEmpty())
            {
                return false;
            }

            // Split the row into cells and count it
            _currentRowColumns = SplitCells(nextRow, _cellPositionArray);
            this.RowCountRead++;

            // Keep a set of reusable String8TabularValues (at least one per column)
            // to avoid per-cell-value allocation or boxing.
            int columnCount = _currentRowColumns.Count;
            bool needMoreBoxes = _valueBoxes == null || _valueBoxes.Length < columnCount;

            if (needMoreBoxes)
            {
                String8TabularValue[] boxes = new String8TabularValue[columnCount];
                _valueBoxes = boxes;

                for (int boxIndex = 0; boxIndex < boxes.Length; boxIndex++)
                {
                    boxes[boxIndex] = new String8TabularValue();
                }
            }

            return true;
        }
Exemplo n.º 11
0
        /// <summary>
        ///  Split a string with SplitAndDecodeCsvCells and join the resulting
        ///  cells with UTF8.Pipe.
        /// </summary>
        /// <param name="value">String to split</param>
        /// <returns>The split cells joined into one string</returns>
        private string CsvSplitAndJoin(string value)
        {
            byte[] valueBuffer = new byte[String8.GetLength(value)];
            String8 value8 = String8.Convert(value, valueBuffer);

            PartialArray<int> cellPositions = new PartialArray<int>();
            String8Set cells = value8.SplitAndDecodeCsvCells(cellPositions);

            // The join buffer is sized from the set's underlying value
            byte[] joinBuffer = new byte[cells.Value.Length];
            String8 joined = cells.Join(UTF8.Pipe, joinBuffer);

            return joined.ToString();
        }
Exemplo n.º 12
0
        /// <summary>
        ///  Split a string with SplitOutsideQuotes on the given delimiter and
        ///  join the resulting parts with UTF8.Pipe.
        /// </summary>
        /// <param name="value">String to split</param>
        /// <param name="delimiter">Delimiter byte to split on</param>
        /// <returns>The split parts joined into one string</returns>
        private string SplitOutsideQuotesAndJoin(string value, byte delimiter)
        {
            byte[] valueBuffer = new byte[String8.GetLength(value)];
            String8 value8 = String8.Convert(value, valueBuffer);

            PartialArray<int> partPositions = new PartialArray<int>();
            String8Set parts = value8.SplitOutsideQuotes(delimiter, partPositions);

            // The join buffer is sized from the set's underlying value
            byte[] joinBuffer = new byte[parts.Value.Length];
            String8 joined = parts.Join(UTF8.Pipe, joinBuffer);

            return joined.ToString();
        }
        /// <summary>
        ///  Performance test: split AllCodeText.AllCode8 into lines, split each line
        ///  with AlphanumericSplitter, and add each new word longer than two bytes
        ///  to a StringStore and a MemberIndex.
        /// </summary>
        public void AlphanumericSplitter_EndToEndPerformance()
        {
            String8 code = AllCodeText.AllCode8;
            String8Set set = default(String8Set);
            PartialArray<int> matchContainer = new PartialArray<int>(2048);

            HashSet<String8> uniqueWords = new HashSet<String8>();
            StringStore strings = new StringStore();
            MemberIndex index = new MemberIndex();

            int iterations = 10;
            int totalWordsSplit = 0;

            // Split, Add, Index Goal: 30k per millisecond [30 MB/sec]
            Verify.PerformanceByBytes(30 * LongExtensions.Megabyte, () =>
            {
                for (int iteration = 0; iteration < iterations; iteration++)
                {
                    String8Set codeByLine = code.Split(UTF8.Newline, new PartialArray<int>());

                    for (int lineIndex = 0; lineIndex < codeByLine.Count; lineIndex++)
                    {
                        // Split the line into parts
                        String8 line = codeByLine[lineIndex];
                        set = AlphanumericSplitter.Split(line, ref matchContainer);
                        totalWordsSplit += set.Count;

                        // Nothing to walk if the line produced no parts
                        if (set.Count <= 0)
                        {
                            continue;
                        }

                        // Start on the first part if it begins with an alphanumeric byte, otherwise the second
                        int wordIndex;
                        if (AlphanumericSplitter.IsAlphaNumeric(set[0][0]))
                        {
                            wordIndex = 0;
                        }
                        else
                        {
                            wordIndex = 1;
                        }

                        // Visit every other part from there
                        while (wordIndex < set.Count)
                        {
                            String8 word = set[wordIndex];

                            // Only words which are long enough and not seen before are added
                            if (word.Length > 2 && !uniqueWords.Contains(word))
                            {
                                int wordIdentifier = strings.FindOrAddString(word);

                                // Remember the StringStore's copy of the word, not the slice of the line
                                uniqueWords.Add(strings[wordIdentifier]);
                                index.AddItem(wordIdentifier, lineIndex);
                            }

                            wordIndex += 2;
                        }
                    }
                }

                // Report the number of bytes processed
                return iterations * code.Length;
            });
        }
Exemplo n.º 14
0
        /// <summary>
        ///  Exercise the basic String8Set operations: Add, the indexer, enumeration,
        ///  IndexOf, Remove, and Clear, checking Count and LengthBytes along the way.
        /// </summary>
        public void String8Set_Basics()
        {
            String8[] samples =
            {
                String8.CopyExpensive("One"),
                String8.CopyExpensive("Two"),
                String8.CopyExpensive("Three")
            };

            String8Set set = new String8Set();

            // A new set is empty
            Assert.AreEqual(0, set.Count);
            Assert.AreEqual(0, set.LengthBytes);

            // Add each sample
            foreach (String8 sample in samples)
            {
                set.Add(sample);
            }

            Assert.AreEqual(3, set.Count);
            Assert.AreEqual(11, set.LengthBytes);

            // The indexer returns the samples in the order added
            int position = 0;
            while (position < samples.Length)
            {
                Assert.AreEqual(samples[position], set[position]);
                position++;
            }

            // Enumeration returns them in the same order
            int enumerated = 0;
            foreach (String8 value in set)
            {
                Assert.AreEqual(samples[enumerated], value);
                enumerated++;
            }

            // IndexOf finds values in the set and returns -1 for others
            Assert.AreEqual(1, set.IndexOf(samples[1]));
            Assert.AreEqual(-1, set.IndexOf(String8.CopyExpensive("Four")));

            // Removing the middle value leaves the other two, in order
            Assert.IsTrue(set.Remove(samples[1]));
            Assert.AreEqual(2, set.Count);
            Assert.AreEqual(8, set.LengthBytes);
            Assert.AreEqual(samples[0], set[0]);
            Assert.AreEqual(samples[2], set[1]);

            // Clear empties the set again
            set.Clear();
            Assert.AreEqual(0, set.Count);
            Assert.AreEqual(0, set.LengthBytes);
        }
Exemplo n.º 15
0
        /// <summary>
        ///  Advance to the next row. Call this before reading the first row.
        /// </summary>
        /// <returns>True if another row exists, False if the TSV is out of content</returns>
        public bool NextRow()
        {
            // The last row in the block may have been only partially read into the buffer,
            // so ask for more content before reading it.
            bool atLastRowInBlock = _nextRowIndexInBlock >= _currentBlock.Count - 1;
            if (atLastRowInBlock)
            {
                NextBlock();
            }

            // Nothing left to read
            if (_nextRowIndexInBlock >= _currentBlock.Count)
            {
                return false;
            }

            // Get the next (complete) row from the current block
            String8 rowText = _currentBlock[_nextRowIndexInBlock];

            // On the first row only, strip a leading UTF8 BOM if there is one
            bool isFirstRow = _rowCountRead == 0;
            if (isFirstRow && rowText.Length >= 3 && rowText[0] == 0xEF && rowText[1] == 0xBB && rowText[2] == 0xBF)
            {
                rowText = rowText.Substring(3);
            }

            // Split the row into cells and move past it
            _currentRow = SplitCells(rowText, _cellPositionArray);
            _rowCountRead++;
            _nextRowIndexInBlock++;

            // Keep a set of reusable String8TabularValues (at least one per column)
            // to avoid per-cell-value allocation or boxing.
            int columnCount = _currentRow.Count;
            bool needMoreBoxes = _valueBoxes == null || _valueBoxes.Length < columnCount;

            if (needMoreBoxes)
            {
                String8TabularValue[] boxes = new String8TabularValue[columnCount];
                _valueBoxes = boxes;

                for (int boxIndex = 0; boxIndex < boxes.Length; boxIndex++)
                {
                    boxes[boxIndex] = new String8TabularValue();
                }
            }

            return true;
        }
Exemplo n.º 16
0
        /// <summary>
        ///  Walk the path parts down from rootIndex, adding a child for each part
        ///  which isn't found by name under the current item.
        /// </summary>
        /// <param name="rootIndex">Index of the item under which the path begins</param>
        /// <param name="path">Path parts to find or add, in order</param>
        /// <param name="strings">StringStore in which each part name is found or added</param>
        /// <returns>Index of the item for the last path part (rootIndex if the path has no parts)</returns>
        public int AddPath(int rootIndex, String8Set path, StringStore strings)
        {
            int parentIndex = rootIndex;

            for (int partIndex = 0; partIndex < path.Count; partIndex++)
            {
                // Get the identifier for this part's name
                String8 partName = path[partIndex];
                int nameIdentifier = strings.FindOrAddString(partName);

                // Reuse the child with that name if there is one; add it otherwise
                int childIndex;
                bool childExists = TryFindChildByName(parentIndex, nameIdentifier, out childIndex);
                if (!childExists)
                {
                    childIndex = Add(parentIndex, nameIdentifier);
                }

                // Continue under the child
                parentIndex = childIndex;
            }

            return parentIndex;
        }
Exemplo n.º 17
0
        /// <summary>
        ///  Recursively search the children of parentNodeIndex for the path part at
        ///  currentPathPartIndex, then for the remaining parts under each matching child.
        /// </summary>
        /// <param name="parentNodeIndex">Index of the item whose children are searched</param>
        /// <param name="currentPathPartIndex">Index of the path part to match at this level</param>
        /// <param name="path">Path parts to find, in order</param>
        /// <param name="strings">StringStore used to look up the path part names</param>
        /// <param name="matchingIndex">Set to each child which matches as the search proceeds</param>
        /// <returns>True if the whole path was matched, False otherwise</returns>
        private bool TryFindByPath(int parentNodeIndex, int currentPathPartIndex, String8Set path, StringStore strings, ref int matchingIndex)
        {
            // If the part name isn't in the StringStore, there's nothing to match against
            Range partIdentifiers;
            if (!strings.TryFindString(path[currentPathPartIndex], out partIdentifiers))
            {
                return false;
            }

            // Look at each child of the parent in turn
            for (int child = this.GetFirstChild(parentNodeIndex); child > 0; child = this.GetNextSibling(child))
            {
                // Skip children whose name isn't the one being looked for
                if (!partIdentifiers.Contains(this.GetNameIdentifier(child)))
                {
                    continue;
                }

                // Record that we matched up to this point
                matchingIndex = child;

                // If this was the last part, the whole path has been matched
                bool isLastPart = currentPathPartIndex == path.Count - 1;
                if (isLastPart)
                {
                    return true;
                }

                // Otherwise, search under this child for the remaining parts
                if (TryFindByPath(child, currentPathPartIndex + 1, path, strings, ref matchingIndex))
                {
                    return true;
                }
            }

            return false;
        }
Exemplo n.º 18
0
        /// <summary>
        ///  Set the location (file, line, and character in line) for a member. The file
        ///  path is added to the FileTree if needed. The location is appended if
        ///  memberIndex is the next index, or replaces the existing one otherwise.
        /// </summary>
        /// <param name="memberIndex">Index of the member to set the location for</param>
        /// <param name="filePath">File path of the location; null or empty gives file index 0</param>
        /// <param name="line">Line of the location</param>
        /// <param name="charInLine">Character in line of the location</param>
        /// <exception cref="InvalidOperationException">memberIndex is past the end of DeclaredMemberLocations</exception>
        public void SetLocation(int memberIndex, string filePath, ushort line, ushort charInLine)
        {
            int fileIndex = 0;

            if (!String.IsNullOrEmpty(filePath))
            {
                // TODO: Handle '/' or '\' in ItemTree to avoid canonicalizing
                bool isUrl = filePath.StartsWith("http:", StringComparison.OrdinalIgnoreCase)
                    || filePath.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

                if (isUrl)
                {
                    filePath = filePath.Replace('/', '\\');
                }

                // Find (or add) the file path to the File Tree
                byte[] pathBuffer = new byte[String8.GetLength(filePath)];
                String8 path8 = String8.Convert(filePath, pathBuffer);

                int[] partPositions = new int[String8Set.GetLength(path8, '\\')];
                String8Set pathParts = path8.Split('\\', partPositions);

                fileIndex = this.FileTree.AddPath(0, pathParts, this.StringStore);
            }

            // Build the updated location
            SymbolLocation location;
            location.FileIndex = fileIndex;
            location.Line = line;
            location.CharInLine = charInLine;

            // Write it: append for the next index, overwrite for an existing one
            int locationCount = this.DeclaredMemberLocations.Count;

            if (locationCount < memberIndex)
            {
                throw new InvalidOperationException(String.Format(Resources.DatabaseArraysOutOfSync, "DeclaredMemberLocations"));
            }

            if (locationCount == memberIndex)
            {
                this.DeclaredMemberLocations.Add(location);
            }
            else
            {
                this.DeclaredMemberLocations[memberIndex] = location;
            }
        }
Exemplo n.º 19
0
        /// <summary>
        ///  Walk the parts of a delimited path down from this symbol, adding a member
        ///  (with details and a default location) for each part not found by name.
        /// </summary>
        /// <param name="path">Path to find or add</param>
        /// <param name="delimiter">Character separating the path parts</param>
        /// <param name="pathPartType">SymbolType given to each member which is added</param>
        /// <returns>MutableSymbol for the member at the end of the path</returns>
        public MutableSymbol FindOrAddPath(string path, char delimiter, SymbolType pathPartType)
        {
            byte[] pathBuffer = new byte[String8.GetLength(path)];
            String8 path8 = String8.Convert(path, pathBuffer);

            int[] partPositions = new int[String8Set.GetLength(path8, delimiter)];
            String8Set pathParts = path8.Split(delimiter, partPositions);

            int parentIndex = _index;
            int partIndex = 0;

            while (partIndex < pathParts.Count)
            {
                // Get the identifier for this part's name
                String8 partName = pathParts[partIndex];
                int nameIdentifier = _database.StringStore.FindOrAddString(partName);

                int childIndex;
                bool childExists = _database.DeclaredMembers.TryFindChildByName(parentIndex, nameIdentifier, out childIndex);

                if (!childExists)
                {
                    // Add the member, along with an entry in each of the parallel collections
                    childIndex = _database.DeclaredMembers.Add(parentIndex, nameIdentifier);
                    _database.DeclaredMemberDetails.Add(new SymbolDetails() { Type = pathPartType });
                    _database.DeclaredMemberLocations.Add(default(SymbolLocation));
                }

                // Continue under the child
                parentIndex = childIndex;
                partIndex++;
            }

            return new MutableSymbol(_database, parentIndex);
        }
Exemplo n.º 20
0
 /// <summary>
 ///  Split a delimited path and find the member it names under this symbol.
 /// </summary>
 /// <param name="path">Path to find</param>
 /// <param name="delimiter">Character separating the path parts</param>
 /// <returns>MutableSymbol for the index returned by DeclaredMembers.FindByPath</returns>
 public MutableSymbol FindByFullName(string path, char delimiter)
 {
     byte[] pathBuffer = new byte[String8.GetLength(path)];
     String8 path8 = String8.Convert(path, pathBuffer);

     int[] partPositions = new int[String8Set.GetLength(path8, delimiter)];
     String8Set pathParts = path8.Split(delimiter, partPositions);

     // Search the declared members starting from this symbol's index
     int foundIndex = _database.DeclaredMembers.FindByPath(_index, pathParts, _database.StringStore);
     return new MutableSymbol(_database, foundIndex);
 }