Beispiel #1
0
        /// <summary>
        /// Builds the <see cref="ColumnWriterDetails"/> for a single property. Values obtained
        /// through <paramref name="valueGetter"/> are collected in a private buffer and passed to
        /// <paramref name="columnWriter"/> as one block when the buffer is written out.
        /// </summary>
        /// <typeparam name="T">Element type of the buffer and of the column writer.</typeparam>
        /// <param name="columnWriter">Writer that receives the buffered values through <c>AddBlock</c>.</param>
        /// <param name="propertyInfo">Property whose name is recorded on the result.</param>
        /// <param name="valueGetter">Extracts the column value from an object instance.</param>
        /// <param name="columnKind">Kind assigned to the <see cref="ColumnType"/> of the result.</param>
        /// <returns>Details whose callbacks share one buffer captured by this call.</returns>
        private ColumnWriterDetails GetColumnWriterDetails<T>(
            IColumnWriter<T> columnWriter,
            PropertyInfo propertyInfo,
            Func<object, T> valueGetter,
            ColumnTypeKind columnKind)
        {
            // Shared by both callbacks below; it is emptied every time the block is written.
            var pendingValues = new List<T>();

            return new ColumnWriterDetails
            {
                PropertyName = propertyInfo.Name,
                ColumnWriter = columnWriter,
                AddValueToState = source => pendingValues.Add(valueGetter(source)),
                WriteValuesFromState = () =>
                {
                    columnWriter.AddBlock(pendingValues);
                    pendingValues.Clear();
                },
                ColumnType = new ColumnType { Kind = columnKind }
            };
        }
Beispiel #2
0
        /// <summary>
        /// Builds the writer chain for a column: a direct binary writer for
        /// <paramref name="columnType"/> when its type provider supplies one, otherwise a String8
        /// writer fed through a type converter. The result is wrapped in a NullableWriter and then
        /// in an EnumWriter.
        /// </summary>
        /// <param name="streamProvider">Stream provider handed to every writer that is created.</param>
        /// <param name="columnType">Type of the values in the column.</param>
        /// <param name="columnPath">Path of the column handed to every writer that is created.</param>
        /// <returns>
        /// The wrapped writer, or null when there is neither a direct writer nor a converter
        /// from <paramref name="columnType"/> to String8.
        /// </returns>
        public static IColumnWriter TryGetColumnWriter(IStreamProvider streamProvider, Type columnType, string columnPath)
        {
            // Prefer a writer built directly for the column type
            ITypeProvider directProvider = TryGet(columnType);
            IColumnWriter innerWriter = (directProvider == null ? null : directProvider.BinaryWriter(streamProvider, columnPath));

            if (innerWriter == null)
            {
                // No provider (or no writer from it): fall back to converting the values to String8
                Func<XArray, XArray> toString8 = TypeConverterFactory.GetConverter(columnType, typeof(String8));
                if (toString8 == null)
                {
                    return null;
                }

                IColumnWriter string8Writer = TypeProviderFactory.TryGet(typeof(String8)).BinaryWriter(streamProvider, columnPath);
                innerWriter = new ConvertingWriter(string8Writer, toString8);
            }

            // NullableWriter handles null persistence. EnumWriter is attempted for every type
            // (even bool, byte, ushort) because enum columns can roll nulls into the column itself
            // and accelerate groupBy.
            return new EnumWriter(
                streamProvider,
                columnPath,
                columnType,
                new NullableWriter(streamProvider, columnPath, innerWriter));
        }
Beispiel #3
0
        /// <summary>
        /// Creates the Excel package with a "similarity" worksheet, selects the column writers that
        /// apply to <paramref name="matches"/>, and writes the rotated header row.
        /// </summary>
        /// <param name="testTakerTestId">Test id of the test taker; a link column is added only when it is non-empty.</param>
        /// <param name="matches">Matches inspected to decide which optional columns are included.</param>
        /// <param name="fileName">Base name of the output file.</param>
        /// <param name="fileNameSuffix">Optional suffix added to <paramref name="fileName"/> when non-empty.</param>
        public ExcelSimilarityWriter(string testTakerTestId, List<IClusterableMatch> matches, string fileName, string fileNameSuffix)
        {
            if (string.IsNullOrEmpty(fileNameSuffix))
            {
                _fileName = fileName;
            }
            else
            {
                _fileName = FileUtils.AddSuffixToFilename(fileName, fileNameSuffix);
            }

            _p = new ExcelPackage();
            _ws = _p.Workbook.Worksheets.Add("similarity");

            // Columns appear in the order they are added; optional columns are added only when
            // at least one match carries data for them.
            var candidates = new List<IColumnWriter>();
            candidates.Add(new CountWriter());
            candidates.Add(_overlapWriter);
            candidates.Add(new NameWriter(true));
            if (matches.Any(m => !string.IsNullOrEmpty(m.Match.TestGuid)))
            {
                candidates.Add(new TestIdWriter());
            }
            if (!string.IsNullOrEmpty(testTakerTestId))
            {
                candidates.Add(new LinkWriter(testTakerTestId));
            }
            candidates.Add(new SharedCentimorgansWriter());
            if (matches.Any(m => m.Match.SharedSegments > 0))
            {
                candidates.Add(new SharedSegmentsWriter());
            }
            if (matches.Any(m => m.Match.LongestBlock > 0))
            {
                candidates.Add(new LongestBlockWriter());
            }
            if (matches.Any(m => !string.IsNullOrEmpty(m.Match.TreeUrl)))
            {
                candidates.Add(new TreeUrlWriter(testTakerTestId));
            }
            if (matches.Any(m => m.Match.TreeType != SavedData.TreeType.Undetermined))
            {
                candidates.Add(new TreeTypeWriter());
            }
            if (matches.Any(m => m.Match.TreeSize > 0))
            {
                candidates.Add(new TreeSizeWriter());
            }
            if (matches.Any(m => m.Match.Starred))
            {
                candidates.Add(new StarredWriter());
            }
            if (matches.Any(m => m.Match.HasHint))
            {
                candidates.Add(new SharedAncestorHintWriter());
            }
            candidates.Add(new NoteWriter());

            // The overlap writer is a field and is dropped here if it has not been set
            var writers = candidates.Where(candidate => candidate != null).ToArray();

            _writers = new ColumnWritersCollection(_p, _ws, writers, testTakerTestId);

            // Rotate the entire top row by 90 degrees
            _ws.Row(_row).Style.TextRotation = 90;

            _col = _writers.WriteHeaders(_row, _col);
            ++_row;
        }
 /// <summary>
 /// Disposes the wrapped writer, if there is one, and clears the reference so that
 /// further calls do nothing.
 /// </summary>
 public void Dispose()
 {
     if (_writer == null)
     {
         return;
     }

     _writer.Dispose();
     _writer = null;
 }
 /// <summary>
 /// Disposes the writer that receives the converted values, if there is one, and clears
 /// the reference so that further calls do nothing.
 /// </summary>
 public void Dispose()
 {
     if (_convertedValueWriter == null)
     {
         return;
     }

     _convertedValueWriter.Dispose();
     _convertedValueWriter = null;
 }
Beispiel #6
0
        /// <summary>
        /// Creates an enum writer over <paramref name="valueWriter"/>: constructs the dictionary
        /// for <paramref name="columnType"/> and opens the row index file under
        /// <paramref name="columnPath"/> for writing.
        /// </summary>
        /// <param name="streamProvider">Provider used to open the row index file.</param>
        /// <param name="columnPath">Path of the column; the row index file is placed beneath it.</param>
        /// <param name="columnType">Type argument used to construct the enum column dictionary.</param>
        /// <param name="valueWriter">Writer for the column values.</param>
        public EnumWriter(IStreamProvider streamProvider, string columnPath, Type columnType, IColumnWriter valueWriter)
        {
            _streamProvider = streamProvider;
            _columnPath = columnPath;
            _valueWriter = valueWriter;

            // EnumColumnDictionary<> is closed over the column type at runtime
            _dictionary = (IEnumColumnDictionary)Allocator.ConstructGenericOf(typeof(EnumColumnDictionary<>), columnType);

            // Row indices are written as single bytes
            string rowIndexPath = Path.Combine(columnPath, RowIndexFileName);
            _rowIndexWriter = new PrimitiveArrayWriter<byte>(streamProvider.OpenWrite(rowIndexPath));
        }
        /// <summary>
        /// Creates a writer that starts out storing values as single bytes, with a converter
        /// from int to byte.
        /// </summary>
        /// <param name="streamProvider">Provider used to build the underlying writer.</param>
        /// <param name="columnPathPrefix">Prefix from which the per-type column path is derived.</param>
        public VariableIntegerWriter(IStreamProvider streamProvider, string columnPathPrefix)
        {
            _streamProvider = streamProvider;
            _columnPathPrefix = columnPathPrefix;

            // Begin with the smallest type (while we don't have any values too big)
            Type initialType = typeof(byte);
            WritingAsType = initialType;

            string initialPath = PathForType(columnPathPrefix, initialType);
            _writer = BuildDirectWriter(streamProvider, initialType, initialPath);
            _converter = TypeConverterFactory.GetConverter(typeof(int), initialType);
        }
Beispiel #8
0
 /// <summary>
 /// Writes the string form of <paramref name="instance"/> (its <c>ToString()</c> result)
 /// to the base stream of <paramref name="writer"/> and flushes it.
 /// </summary>
 /// <param name="instance">Value to serialize.</param>
 /// <param name="writer">Column writer whose <c>BaseStream</c> receives the text.</param>
 /// <param name="context">Serialization context; not used by this method.</param>
 public void Serialize(
     Bits instance,
     IColumnWriter writer,
     ISerializationContext context)
 {
     // The stream belongs to the column writer, so the StreamWriter must not close it when
     // disposed (leaveOpen = true). The encoding (UTF-8 without BOM) and the buffer size
     // (1024) are the defaults the single-argument StreamWriter constructor would use.
     using (var w = new System.IO.StreamWriter(
         writer.BaseStream,
         new System.Text.UTF8Encoding(false, true),
         1024,
         true))
     {
         var bitstring = instance.ToString();
         w.Write(bitstring);

         // Push the buffered characters through to the underlying stream
         w.Flush();
     }
 }
        /// <summary>
        /// Disposes the value writer and then the null writer, skipping whichever is not set,
        /// and clears both references.
        /// </summary>
        public void Dispose()
        {
            _valueWriter?.Dispose();
            _valueWriter = null;

            _nullWriter?.Dispose();
            _nullWriter = null;
        }
        /// <summary>
        /// Switches this writer to the larger type <paramref name="toType"/>. The current writer
        /// is disposed, any rows already written are read back page by page, converted and
        /// appended to a new writer for <paramref name="toType"/>, and the previous file is deleted.
        /// </summary>
        /// <param name="toType">Type the values are written as from now on.</param>
        private void Upconvert(Type toType)
        {
            // Number of rows requested per page while copying
            const int pageSize = 10240;

            // Close the current writer
            _writer.Dispose();
            _writer = null;

            // Paths for the type written so far and for the new type
            Type fromType = WritingAsType;
            string previousPath = PathForType(_columnPathPrefix, fromType);
            string upconvertedPath = PathForType(_columnPathPrefix, toType);

            // Writer for the larger type
            IColumnWriter upconvertedWriter = BuildDirectWriter(_streamProvider, toType, upconvertedPath);

            // Copy over the values written so far, if any
            if (_rowCountWritten > 0)
            {
                Func<XArray, XArray> widen = TypeConverterFactory.GetConverter(fromType, toType);

                using (IColumnReader previousValues = TypeProviderFactory.TryGetColumnReader(_streamProvider, fromType, previousPath))
                {
                    int total = previousValues.Count;

                    for (ArraySelector page = ArraySelector.All(0).NextPage(total, pageSize);
                         page.Count > 0;
                         page = page.NextPage(total, pageSize))
                    {
                        upconvertedWriter.Append(widen(previousValues.Read(page)));
                    }
                }
            }

            // The file for the previous type is no longer needed
            _streamProvider.Delete(previousPath);

            // Adopt the new writer; no converter is kept when the target type is int itself
            WritingAsType = toType;
            _writer = upconvertedWriter;
            _converter = (toType == typeof(int)) ? null : TypeConverterFactory.GetConverter(typeof(int), toType);
        }
Beispiel #11
0
        /// <summary>
        /// Writes the distinct values to the value writer when the dictionary is still present,
        /// then disposes the value writer and the row index writer and clears all references.
        /// </summary>
        public void Dispose()
        {
            // Still an enum column: the dictionary's values have not been written yet
            if (_dictionary != null)
            {
                XArray distinctValues = _dictionary.Values();
                _valueWriter.Append(distinctValues);
                _dictionary = null;
            }

            _valueWriter?.Dispose();
            _valueWriter = null;

            _rowIndexWriter?.Dispose();
            _rowIndexWriter = null;
        }
Beispiel #12
0
        /// <summary>
        /// Stops writing this column as an enum column. The row index writer is closed, the
        /// indices written so far are read back and replaced by their values through the value
        /// writer, the dictionary is dropped and the row index file is deleted.
        /// </summary>
        private void Convert()
        {
            // Number of rows requested per page while reading the indices back
            const int pageSize = 10240;

            // Close the row index writer
            _rowIndexWriter.Dispose();
            _rowIndexWriter = null;

            // Only rows that were already written need converting
            if (_rowCountWritten > 0)
            {
                // The set of unique values collected so far
                XArray distinctValues = _dictionary.Values();

                // Indices are stored as bytes; they are widened to int to build the lookup
                Func<XArray, XArray> toInt = TypeConverterFactory.GetConverter(typeof(byte), typeof(int));

                using (IColumnReader indexReader = new PrimitiveArrayReader<byte>(_streamProvider.OpenRead(Path.Combine(_columnPath, RowIndexFileName))))
                {
                    int total = indexReader.Count;

                    for (ArraySelector page = ArraySelector.All(0).NextPage(total, pageSize);
                         page.Count > 0;
                         page = page.NextPage(total, pageSize))
                    {
                        XArray indices = toInt(indexReader.Read(page));

                        // Reselect is safe because 'values' are converted to a contiguous array
                        var lookup = ArraySelector.Map((int[])indices.Array, indices.Count);
                        _valueWriter.Append(distinctValues.Reselect(lookup));
                    }
                }
            }

            // Without the dictionary, future rows are streamed out as-is
            _dictionary = null;

            // The row index file is no longer needed
            _streamProvider.Delete(Path.Combine(_columnPath, RowIndexFileName));
        }
Beispiel #13
0
        /// <summary>
        /// Writes the correlation heatmap for the given clusters to one or more Excel files.
        /// Each file has one row per leaf node and one column per non-distant match, preceded
        /// by a set of fixed descriptive columns.
        /// </summary>
        /// <param name="nodes">Cluster nodes; the ordered leaf nodes of the first node become the rows.</param>
        /// <param name="matchesByIndex">Matches keyed by their index.</param>
        /// <param name="indexClusterNumbers">Cluster numbers keyed by match index, passed to the cluster writers.</param>
        /// <returns>
        /// The names of the files that were saved. The list is empty when no correlation filename
        /// is configured or when <paramref name="nodes"/> is empty.
        /// </returns>
        public async Task<List<string>> OutputCorrelationAsync(List<ClusterNode> nodes, Dictionary<int, IClusterableMatch> matchesByIndex, Dictionary<int, int> indexClusterNumbers)
        {
            if (string.IsNullOrEmpty(_correlationFilename))
            {
                return new List<string>();
            }

            if (nodes.Count == 0)
            {
                return new List<string>();
            }

            // All nodes, in order. These will become rows/columns in the Excel file.
            var leafNodes = nodes.First().GetOrderedLeafNodes().ToList();

            // Excel has a limit of 16,384 columns.
            // If there are more than 16,000 matches, split into files containing at most 10,000 columns.
            var numOutputFiles = 1;

            if (leafNodes.Count > MaxColumns)
            {
                // Ceiling division: "count / split + 1" yields one file too many whenever the count
                // is an exact multiple of the split size, and that extra file has no match columns.
                numOutputFiles = (leafNodes.Count + MaxColumnsPerSplit - 1) / MaxColumnsPerSplit;
            }

            _progressData.Reset("Saving clusters", leafNodes.Count * numOutputFiles);

            // Ancestry never shows matches lower than 20 cM as shared matches.
            // The distant matches will be included as rows in the Excel file, but not as columns.
            // That means that correlation diagrams that include distant matches will be rectangular (tall and narrow)
            // rather than square.
            var matches = leafNodes
                          .Where(leafNode => matchesByIndex.ContainsKey(leafNode.Index))
                          .Select(leafNode => matchesByIndex[leafNode.Index])
                          .ToList();

            // NOTE(review): Min throws on an empty sequence if SharedCentimorgans is a non-nullable
            // value type - confirm that at least one match always has a shared match in matchesByIndex.
            var lowestClusterableCentimorgans = matches
                                                .SelectMany(match => match.Coords.Where(coord => coord != match.Index && matchesByIndex.ContainsKey(coord)))
                                                .Distinct()
                                                .Min(coord => matchesByIndex[coord].Match.SharedCentimorgans);
            var nonDistantMatches = matches
                                    .Where(match => match.Match.SharedCentimorgans >= lowestClusterableCentimorgans)
                                    .ToList();

            var orderedIndexes = nonDistantMatches
                                 .Select(match => match.Index)
                                 .ToList();

            // Because very strong matches are included in so many clusters,
            // excluding the strong matches makes it easier to identify edges of the clusters.
            var immediateFamilyIndexes = new HashSet<int>(
                matchesByIndex.Values
                .Where(match => match.Match.SharedCentimorgans > 200)
                .Select(match => match.Index)
                );

            var files = new List<string>();

            for (var fileNum = 0; fileNum < numOutputFiles; ++fileNum)
            {
                using (var p = new ExcelPackage())
                {
                    // The worksheet is filled on a worker thread; the package is saved after it completes
                    await Task.Run(() =>
                    {
                        var ws = p.Workbook.Worksheets.Add("heatmap");

                        // Start at the top left of the sheet
                        var row = 1;
                        var col = 1;

                        // Rotate the entire top row by 90 degrees
                        ws.Row(row).Style.TextRotation = 90;

                        // Fixed columns; optional ones are included only when some match has data for them
                        var clusterNumberWriter = new ClusterNumberWriter(indexClusterNumbers);
                        var writers = new IColumnWriter[]
                        {
                            clusterNumberWriter,
                            new NameWriter(false),
                            matches.Any(match => !string.IsNullOrEmpty(match.Match.TestGuid)) ? new TestIdWriter() : null,
                            !string.IsNullOrEmpty(_testTakerTestId) ? new LinkWriter(_testTakerTestId) : null,
                            new SharedCentimorgansWriter(),
                            matches.Any(match => match.Match.SharedSegments > 0) ? new SharedSegmentsWriter() : null,
                            matches.Any(match => match.Match.LongestBlock > 0) ? new LongestBlockWriter() : null,
                            matches.Any(match => !string.IsNullOrEmpty(match.Match.TreeUrl)) ? new TreeUrlWriter(_testTakerTestId) : null,
                            matches.Any(match => match.Match.TreeType != SavedData.TreeType.Undetermined) ? new TreeTypeWriter() : null,
                            matches.Any(match => match.Match.TreeSize > 0) ? new TreeSizeWriter() : null,
                            matches.Any(match => match.Match.Starred) ? new StarredWriter() : null,
                            matches.Any(match => match.Match.HasHint) ? new SharedAncestorHintWriter() : null,
                            new CorrelatedClustersWriter(leafNodes, immediateFamilyIndexes, indexClusterNumbers, clusterNumberWriter, _minClusterSize),
                            new NoteWriter(),
                        }.Where(writer => writer != null).ToArray();
                        var columnWriters = new ColumnWritersCollection(p, ws, writers, _testTakerTestId);

                        col = columnWriters.WriteHeaders(row, col);

                        var firstMatrixDataRow = row + 1;
                        var firstMatrixDataColumn = col;

                        // Column headers for each match that belongs in this file
                        var matchColumns = nonDistantMatches.Skip(fileNum * MaxColumnsPerSplit).Take(MaxColumnsPerSplit).ToList();
                        foreach (var nonDistantMatch in matchColumns)
                        {
                            ws.Cells[row, col++].Value = nonDistantMatch.Match.Name;
                        }

                        // One row for each match
                        foreach (var leafNode in leafNodes)
                        {
                            // NOTE(review): throws KeyNotFoundException for a leaf node that is missing
                            // from matchesByIndex, although such nodes are filtered out of 'matches' above.
                            var match = matchesByIndex[leafNode.Index];
                            row++;

                            // Row headers
                            col = 1;
                            col = columnWriters.WriteColumns(row, col, match, leafNode);

                            // Correlation data; zero coordinates leave the cell empty
                            foreach (var coordAndIndex in leafNode.GetCoordsArray(orderedIndexes)
                                     .Zip(orderedIndexes, (c, i) => new { Coord = c, Index = i })
                                     .Skip(fileNum * MaxColumnsPerSplit).Take(MaxColumnsPerSplit))
                            {
                                if (coordAndIndex.Coord != 0)
                                {
                                    ws.Cells[row, col].Value = coordAndIndex.Coord;
                                }
                                col++;
                            }

                            _progressData.Increment();
                        }

                        // Heatmap color scale
                        var correlationData = new ExcelAddress(firstMatrixDataRow, firstMatrixDataColumn, firstMatrixDataRow - 1 + leafNodes.Count, firstMatrixDataColumn - 1 + matchColumns.Count);
                        var threeColorScale = ws.ConditionalFormatting.AddThreeColorScale(correlationData);
                        threeColorScale.LowValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                        threeColorScale.LowValue.Value = 0;
                        threeColorScale.LowValue.Color = Color.Gainsboro;
                        threeColorScale.MiddleValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                        threeColorScale.MiddleValue.Value = 1;
                        threeColorScale.MiddleValue.Color = Color.Cornsilk;
                        threeColorScale.HighValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                        threeColorScale.HighValue.Value = 2;
                        threeColorScale.HighValue.Color = Color.DarkRed;

                        // Heatmap number format
                        ws.Cells[$"1:{matchColumns.Count}"].Style.Numberformat.Format = "General";

                        col = 1;
                        col = columnWriters.FormatColumns(row, col);

                        // Freeze the column and row headers
                        ws.View.FreezePanes(firstMatrixDataRow, firstMatrixDataColumn);
                    });

                    // The first file keeps the configured name; later files get a numeric suffix starting at 2
                    var fileName = _correlationFilename;
                    if (fileNum > 0)
                    {
                        fileName = FileUtils.AddSuffixToFilename(fileName, (fileNum + 1).ToString());
                    }

                    FileUtils.Save(p, fileName);

                    files.Add(fileName);
                }
            }

            return files;
        }
 /// <summary>
 /// Creates a writer that pairs a converter function with the writer for the converted values.
 /// </summary>
 /// <param name="convertedValueWriter">Writer stored as the destination for converted values.</param>
 /// <param name="converter">Function mapping one XArray to another.</param>
 public ConvertingWriter(IColumnWriter convertedValueWriter, Func<XArray, XArray> converter)
 {
     _convertedValueWriter = convertedValueWriter;
     _converter = converter;
 }
        /// <summary>
        /// Exports the given matches to an Excel file with a single "matches" worksheet:
        /// a rotated header row followed by one row per match.
        /// </summary>
        /// <param name="matches">Matches to export; nothing is written when the list is empty.</param>
        /// <param name="exportFileName">Name of the file to save; nothing is written when it is null or empty.</param>
        /// <returns>A task that completes once the file has been saved.</returns>
        public async Task ExportAsync(List<IClusterableMatch> matches, string exportFileName)
        {
            if (string.IsNullOrEmpty(exportFileName))
            {
                return;
            }

            if (matches.Count == 0)
            {
                return;
            }

            _progressData.Reset("Exporting matches", matches.Count);

            using (var package = new ExcelPackage())
            {
                // The worksheet is filled on a worker thread; the package is saved after it completes
                await Task.Run(() =>
                {
                    var sheet = package.Workbook.Worksheets.Add("matches");

                    // The header occupies the first row, starting at the first column
                    var row = 1;

                    // Rotate the entire top row by 90 degrees
                    sheet.Row(row).Style.TextRotation = 90;

                    // Fixed columns, in display order; optional ones are added only when
                    // at least one match has data for them
                    var selected = new List<IColumnWriter>();
                    selected.Add(new NameWriter(false));
                    if (matches.Any(m => !string.IsNullOrEmpty(m.Match.TestGuid)))
                    {
                        selected.Add(new TestIdWriter());
                    }
                    if (!string.IsNullOrEmpty(_testTakerTestId))
                    {
                        selected.Add(new LinkWriter(_testTakerTestId, _ancestryHostName));
                    }
                    selected.Add(new SharedCentimorgansWriter());
                    if (matches.Any(m => m.Match.SharedSegments > 0))
                    {
                        selected.Add(new SharedSegmentsWriter());
                    }
                    if (matches.Any(m => m.Match.LongestBlock > 0))
                    {
                        selected.Add(new LongestBlockWriter());
                    }
                    if (matches.Any(m => !string.IsNullOrEmpty(m.Match.TreeUrl)))
                    {
                        selected.Add(new TreeUrlWriter(_testTakerTestId));
                    }
                    if (matches.Any(m => m.Match.TreeType != SavedData.TreeType.Undetermined))
                    {
                        selected.Add(new TreeTypeWriter());
                    }
                    if (matches.Any(m => m.Match.TreeSize > 0))
                    {
                        selected.Add(new TreeSizeWriter());
                    }
                    if (matches.Any(m => m.Match.CommonAncestors?.Count > 0))
                    {
                        selected.Add(new CommonAncestorsWriter());
                    }
                    if (matches.Any(m => m.Match.Starred))
                    {
                        selected.Add(new StarredWriter());
                    }
                    if (matches.Any(m => m.Match.HasHint))
                    {
                        selected.Add(new SharedAncestorHintWriter());
                    }
                    selected.Add(new NoteWriter());

                    var columnWriters = new ColumnWritersCollection(package, sheet, selected.ToArray(), _testTakerTestId);

                    // Data starts below the header row, in the column returned by the header pass
                    var firstDataColumn = columnWriters.WriteHeaders(row, 1);
                    var firstDataRow = row + 1;

                    // One row for each match
                    foreach (var match in matches)
                    {
                        ++row;
                        columnWriters.WriteColumns(row, 1, match, null);

                        _progressData.Increment();
                    }

                    columnWriters.FormatColumns(row, 1);

                    // Freeze the column and row headers
                    sheet.View.FreezePanes(firstDataRow, firstDataColumn);
                });

                FileUtils.Save(package, exportFileName);
            }
        }
 /// <summary>
 /// Creates a nullable writer around <paramref name="valueWriter"/>, remembering the stream
 /// provider and column path it was created with.
 /// </summary>
 /// <param name="streamProvider">Stream provider stored for later use.</param>
 /// <param name="columnPath">Path of the column stored for later use.</param>
 /// <param name="valueWriter">Writer for the column values.</param>
 public NullableWriter(IStreamProvider streamProvider, string columnPath, IColumnWriter valueWriter)
 {
     _valueWriter = valueWriter;
     _columnPath = columnPath;
     _streamProvider = streamProvider;
 }