private ColumnWriterDetails GetColumnWriterDetails<T>(IColumnWriter<T> columnWriter, PropertyInfo propertyInfo, Func<object, T> valueGetter, ColumnTypeKind columnKind)
{
    // Buffer of values captured for this column; flushed to the writer in blocks.
    var pendingValues = new List<T>();

    return new ColumnWriterDetails
    {
        PropertyName = propertyInfo.Name,
        ColumnWriter = columnWriter,

        // Pull this column's value off the instance and buffer it.
        AddValueToState = classInstance => pendingValues.Add(valueGetter(classInstance)),

        // Hand the buffered values to the writer as one block, then reset the buffer.
        WriteValuesFromState = () =>
        {
            columnWriter.AddBlock(pendingValues);
            pendingValues.Clear();
        },

        ColumnType = new ColumnType { Kind = columnKind }
    };
}
/// <summary>
///  Build a writer for the given column type, or return null if the type
///  cannot be written even after conversion to String8.
/// </summary>
public static IColumnWriter TryGetColumnWriter(IStreamProvider streamProvider, Type columnType, string columnPath)
{
    IColumnWriter writer = null;

    // Prefer a direct binary writer registered for this column type.
    ITypeProvider provider = TryGet(columnType);
    if (provider != null)
    {
        writer = provider.BinaryWriter(streamProvider, columnPath);
    }

    // If the column type doesn't have a provider or writer, convert to String8 and write that
    if (writer == null)
    {
        Func<XArray, XArray> converter = TypeConverterFactory.GetConverter(columnType, typeof(String8));
        if (converter == null) return null;

        writer = TypeProviderFactory.TryGet(typeof(String8)).BinaryWriter(streamProvider, columnPath);
        writer = new ConvertingWriter(writer, converter);
    }

    // Wrap with a NullableWriter to handle null persistence
    writer = new NullableWriter(streamProvider, columnPath, writer);

    // Wrap with an EnumWriter to write as an EnumColumn while possible.
    // Try for *all types* [even bool, byte, ushort] because Enum columns can roll nulls into the column itself and accelerate groupBy
    writer = new EnumWriter(streamProvider, columnPath, columnType, writer);

    return writer;
}
public ExcelSimilarityWriter(string testTakerTestId, List<IClusterableMatch> matches, string fileName, string fileNameSuffix)
{
    _fileName = string.IsNullOrEmpty(fileNameSuffix)
        ? fileName
        : FileUtils.AddSuffixToFilename(fileName, fileNameSuffix);

    _p = new ExcelPackage();
    _ws = _p.Workbook.Worksheets.Add("similarity");

    // Optional columns are only included when at least one match has data for them;
    // entries that evaluate to null are filtered out below.
    var candidateWriters = new IColumnWriter[]
    {
        new CountWriter(),
        _overlapWriter,
        new NameWriter(true),
        matches.Any(match => !string.IsNullOrEmpty(match.Match.TestGuid)) ? new TestIdWriter() : null,
        !string.IsNullOrEmpty(testTakerTestId) ? new LinkWriter(testTakerTestId) : null,
        new SharedCentimorgansWriter(),
        matches.Any(match => match.Match.SharedSegments > 0) ? new SharedSegmentsWriter() : null,
        matches.Any(match => match.Match.LongestBlock > 0) ? new LongestBlockWriter() : null,
        matches.Any(match => !string.IsNullOrEmpty(match.Match.TreeUrl)) ? new TreeUrlWriter(testTakerTestId) : null,
        matches.Any(match => match.Match.TreeType != SavedData.TreeType.Undetermined) ? new TreeTypeWriter() : null,
        matches.Any(match => match.Match.TreeSize > 0) ? new TreeSizeWriter() : null,
        matches.Any(match => match.Match.Starred) ? new StarredWriter() : null,
        matches.Any(match => match.Match.HasHint) ? new SharedAncestorHintWriter() : null,
        new NoteWriter(),
    };
    var writers = candidateWriters.Where(writer => writer != null).ToArray();

    _writers = new ColumnWritersCollection(_p, _ws, writers, testTakerTestId);

    // Rotate the entire top row by 90 degrees
    _ws.Row(_row).Style.TextRotation = 90;

    _col = _writers.WriteHeaders(_row, _col);
    ++_row;
}
public void Dispose()
{
    // Dispose the inner writer once and drop the reference so repeated
    // Dispose calls are harmless.
    _writer?.Dispose();
    _writer = null;
}
public void Dispose()
{
    // Dispose the wrapped writer once; clearing the field makes Dispose idempotent.
    _convertedValueWriter?.Dispose();
    _convertedValueWriter = null;
}
public EnumWriter(IStreamProvider streamProvider, string columnPath, Type columnType, IColumnWriter valueWriter)
{
    _streamProvider = streamProvider;
    _columnPath = columnPath;
    _valueWriter = valueWriter;

    // Dictionary of distinct values, constructed generically for the column's element type.
    _dictionary = (IEnumColumnDictionary)Allocator.ConstructGenericOf(typeof(EnumColumnDictionary<>), columnType);

    // Per-row byte indices into the dictionary stream to a side file next to the column.
    _rowIndexWriter = new PrimitiveArrayWriter<byte>(streamProvider.OpenWrite(Path.Combine(_columnPath, RowIndexFileName)));
}
public VariableIntegerWriter(IStreamProvider streamProvider, string columnPathPrefix)
{
    _streamProvider = streamProvider;
    _columnPathPrefix = columnPathPrefix;

    // Start out writing each value in a single byte; Upconvert widens the
    // storage type later if a value doesn't fit.
    WritingAsType = typeof(byte);
    _writer = BuildDirectWriter(_streamProvider, WritingAsType, PathForType(_columnPathPrefix, WritingAsType));
    _converter = TypeConverterFactory.GetConverter(typeof(int), typeof(byte));
}
/// <summary>
///  Serialize a Bits instance as its text form ("ToString" bit pattern)
///  onto the column writer's underlying stream.
/// </summary>
/// <param name="instance">Bits value to serialize.</param>
/// <param name="writer">Column writer whose BaseStream receives the text.</param>
/// <param name="context">Serialization context (unused here).</param>
public void Serialize(
    Bits instance,
    IColumnWriter writer,
    ISerializationContext context)
{
    // FIX: the original StreamWriter closed writer.BaseStream when disposed,
    // destroying a stream this method does not own. leaveOpen: true flushes
    // on dispose without closing the caller's stream.
    // UTF8Encoding(false, true) matches StreamWriter's default encoding
    // (UTF-8, no BOM, throw on invalid bytes) so the bytes written are unchanged.
    using (var w = new System.IO.StreamWriter(writer.BaseStream, new System.Text.UTF8Encoding(false, true), 1024, leaveOpen: true))
    {
        var bitstring = instance.ToString();
        w.Write(bitstring);
        w.Flush();
    }
}
public void Dispose()
{
    // Dispose the value writer first, then the null-bitmap writer; clearing
    // each field makes repeated Dispose calls safe.
    _valueWriter?.Dispose();
    _valueWriter = null;

    _nullWriter?.Dispose();
    _nullWriter = null;
}
/// <summary>
///  Widen the column's storage type to 'toType', converting every value
///  already written from the current WritingAsType file into a new file,
///  then deleting the old file and swapping in the new writer.
/// </summary>
/// <param name="toType">Larger storage type to write the column as from now on.</param>
private void Upconvert(Type toType)
{
    // Close the current writer
    _writer.Dispose();
    _writer = null;

    // Determine previous and new file paths
    string columnValuesFullPath = PathForType(_columnPathPrefix, WritingAsType);
    string columnConvertedFullPath = PathForType(_columnPathPrefix, toType);

    // Build a writer for the larger type
    IColumnWriter writer = BuildDirectWriter(_streamProvider, toType, columnConvertedFullPath);

    // Convert already written values (if any)
    if (_rowCountWritten > 0)
    {
        // Build a converter to convert the values
        Func<XArray, XArray> converter = TypeConverterFactory.GetConverter(WritingAsType, toType);

        // Stream them in, convert them, and write them out (10,240 rows per page)
        using (IColumnReader reader = TypeProviderFactory.TryGetColumnReader(_streamProvider, WritingAsType, columnValuesFullPath))
        {
            int rowCount = reader.Count;
            ArraySelector page = ArraySelector.All(0).NextPage(rowCount, 10240);
            while (page.Count > 0)
            {
                XArray original = reader.Read(page);
                XArray converted = converter(original);
                writer.Append(converted);
                page = page.NextPage(rowCount, 10240);
            }
        }
    }

    // Delete the original (narrower) file; the converted copy replaces it
    _streamProvider.Delete(columnValuesFullPath);

    // Re-initialize for the new writer
    WritingAsType = toType;
    _writer = writer;

    // Incoming values arrive as int: no conversion needed if writing int,
    // otherwise convert int -> toType on each append.
    _converter = (toType == typeof(int) ? null : TypeConverterFactory.GetConverter(typeof(int), toType));
}
public void Dispose()
{
    // Still in enum mode: persist the distinct values before closing the value writer.
    if (_dictionary != null)
    {
        _valueWriter.Append(_dictionary.Values());
        _dictionary = null;
    }

    _valueWriter?.Dispose();
    _valueWriter = null;

    _rowIndexWriter?.Dispose();
    _rowIndexWriter = null;
}
/// <summary>
///  Abandon enum (dictionary) encoding: replay the byte row-indices already
///  written, mapping each back to its raw value and appending those values
///  to the value writer, then delete the row index file so subsequent rows
///  stream out as plain values.
/// </summary>
private void Convert()
{
    // Close the row index writer
    _rowIndexWriter.Dispose();
    _rowIndexWriter = null;

    // If we wrote any rows we need to convert...
    if (_rowCountWritten > 0)
    {
        // Get the set of unique values and get rid of the value dictionary
        XArray values = _dictionary.Values();

        // Convert the indices previously written into raw values
        Func<XArray, XArray> converter = TypeConverterFactory.GetConverter(typeof(byte), typeof(int));
        using (IColumnReader rowIndexReader = new PrimitiveArrayReader<byte>(_streamProvider.OpenRead(Path.Combine(_columnPath, RowIndexFileName))))
        {
            int rowCount = rowIndexReader.Count;

            // Page through the index file 10,240 rows at a time
            ArraySelector page = ArraySelector.All(0).NextPage(rowCount, 10240);
            while (page.Count > 0)
            {
                // Read an XArray of indices and convert to int[]
                XArray rowIndices = converter(rowIndexReader.Read(page));

                // Write the corresponding values
                // Reselect is safe because 'values' are converted to a contiguous array
                _valueWriter.Append(values.Reselect(ArraySelector.Map((int[])rowIndices.Array, rowIndices.Count)));

                page = page.NextPage(rowCount, 10240);
            }
        }
    }

    // Remove the Dictionary (so future rows are streamed out as-is)
    _dictionary = null;

    // Delete the row index file
    _streamProvider.Delete(Path.Combine(_columnPath, RowIndexFileName));
}
/// <summary>
///  Write the cluster correlation heatmap to one or more Excel files:
///  fixed descriptive columns on the left, then one column per clusterable
///  match, with a three-color conditional-format scale over the matrix.
///  Splits into multiple files when the match count exceeds Excel's column limit.
/// </summary>
/// <param name="nodes">Cluster tree; the first node's ordered leaves define row/column order.</param>
/// <param name="matchesByIndex">Lookup from leaf-node index to its match data.</param>
/// <param name="indexClusterNumbers">Lookup from match index to assigned cluster number.</param>
/// <returns>The list of file names written (empty if there was nothing to do).</returns>
public async Task<List<string>> OutputCorrelationAsync(List<ClusterNode> nodes, Dictionary<int, IClusterableMatch> matchesByIndex, Dictionary<int, int> indexClusterNumbers)
{
    // Nothing to write without a target file name or any cluster nodes.
    if (string.IsNullOrEmpty(_correlationFilename))
    {
        return (new List<string>());
    }
    if (nodes.Count == 0)
    {
        return (new List<string>());
    }

    // All nodes, in order. These will become rows/columns in the Excel file.
    var leafNodes = nodes.First().GetOrderedLeafNodes().ToList();

    // Excel has a limit of 16,384 columns.
    // If there are more than 16,000 matches, split into files containing at most 10,000 columns.
    var numOutputFiles = 1;
    if (leafNodes.Count > MaxColumns)
    {
        numOutputFiles = leafNodes.Count / MaxColumnsPerSplit + 1;
    }

    _progressData.Reset("Saving clusters", leafNodes.Count * numOutputFiles);

    // Ancestry never shows matches lower than 20 cM as shared matches.
    // The distant matches will be included as rows in the Excel file, but not as columns.
    // That means that correlation diagrams that include distant matches will be rectangular (tall and narrow)
    // rather than square.
    var matches = leafNodes
        .Where(leafNode => matchesByIndex.ContainsKey(leafNode.Index))
        .Select(leafNode => matchesByIndex[leafNode.Index])
        .ToList();

    // NOTE(review): Min() throws InvalidOperationException if no match has any
    // clusterable coords — confirm callers guarantee at least one shared coord.
    var lowestClusterableCentimorgans = matches
        .SelectMany(match => match.Coords.Where(coord => coord != match.Index && matchesByIndex.ContainsKey(coord)))
        .Distinct()
        .Min(coord => matchesByIndex[coord].Match.SharedCentimorgans);

    var nonDistantMatches = matches
        .Where(match => match.Match.SharedCentimorgans >= lowestClusterableCentimorgans)
        .ToList();

    var orderedIndexes = nonDistantMatches
        .Select(match => match.Index)
        .ToList();

    // Because very strong matches are included in so many clusters,
    // excluding the strong matches makes it easier to identify edges of the clusters.
    var immediateFamilyIndexes = new HashSet<int>(
        matchesByIndex.Values
            .Where(match => match.Match.SharedCentimorgans > 200)
            .Select(match => match.Index)
    );

    var files = new List<string>();
    for (var fileNum = 0; fileNum < numOutputFiles; ++fileNum)
    {
        using (var p = new ExcelPackage())
        {
            await Task.Run(() =>
            {
                var ws = p.Workbook.Worksheets.Add("heatmap");

                // Start at the top left of the sheet
                var row = 1;
                var col = 1;

                // Rotate the entire top row by 90 degrees
                ws.Row(row).Style.TextRotation = 90;

                // Fixed columns. Optional columns are only emitted when at least
                // one match has data for them (null entries filtered below).
                var clusterNumberWriter = new ClusterNumberWriter(indexClusterNumbers);
                var writers = new IColumnWriter[]
                {
                    clusterNumberWriter,
                    new NameWriter(false),
                    matches.Any(match => !string.IsNullOrEmpty(match.Match.TestGuid)) ? new TestIdWriter() : null,
                    !string.IsNullOrEmpty(_testTakerTestId) ? new LinkWriter(_testTakerTestId) : null,
                    new SharedCentimorgansWriter(),
                    matches.Any(match => match.Match.SharedSegments > 0) ? new SharedSegmentsWriter() : null,
                    matches.Any(match => match.Match.LongestBlock > 0) ? new LongestBlockWriter() : null,
                    matches.Any(match => !string.IsNullOrEmpty(match.Match.TreeUrl)) ? new TreeUrlWriter(_testTakerTestId) : null,
                    matches.Any(match => match.Match.TreeType != SavedData.TreeType.Undetermined) ? new TreeTypeWriter() : null,
                    matches.Any(match => match.Match.TreeSize > 0) ? new TreeSizeWriter() : null,
                    matches.Any(match => match.Match.Starred) ? new StarredWriter() : null,
                    matches.Any(match => match.Match.HasHint) ? new SharedAncestorHintWriter() : null,
                    new CorrelatedClustersWriter(leafNodes, immediateFamilyIndexes, indexClusterNumbers, clusterNumberWriter, _minClusterSize),
                    new NoteWriter(),
                }.Where(writer => writer != null).ToArray();
                var columnWriters = new ColumnWritersCollection(p, ws, writers, _testTakerTestId);

                col = columnWriters.WriteHeaders(row, col);

                var firstMatrixDataRow = row + 1;
                var firstMatrixDataColumn = col;

                // Column headers for each match; this file's slice of the columns.
                var matchColumns = nonDistantMatches.Skip(fileNum * MaxColumnsPerSplit).Take(MaxColumnsPerSplit).ToList();
                foreach (var nonDistantMatch in matchColumns)
                {
                    ws.Cells[row, col++].Value = nonDistantMatch.Match.Name;
                }

                // One row for each match
                foreach (var leafNode in leafNodes)
                {
                    var match = matchesByIndex[leafNode.Index];
                    row++;

                    // Row headers
                    col = 1;
                    col = columnWriters.WriteColumns(row, col, match, leafNode);

                    // Correlation data: pair each coord with its match index, then
                    // take only this file's column slice; zero coords leave blank cells.
                    foreach (var coordAndIndex in leafNode.GetCoordsArray(orderedIndexes)
                        .Zip(orderedIndexes, (c, i) => new { Coord = c, Index = i })
                        .Skip(fileNum * MaxColumnsPerSplit).Take(MaxColumnsPerSplit))
                    {
                        if (coordAndIndex.Coord != 0)
                        {
                            ws.Cells[row, col].Value = coordAndIndex.Coord;
                        }
                        col++;
                    }
                    _progressData.Increment();
                }

                // Heatmap color scale over the correlation matrix:
                // 0 = Gainsboro (no correlation), 1 = Cornsilk, 2 = DarkRed.
                var correlationData = new ExcelAddress(firstMatrixDataRow, firstMatrixDataColumn, firstMatrixDataRow - 1 + leafNodes.Count, firstMatrixDataColumn - 1 + matchColumns.Count);
                var threeColorScale = ws.ConditionalFormatting.AddThreeColorScale(correlationData);
                threeColorScale.LowValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                threeColorScale.LowValue.Value = 0;
                threeColorScale.LowValue.Color = Color.Gainsboro;
                threeColorScale.MiddleValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                threeColorScale.MiddleValue.Value = 1;
                threeColorScale.MiddleValue.Color = Color.Cornsilk;
                threeColorScale.HighValue.Type = eExcelConditionalFormattingValueObjectType.Num;
                threeColorScale.HighValue.Value = 2;
                threeColorScale.HighValue.Color = Color.DarkRed;

                // Heatmap number format
                // NOTE(review): the address "1:N" selects ROWS 1..matchColumns.Count,
                // not the heatmap columns — confirm whether a column range was intended.
                ws.Cells[$"1:{matchColumns.Count}"].Style.Numberformat.Format = "General";

                col = 1;
                col = columnWriters.FormatColumns(row, col);

                // Freeze the column and row headers
                ws.View.FreezePanes(firstMatrixDataRow, firstMatrixDataColumn);
            });

            // Files after the first get a numeric suffix (2, 3, ...).
            var fileName = _correlationFilename;
            if (fileNum > 0)
            {
                fileName = FileUtils.AddSuffixToFilename(fileName, (fileNum + 1).ToString());
            }
            FileUtils.Save(p, fileName);
            files.Add(fileName);
        }
    }
    return (files);
}
public ConvertingWriter(IColumnWriter convertedValueWriter, Func<XArray, XArray> converter)
{
    // Each appended XArray is passed through 'converter' before reaching the inner writer.
    _convertedValueWriter = convertedValueWriter;
    _converter = converter;
}
/// <summary>
///  Export the match list to an Excel file: one row per match with
///  descriptive columns, writing only columns that have data.
/// </summary>
public async Task ExportAsync(List<IClusterableMatch> matches, string exportFileName)
{
    // Nothing to do without a target file name or any matches.
    if (string.IsNullOrEmpty(exportFileName) || matches.Count == 0)
    {
        return;
    }

    _progressData.Reset("Exporting matches", matches.Count);

    using (var p = new ExcelPackage())
    {
        await Task.Run(() =>
        {
            var ws = p.Workbook.Worksheets.Add("matches");

            // Start at the top left of the sheet
            var rowIndex = 1;
            var colIndex = 1;

            // Rotate the entire top row by 90 degrees
            ws.Row(rowIndex).Style.TextRotation = 90;

            // Fixed columns; optional ones are included only when some match has data.
            var writers = new IColumnWriter[]
            {
                new NameWriter(false),
                matches.Any(match => !string.IsNullOrEmpty(match.Match.TestGuid)) ? new TestIdWriter() : null,
                !string.IsNullOrEmpty(_testTakerTestId) ? new LinkWriter(_testTakerTestId, _ancestryHostName) : null,
                new SharedCentimorgansWriter(),
                matches.Any(match => match.Match.SharedSegments > 0) ? new SharedSegmentsWriter() : null,
                matches.Any(match => match.Match.LongestBlock > 0) ? new LongestBlockWriter() : null,
                matches.Any(match => !string.IsNullOrEmpty(match.Match.TreeUrl)) ? new TreeUrlWriter(_testTakerTestId) : null,
                matches.Any(match => match.Match.TreeType != SavedData.TreeType.Undetermined) ? new TreeTypeWriter() : null,
                matches.Any(match => match.Match.TreeSize > 0) ? new TreeSizeWriter() : null,
                matches.Any(match => match.Match.CommonAncestors?.Count > 0) ? new CommonAncestorsWriter() : null,
                matches.Any(match => match.Match.Starred) ? new StarredWriter() : null,
                matches.Any(match => match.Match.HasHint) ? new SharedAncestorHintWriter() : null,
                new NoteWriter(),
            }.Where(writer => writer != null).ToArray();
            var columnWriters = new ColumnWritersCollection(p, ws, writers, _testTakerTestId);

            colIndex = columnWriters.WriteHeaders(rowIndex, colIndex);

            var firstMatrixDataRow = rowIndex + 1;
            var firstMatrixDataColumn = colIndex;

            // One row for each match
            foreach (var match in matches)
            {
                rowIndex++;
                colIndex = 1;
                colIndex = columnWriters.WriteColumns(rowIndex, colIndex, match, null);
                _progressData.Increment();
            }

            colIndex = 1;
            colIndex = columnWriters.FormatColumns(rowIndex, colIndex);

            // Freeze the column and row headers
            ws.View.FreezePanes(firstMatrixDataRow, firstMatrixDataColumn);
        });

        FileUtils.Save(p, exportFileName);
    }
}
public NullableWriter(IStreamProvider streamProvider, string columnPath, IColumnWriter valueWriter)
{
    // Wraps 'valueWriter', persisting null information alongside the values.
    _streamProvider = streamProvider;
    _columnPath = columnPath;
    _valueWriter = valueWriter;
}