/// <summary>
/// Factory method that builds a <see cref="TagServer"/> from the raw questions, using the private
/// constructor <see cref="TagServer(List{Question})"/>, and then hands its per-<see cref="QueryType"/>
/// lookups, its non-empty bit-map indexes and its AllTags lookup to the <c>Serialisation</c> helpers.
/// </summary>
/// <param name="rawQuestions">The questions passed to the <see cref="TagServer"/> constructor.</param>
/// <param name="intermediateFilesFolder">Folder argument passed to every serialisation call.</param>
/// <returns>The newly constructed <see cref="TagServer"/>.</returns>
public static TagServer CreateFromScratchAndSaveToDisk(List<Question> rawQuestions, string intermediateFilesFolder)
{
    var server = new TagServer(rawQuestions);

    var stopwatch = Stopwatch.StartNew();
    Logger.LogStartupMessage("Serialisation folder: {0}", intermediateFilesFolder);

    var allQueryTypes = (QueryType[])Enum.GetValues(typeof(QueryType));
    foreach (QueryType queryType in allQueryTypes)
    {
        // The tag lookup is always written, one file per query type.
        var lookupFile = "intermediate-Lookup-" + queryType + ".bin";
        Serialisation.SerialiseToDisk(lookupFile, intermediateFilesFolder, server.GetTagLookupForQueryType(queryType));

        // The bit-map index is only written (and read back) when it has entries.
        var bitMaps = server.GetTagBitMapIndexForQueryType(queryType);
        if (bitMaps.Count != 0)
        {
            var bitMapFile = String.Format("intermediate-BitMap-{0}.bin", queryType);
            Serialisation.SerialiseBitMapIndexToDisk(bitMapFile, intermediateFilesFolder, bitMaps);

            // Sanity-check: de-serialise the data we've just written to disk (the result is discarded).
            Serialisation.DeserialiseFromDisk(bitMapFile, intermediateFilesFolder);

            Logger.LogStartupMessage();
        }
    }

    // Finally write out the AllTags lookup, Tag -> Count (i.e. "C#" -> 579,321, "Java" -> 560,432)
    Serialisation.SerialiseToDisk(AllTagsFileName, intermediateFilesFolder, server.AllTags);

    stopwatch.Stop();
    Logger.LogStartupMessage("\nTook {0} (in TOTAL) to serialise the intermediate data TO disk\n", stopwatch.Elapsed);

    return server;
}
/// <summary>
/// Factory method that builds a <see cref="TagServer"/> from intermediate data read back through the
/// <c>Serialisation</c> helpers, using the private constructor
/// <see cref="TagServer(List{Question}, TagLookup, Dictionary{QueryType, TagByQueryLookup}, Dictionary{QueryType, TagByQueryBitMapLookup})"/>
/// </summary>
/// <param name="rawQuestions">The questions passed to the <see cref="TagServer"/> constructor.</param>
/// <param name="intermediateFilesFolder">Folder argument passed to every deserialisation call.</param>
/// <param name="deserialiseBitMapsIndexes">
/// When <c>true</c> (the default) the per-<see cref="QueryType"/> lookups and bit-map indexes are deserialised;
/// when <c>false</c> empty placeholder instances are registered for every query type instead.
/// </param>
/// <returns>A <see cref="TagServer"/> built from the deserialised (or placeholder) data.</returns>
public static TagServer CreateFromSerialisedData(List<Question> rawQuestions, string intermediateFilesFolder, bool deserialiseBitMapsIndexes = true)
{
    var stopwatch = Stopwatch.StartNew();
    Logger.LogStartupMessage("Deserialisation folder: {0}", intermediateFilesFolder);

    var allQueryTypes = (QueryType[])Enum.GetValues(typeof(QueryType));
    var lookupsByType = new Dictionary<QueryType, TagByQueryLookup>(allQueryTypes.Length);
    var bitMapsByType = new Dictionary<QueryType, TagByQueryBitMapLookup>(allQueryTypes.Length);

    foreach (QueryType queryType in allQueryTypes)
    {
        if (!deserialiseBitMapsIndexes)
        {
            // Maybe don't check this in: it is only here to save time when we don't need the real data.
            lookupsByType.Add(queryType, new TagByQueryLookup());
            bitMapsByType.Add(queryType, new TagByQueryBitMapLookup());
            continue;
        }

        var lookupFile = "intermediate-Lookup-" + queryType + ".bin";
        var lookup = Serialisation.DeserialiseFromDisk<TagByQueryLookup>(lookupFile, intermediateFilesFolder);
        Logger.LogStartupMessage("{0,20} contains {1:N0} Tag Lookups", queryType, lookup.Count);
        lookupsByType.Add(queryType, lookup);

        // NOTE(review): CreateFromScratchAndSaveToDisk skips writing this file when the bit-map index is
        // empty - confirm that DeserialiseFromDisk copes with a missing (or stale) file.
        var bitMapFile = String.Format("intermediate-BitMap-{0}.bin", queryType);
        var bitMaps = Serialisation.DeserialiseFromDisk(bitMapFile, intermediateFilesFolder);
        Logger.LogStartupMessage("{0,20} contains {1:N0} Tag BitMap Indexes", queryType, bitMaps.Count);
        bitMapsByType.Add(queryType, bitMaps);

        Logger.LogStartupMessage();
    }

    // Finally fetch the AllTags lookup from disk, Tag -> Count (i.e. "C#" -> 579,321, "Java" -> 560,432)
    var allTags = Serialisation.DeserialiseFromDisk<TagLookup>(AllTagsFileName, intermediateFilesFolder);

    stopwatch.Stop();
    Logger.LogStartupMessage("\nTook {0} (in TOTAL) to DE-serialise the intermediate data FROM disk\n", stopwatch.Elapsed);

    return new TagServer(rawQuestions, allTags, lookupsByType, bitMapsByType);
}