/// <summary>
/// Writes the plain text data that is needed to display search result content into the <paramref name="destination"/> folder: a single
/// "SearchIndex-Titles.lz.txt" file that maps each Post Id on to its (trimmed) Title and its Slug, plus one "SearchIndex-Content-{PostId}.lz.txt"
/// file per Post that contains the Post's content as plain text. Every file is LZString-compressed before being written. The
/// JsonSearchIndexDataRecorder generates the javascript search index data which identifies matches but the content written here is required
/// to map those matches on to Post titles and sections of content. These two classes are very specific to this Blog site implementation.
/// </summary>
/// <param name="posts">The Posts to write title and content data for (must not be null)</param>
/// <param name="destination">The folder to write the files into (must not be null and must already exist)</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="posts"/> or <paramref name="destination"/> is null</exception>
/// <exception cref="ArgumentException">Thrown if the <paramref name="destination"/> folder does not exist</exception>
public static void Write(NonNullImmutableList<Post> posts, DirectoryInfo destination)
{
    if (posts == null)
    {
        throw new ArgumentNullException("posts");
    }
    if (destination == null)
    {
        throw new ArgumentNullException("destination");
    }

    // Refresh first so that the Exists check reflects the current state of the file system rather than a cached value
    destination.Refresh();
    if (!destination.Exists)
    {
        throw new ArgumentException("destination does not exist");
    }

    // All files are compressed so that they take up as little space as possible in the NeoCities hosting
    var utf8 = new UTF8Encoding();

    // Titles lookup: Post Id => { Title, Slug }
    const string titlesFilename = "SearchIndex-Titles.lz.txt";
    Console.WriteLine("Writing " + titlesFilename);
    var titleLookup = posts.ToDictionary(
        p => p.Id,
        p => new { Title = p.Title.Trim(), p.Slug }
    );
    var serialisedTitles = JsonConvert.SerializeObject(titleLookup);
    var titlesPath = Path.Combine(destination.FullName, titlesFilename);
    var compressedTitles = LZStringCompress.CompressToUTF16(serialisedTitles);
    File.WriteAllText(titlesPath, compressedTitles, utf8);

    // Plain text content: one file per Post
    foreach (var currentPost in posts)
    {
        var contentFilename = string.Concat("SearchIndex-Content-", currentPost.Id, ".lz.txt");
        Console.WriteLine("Writing " + contentFilename);
        var contentPath = Path.Combine(destination.FullName, contentFilename);
        var compressedContent = LZStringCompress.CompressToUTF16(currentPost.GetContentAsPlainText());
        File.WriteAllText(contentPath, compressedContent, utf8);
    }
}
/// <summary>
/// This generates the "SearchIndex-SummaryDictionary.js" and "SearchIndex-{PostId}-CompleteDictionary.lz.txt" files that are used to perform the
/// full text site search. The first file maps token matches onto Posts by Key, specifying the match Weight. It doesn't contain the source
/// locations which map the token back onto the source content in order to keep the file size down. Each
/// "SearchIndex-{PostId}-CompleteDictionary.lz.txt" file contains the mappings with source locations for a single Post (LZString-compressed).
/// These only need to be accessed once a Post has been identified as matching the search term(s). In order to display matched content, the
/// source locations must be mapped onto the plain text content generated by the PlainTextContentRecorder. These two classes are very specific
/// to this Blog site implementation.
/// </summary>
/// <param name="searchIndex">The search index whose tokens and matches are written out (must not be null)</param>
/// <param name="destination">The folder to write the files into (must not be null and must already exist)</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="searchIndex"/> or <paramref name="destination"/> is null</exception>
/// <exception cref="ArgumentException">Thrown if the <paramref name="destination"/> folder does not exist</exception>
public static void Write(IIndexData<int> searchIndex, DirectoryInfo destination)
{
    if (searchIndex == null)
    {
        throw new ArgumentNullException("searchIndex");
    }
    if (destination == null)
    {
        throw new ArgumentNullException("destination");
    }

    // Refresh first so that the Exists check reflects the current state of the file system rather than a cached value
    destination.Refresh();
    if (!destination.Exists)
    {
        throw new ArgumentException("destination does not exist");
    }

    // Translate the index into combined detail data for all Posts. The tokens and their matches are materialised here because the data is
    // walked twice below (once for the summary file, once for the per-Post grouping) and a deferred query would go back to the search index
    // and rebuild every entry on each pass.
    // NOTE(review): SourceLocationsIfRecorded is dereferenced unconditionally - presumably the index is always built with source locations
    // recorded; confirm against the index generation code.
    var matchData = searchIndex.GetAllTokens()
        .Select(token => new JsTokenMatch
        {
            t = token,
            l = searchIndex.GetMatches(token)
                .Select(weightedEntry => new JsSourceLocation
                {
                    k = weightedEntry.Key,
                    w = weightedEntry.Weight,
                    l = weightedEntry.SourceLocationsIfRecorded.Select(sourceLocation => new JsSourceLocationDetail
                    {
                        f = sourceLocation.SourceFieldIndex,
                        w = sourceLocation.MatchWeightContribution,
                        t = sourceLocation.TokenIndex,
                        i = sourceLocation.SourceIndex,
                        l = sourceLocation.SourceTokenLength
                    })
                })
                .ToArray()
        })
        .ToList();

    // The all-Post Summary data is going to be an associative array of token to Key/Weight matches (no Source Location data). This won't be
    // compressed so that the initial searching can be as quick as possible (the trade-off between valuable space at NeoCities hosting vs the
    // speed of native compression - ie. the gzip that happens over the wire but that doesn't benefit the backend storage - is worth it)
    var allPostsSummaryDictionary = matchData.ToDictionary(
        tokenMatch => tokenMatch.t,
        tokenMatch => tokenMatch.l.Select(weightedEntry => new JsSourceLocation { k = weightedEntry.k, w = weightedEntry.w })
    );
    var summaryFilename = "SearchIndex-SummaryDictionary.js";
    Console.WriteLine("Writing " + summaryFilename);
    File.WriteAllText(
        Path.Combine(destination.FullName, summaryFilename),
        SerialiseToJson(allPostsSummaryDictionary),
        new UTF8Encoding()
    );

    // The per-Post Detail data is going to be an associative array of token to Key/Weight matches (with Source Location) but only a single
    // Key will appear in each dictionary. This data WILL be compressed since it takes up a lot of space considering the NeoCities limits.
    // Matches are accumulated in a list per Post (rather than by repeatedly wrapping a lazy sequence) so that each addition is cheap and the
    // data can be enumerated in a single flat pass when it is serialised.
    var perPostData = new Dictionary<int, List<JsTokenMatch>>();
    foreach (var entry in matchData)
    {
        foreach (var result in entry.l)
        {
            List<JsTokenMatch> matchesForPost;
            if (!perPostData.TryGetValue(result.k, out matchesForPost))
            {
                matchesForPost = new List<JsTokenMatch>();
                perPostData.Add(result.k, matchesForPost);
            }
            matchesForPost.Add(new JsTokenMatch { t = entry.t, l = new[] { result } });
        }
    }

    foreach (var postData in perPostData)
    {
        var detailFilename = "SearchIndex-" + postData.Key + "-CompleteDictionary.lz.txt";
        Console.WriteLine("Writing " + detailFilename);
        File.WriteAllText(
            Path.Combine(destination.FullName, detailFilename),
            LZStringCompress.CompressToUTF16(
                SerialiseToJson(
                    postData.Value.ToDictionary(
                        entry => entry.t,
                        entry => entry.l
                    )
                )
            ),
            new UTF8Encoding()
        );
    }
}