/// <summary>
        /// This generates the "SearchIndex-Titles.js" file and plain text representations of Posts that are required to display search result content (the
        /// JsonSearchIndexDataRecorder generates the javascript search index data which identifies matches but this content is required to map that on to
        /// Post titles and sections of content). These two classes are very specific to my Blog site implementation.
        /// </summary>
        /// <param name="posts">The Posts to record titles and plain-text content for; may not be null.</param>
        /// <param name="destination">An existing directory that the output files will be written into; may not be null.</param>
        /// <exception cref="ArgumentNullException">If posts or destination is null.</exception>
        /// <exception cref="ArgumentException">If destination does not exist on disk.</exception>
        public static void Write(NonNullImmutableList<Post> posts, DirectoryInfo destination)
        {
            if (posts == null)
            {
                throw new ArgumentNullException(nameof(posts));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            // Refresh first so a stale cached Exists value doesn't produce a false negative (or positive)
            destination.Refresh();
            if (!destination.Exists)
            {
                throw new ArgumentException("destination does not exist", nameof(destination));
            }

            // Load the Post Data (all files will be compressed to take up as little space as possible in the NeoCities hosting)
            // - Generate "SearchIndex-Titles.js"
            // - Generate "SearchIndex-Content-{0}.txt"
            var titlesFilename = "SearchIndex-Titles.lz.txt";

            // A single encoding instance is sufficient for all of the writes below
            var utf8 = new UTF8Encoding();

            Console.WriteLine("Writing " + titlesFilename);
            var titlesJson = JsonConvert.SerializeObject(
                posts.ToDictionary(
                    p => p.Id,
                    p => new { Title = p.Title.Trim(), Slug = p.Slug }
                    )
                );

            File.WriteAllText(
                Path.Combine(
                    destination.FullName,
                    titlesFilename
                    ),
                LZStringCompress.CompressToUTF16(titlesJson),
                utf8
                );

            // One plain-text content file per Post, compressed the same way as the titles file
            foreach (var post in posts)
            {
                var contentFilename = "SearchIndex-Content-" + post.Id + ".lz.txt";
                Console.WriteLine("Writing " + contentFilename);
                File.WriteAllText(
                    Path.Combine(
                        destination.FullName,
                        contentFilename
                        ),
                    LZStringCompress.CompressToUTF16(
                        post.GetContentAsPlainText()
                        ),
                    utf8
                    );
            }
        }
        // NOTE(review): the lines "Beispiel #2" / "0" here were paste artifacts from the code-sharing
        // page this file was scraped from (not valid C#) - preserved as a comment so the file compiles
        /// <summary>
        /// This generates the "SearchIndex-SummaryDictionary.js" and "SearchIndex-{PostId}-CompleteDictionary.js" files that are used to perform the full
        /// text site search. The first file maps token matches onto Posts by Key, specifying the match Weight. It doesn't contain the source locations
        /// which map the token back onto the source content in order to keep the file size down. The "SearchIndex-{PostId}-CompleteDictionary.js" files
        /// contain the mappings with source locations for a single Post. These only need to be accessed once a Post has been identified as matching the
        /// search term(s). In order to display matched content, the source locations must be mapped onto the plain text content generated by the
        /// PlainTextContentRecorder. These two classes are very specific to my Blog site implementation.
        /// </summary>
        /// <param name="searchIndex">The index data to serialise; may not be null.</param>
        /// <param name="destination">An existing directory that the output files will be written into; may not be null.</param>
        /// <exception cref="ArgumentNullException">If searchIndex or destination is null.</exception>
        /// <exception cref="ArgumentException">If destination does not exist on disk.</exception>
        public static void Write(IIndexData<int> searchIndex, DirectoryInfo destination)
        {
            if (searchIndex == null)
            {
                // Fix: this previously reported "searchIndexFile", which is not the name of any parameter on this method
                throw new ArgumentNullException(nameof(searchIndex));
            }
            if (destination == null)
            {
                throw new ArgumentNullException(nameof(destination));
            }
            destination.Refresh();
            if (!destination.Exists)
            {
                throw new ArgumentException("destination does not exist", nameof(destination));
            }

            // Get Search Index Data
            // - Generate "SearchIndex-SummaryDictionary.js"
            // - Generate all of "SearchIndex-{0}-CompleteDictionary.js"

            // Translate into combined detail data for all Posts. This data is enumerated multiple times below (once for the
            // summary dictionary and once for the per-Post files) so it is materialised here with ToArray calls - otherwise
            // the deferred LINQ query would re-run GetMatches for every token on each enumeration.
            var matchData = searchIndex.GetAllTokens().Select(token => new JsTokenMatch
            {
                t = token,
                l = searchIndex.GetMatches(token).Select(weightedEntry => new JsSourceLocation
                {
                    k = weightedEntry.Key,
                    w = weightedEntry.Weight,
                    l = weightedEntry.SourceLocationsIfRecorded.Select(sourceLocation => new JsSourceLocationDetail
                    {
                        f = sourceLocation.SourceFieldIndex,
                        w = sourceLocation.MatchWeightContribution,
                        t = sourceLocation.TokenIndex,
                        i = sourceLocation.SourceIndex,
                        l = sourceLocation.SourceTokenLength
                    }).ToArray()
                }).ToArray()
            }).ToArray();

            // The all-Post Summary data is going to be an associative array of token to Key/Weight matches (no Source Location data). This won't be
            // compressed so that the initial searching can be as quick as possible (the trade-off between valuable space at NeoCities hosting vs the
            // speed of native compression - ie. the gzip that happens over the wire but that doesn't benefit the backend storage - is worth it)
            var allPostsSummaryDictionary = matchData.ToDictionary(
                tokenMatch => tokenMatch.t,
                tokenMatch => tokenMatch.l.Select(weightedEntry => new JsSourceLocation
                {
                    k = weightedEntry.k,
                    w = weightedEntry.w
                })
                );
            var summaryFilename = "SearchIndex-SummaryDictionary.js";

            Console.WriteLine("Writing " + summaryFilename);
            File.WriteAllText(
                Path.Combine(destination.FullName, summaryFilename),
                SerialiseToJson(allPostsSummaryDictionary),
                new UTF8Encoding()
                );

            // The per-Post Detail data is going to be an associative array of token to Key/Weight matches (with Source Location) but only a single
            // Key will appear in each dictionary. This data WILL be compressed since it takes up a lot of space considering the NeoCities limits.
            // Accumulate into List<T> buckets (rather than repeatedly Concat-ing IEnumerables, which built an O(n^2) chain of
            // deferred iterators) and use TryGetValue to avoid the ContainsKey-then-indexer double lookup.
            var perPostData = new Dictionary<int, List<JsTokenMatch>>();

            foreach (var entry in matchData)
            {
                foreach (var result in entry.l)
                {
                    List<JsTokenMatch> matchesForPost;
                    if (!perPostData.TryGetValue(result.k, out matchesForPost))
                    {
                        matchesForPost = new List<JsTokenMatch>();
                        perPostData.Add(result.k, matchesForPost);
                    }
                    matchesForPost.Add(
                        new JsTokenMatch
                        {
                            t = entry.t,
                            l = new[] { result }
                        });
                }
            }
            foreach (var postData in perPostData)
            {
                var detailFilename = "SearchIndex-" + postData.Key + "-CompleteDictionary.lz.txt";
                Console.WriteLine("Writing " + detailFilename);
                File.WriteAllText(
                    Path.Combine(destination.FullName, detailFilename),
                    LZStringCompress.CompressToUTF16(
                        SerialiseToJson(
                            postData.Value.ToDictionary(
                                entry => entry.t,
                                entry => entry.l
                                )
                            )
                        ),
                    new UTF8Encoding()
                    );
            }
        }