/// <summary>
/// Stores a redirect result on the record for <paramref name="path"/> and then
/// passes the redirect destination to <c>AddUri</c>.
/// </summary>
/// <param name="path">Key of the content record to update.</param>
/// <param name="status">HTTP status written to the record.</param>
/// <param name="redirectUri">
/// Redirect destination. Saved as path-and-query when it shares a host with the
/// base URI, otherwise saved as the absolute URI.
/// </param>
public void SetRedirect(string path, HttpStatusCode status, Uri redirectUri)
        {
            // Read the site-relative form first; only foreign hosts need the absolute form.
            string relativeTarget = redirectUri.PathAndQuery;
            string targetUri = _baseUri.IsSameHost(redirectUri)
                ? relativeTarget
                : redirectUri.AbsoluteUri;

            _data.Update(path, existing =>
            {
                // Flag the crawl as modified when either the status or the redirect target changed.
                Modified = Modified || existing.HttpStatus != (uint)status ||
                           existing.ContentRedirect != targetUri;

                return existing.ToBuilder()
                       .SetContentUri(path)
                       .SetLastCrawled(CrawlTime)
                       .SetLastValid(CrawlTime)
                       .SetHttpStatus((uint)status)
                       .SetContentRedirect(targetUri)
                       .Build();
            });

            AddUri(redirectUri);
        }
        /// <summary>
        /// Moves the stored content of one page to another address on the same site and,
        /// when requested, adds a redirect record at the old address.
        /// </summary>
        /// <param name="targetLink">Absolute address the content is moved to.</param>
        /// <param name="sourceLink">Absolute address the content is moved from.</param>
        /// <param name="redirect">When true, a redirect record pointing at the target is added under the source path.</param>
        public void Rename(
            [Argument("page", "p", Description = "The full http address of the page to move the source content to.")]
            string targetLink,
            [Argument("source", "s", Description = "The full http address of the page you want to move.")]
            string sourceLink,
            [Argument("redirect", "r", DefaultValue = true, Description = "True to insert a redirect after moving the content.")]
            bool redirect)
        {
            Uri targetUri = new Uri(targetLink, UriKind.Absolute);
            Uri sourceUri = new Uri(sourceLink, UriKind.Absolute);

            Check.Assert <InvalidOperationException>(sourceUri.IsSameHost(targetUri), "The source and target should be in the same site.");

            using (ContentStorage store = new ContentStorage(StoragePath(sourceLink), false))
            {
                string fromPath = sourceUri.NormalizedPathAndQuery();
                string toPath   = targetUri.NormalizedPathAndQuery();

                store.Rename(fromPath, toPath);
                if (!redirect)
                {
                    return;
                }

                // Leave a redirect record behind at the old location.
                DateTime movedAt = DateTime.Now;
                ContentRecord.Builder stub = store.New(fromPath, movedAt);
                stub.SetHttpStatus((uint)HttpStatusCode.Redirect);
                stub.SetContentRedirect(toPath);
                store.Add(stub.ContentUri, stub.Build());
            }
        }
        /// <summary>
        /// Records a fetched response body for <paramref name="path"/>: updates the record's
        /// metadata, writes the bytes when their hash differs from the stored original hash,
        /// and then hands the record and bytes to <c>ProcessFileContent</c>.
        /// </summary>
        /// <param name="path">Key of the content record.</param>
        /// <param name="status">HTTP status written to the record.</param>
        /// <param name="contentType">Content type written to the record.</param>
        /// <param name="etag">ETag written to the record; ignored when null or empty.</param>
        /// <param name="modified">Not read by this method.</param>
        /// <param name="contents">Response body; an empty body is reported on stderr but still stored.</param>
        public void SetContents(string path, HttpStatusCode status, string contentType, string etag, DateTime?modified, byte[] contents)
        {
            if (contents.Length == 0)
            {
                Console.Error.WriteLine("{0} - {1}  (Content is empty)", (int)status, path);
            }

            ContentRecord rec        = _data[path];
            ITransactable pendingUpdate = null;

            try
            {
                ContentRecord.Builder draft = rec.ToBuilder()
                                              .SetContentUri(path)
                                              .SetLastCrawled(CrawlTime)
                                              .SetLastValid(CrawlTime)
                                              .SetHttpStatus((uint)status)
                                              .ClearContentRedirect()
                                              .SetContentType(contentType)
                                              .SetContentLength((uint)contents.Length);

                if (!String.IsNullOrEmpty(etag))
                {
                    draft.SetETag(etag);
                }

                // Only rewrite the stored bytes when the payload hash differs from the recorded one.
                string freshHash = Hash.SHA256(contents).ToString();
                if (freshHash != draft.HashOriginal)
                {
                    Modified = true;
                    draft.SetHashOriginal(freshHash);
                    draft.SetDateModified(CrawlTime);
                    pendingUpdate = _data.WriteContent(draft, contents);
                }

                rec = draft.Build();
                bool stored = _data.AddOrUpdate(path, rec);
                if (stored && pendingUpdate != null)
                {
                    pendingUpdate.Commit();
                    pendingUpdate.Dispose();
                    // Cleared so the finally block does not roll back a committed write.
                    pendingUpdate = null;
                }
            }
            finally
            {
                // Still set here means the write was never committed (or commit/dispose threw).
                if (pendingUpdate != null)
                {
                    pendingUpdate.Rollback();
                    pendingUpdate.Dispose();
                }
            }

            ProcessFileContent(rec, contents);
        }
 /// <summary>
 /// Stamps the record for <paramref name="path"/> with the current crawl time as both
 /// its last-crawled and last-valid time, leaving all other fields as they are.
 /// </summary>
 /// <param name="path">Key of the content record to update.</param>
 public void SetNotModified(string path)
 {
     _data.Update(
         path,
         existing => existing.ToBuilder()
                     .SetLastCrawled(CrawlTime)
                     .SetLastValid(CrawlTime)
                     .Build());
 }
 /// <summary>
 /// Reports a failed request on stderr and stores the status on the record for
 /// <paramref name="path"/>. The last-valid time is not touched.
 /// </summary>
 /// <param name="path">Key of the content record to update.</param>
 /// <param name="status">HTTP status written to the record.</param>
 public void SetHttpError(string path, HttpStatusCode status)
 {
     Console.Error.WriteLine("{0} - {1}", (int)status, path);

     _data.Update(
         path,
         existing => existing.ToBuilder()
                     .SetContentUri(path)
                     .SetLastCrawled(CrawlTime)
                     .SetHttpStatus((uint)status)
                     .Build());
 }
Exemple #6
0
 /// <summary>
 /// Writes <paramref name="contents"/> for the record being built, using the compressed
 /// form when <c>TryCompress</c> succeeds, and records the matching lengths and hash on
 /// <paramref name="builder"/>.
 /// </summary>
 /// <param name="builder">Record builder that receives the content length, content hash and compressed length.</param>
 /// <param name="contents">Uncompressed bytes to store.</param>
 /// <returns>The transaction returned by <c>WriteBytes</c> for the stored payload.</returns>
 public ITransactable WriteContent(ContentRecord.Builder builder, byte[] contents)
 {
     builder.SetContentLength((uint)contents.Length);

     byte[] packed;
     bool   isPacked = contents.TryCompress(out packed);
     byte[] payload  = isPacked ? packed : contents;

     // The stored hash always covers the bytes that are actually written.
     builder.SetHashContents(Hash.SHA256(payload).ToString());
     if (isPacked)
     {
         builder.SetCompressedLength((uint)packed.Length);
     }
     else
     {
         builder.ClearCompressedLength();
     }

     return WriteBytes(builder, payload);
 }
Exemple #7
0
        /// <summary>
        /// Writes <paramref name="content"/> to the file identified by the builder's content
        /// store id, assigning a fresh id first when the builder has none.
        /// </summary>
        /// <param name="builder">Record builder; receives a new content store id when it has none.</param>
        /// <param name="content">Bytes to write.</param>
        /// <returns>
        /// The <c>ReplaceFile</c> holding the written bytes. The caller is expected to commit
        /// or roll it back and dispose it.
        /// </returns>
        private ITransactable WriteBytes(ContentRecord.Builder builder, byte[] content)
        {
            AssertModify();
            if (!builder.HasContentStoreId)
            {
                ulong  fid;
                string name;
                // Keep drawing ids until one maps to a file name that is not already on disk.
                do
                {
                    fid  = Guid.NewGuid().ToUInt64();
                    name = FileName(fid);
                }while(File.Exists(name));

                builder.SetContentStoreId(fid);
            }

            ReplaceFile f = new ReplaceFile(FileName(builder.ContentStoreId));

            try
            {
                f.WriteAllBytes(content);
            }
            catch
            {
                // The caller never receives the transaction when the write fails, so it must be
                // released here; otherwise nobody disposes it.
                // NOTE(review): presumably disposing an uncommitted ReplaceFile discards the pending data - confirm.
                ((IDisposable)f).Dispose();
                throw;
            }

            return(f);
        }
Exemple #8
0
        /// <summary>
        /// Rewrites the stored bytes of an existing record and updates the record to match.
        /// </summary>
        /// <param name="rec">Record whose <c>ContentUri</c> identifies the entry to update.</param>
        /// <param name="bytes">New content bytes.</param>
        /// <exception cref="ApplicationException">
        /// Thrown when the rebuilt record differs from the stored one but <c>Update</c> reports failure.
        /// </exception>
        public void WriteContent(ContentRecord rec, byte[] bytes)
        {
            // Set by the update callback; the lambda captures this local directly.
            bool changed = false;

            bool updated = Update(rec.ContentUri, current =>
            {
                ContentRecord.Builder draft = current.ToBuilder();
                using (ITransactable pending = WriteContent(draft, bytes))
                {
                    pending.Commit();
                }

                ContentRecord rebuilt = draft.Build();
                changed = !rebuilt.Equals(current);
                return rebuilt;
            });

            if (changed && !updated)
            {
                throw new ApplicationException("Record not found.");
            }
        }
        /// <summary>
        /// Copies every record, and its stored bytes when it has any, into <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">Destination storage.</param>
        /// <param name="fnprocess">
        /// Optional transform applied to each record's bytes before they are written; may be null.
        /// </param>
        /// <remarks>
        /// When <c>Overwrite</c> is set, records are stored with <c>AddOrUpdate</c>, otherwise with
        /// <c>Add</c>. A record that could not be stored is reported on stderr and the copy continues.
        /// </remarks>
        public void CopyTo(ContentStorage writer, Func <ContentRecord, byte[], byte[]> fnprocess)
        {
            foreach (KeyValuePair <string, ContentRecord> entry in _content)
            {
                string                key     = entry.Key;
                ContentRecord         record  = entry.Value;
                ContentRecord.Builder builder = record.ToBuilder();
                bool                  stored;

                if (!record.HasContentStoreId)
                {
                    // Metadata-only record: nothing to write besides the index entry.
                    stored = Overwrite
                           ? writer.AddOrUpdate(key, builder.Build())
                           : writer.Add(key, builder.Build());
                }
                else
                {
                    byte[] payload = _content.ReadContent(record, true);
                    if (fnprocess != null)
                    {
                        payload = fnprocess(record, payload);
                    }

                    using (ITransactable pending = writer.WriteContent(builder, payload))
                    {
                        stored = Overwrite
                               ? writer.AddOrUpdate(key, builder.Build())
                               : writer.Add(key, builder.Build());

                        // Commit the bytes only when the index accepted the record.
                        if (stored)
                        {
                            pending.Commit();
                        }
                    }
                }

                if (!stored)
                {
                    Console.Error.WriteLine("Path already exists " + key);
                }
            }
        }
        /// <summary>
        /// Finds records whose content hash matches an earlier record and either removes them
        /// (rewriting links to the kept record) or replaces them with redirects to it.
        /// </summary>
        /// <param name="site">Root address of the site copy; used to locate the storage and as the base URI.</param>
        /// <param name="remove">
        /// When true, links are rewritten through a <c>ContentParser</c> and duplicates are removed;
        /// when false, each duplicate is replaced by a redirect record.
        /// </param>
        /// <param name="noPrompt">When true, the confirmation prompt is skipped.</param>
        public void Deduplicate(
            [Argument("site", "s", Description = "The root http address of the website copy.")]
            string site,
            [Argument("remove", "r", DefaultValue = false, Description = "True to remove the page and modify source links, otherwise inserts a redirect.")]
            bool remove,
            [Argument("noprompt", "q", DefaultValue = false, Description = "True to stop prompt for confirmation before changing content.")]
            bool noPrompt)
        {
            using (ContentStorage store = new ContentStorage(StoragePath(site), false))
            {
                // duplicate path -> path of the first record seen with the same content hash
                Dictionary <string, string> replacements = new Dictionary <string, string>(StringComparer.Ordinal);
                // content hash -> path of the first record seen with that hash
                Dictionary <string, string> hashes       = new Dictionary <string, string>(StringComparer.Ordinal);

                foreach (KeyValuePair <string, ContentRecord> item in store)
                {
                    if (!item.Value.HasHashContents)
                    {
                        continue;
                    }

                    string contentHash = item.Value.HashContents;
                    string firstSeen;
                    if (!hashes.TryGetValue(contentHash, out firstSeen))
                    {
                        hashes.Add(contentHash, item.Key);
                        continue;
                    }

                    replacements[item.Key] = firstSeen;
                    Console.WriteLine("{0,-38} => {1,-38}", item.Key, firstSeen);
                }

                if (replacements.Count <= 0)
                {
                    return;
                }
                if (!noPrompt && !new ConfirmPrompt().Continue("Replace all of the above links"))
                {
                    return;
                }

                Uri baseUri = new Uri(site, UriKind.Absolute);

                if (remove)
                {
                    // Point every same-host link to a duplicate at the record that is kept.
                    ContentParser parser = new ContentParser(store, baseUri);
                    parser.RewriteUri += link =>
                    {
                        string keptPath;
                        if (!link.IsSameHost(baseUri) ||
                            !replacements.TryGetValue(link.NormalizedPathAndQuery(), out keptPath))
                        {
                            return link;
                        }
                        return new Uri(baseUri, keptPath);
                    };
                    parser.ProcessAll();
                }

                foreach (KeyValuePair <string, string> pair in replacements)
                {
                    string        duplicatePath = pair.Key;
                    ContentRecord duplicate     = store[duplicatePath];
                    store.Remove(duplicatePath);

                    if (remove)
                    {
                        continue;
                    }

                    // Re-add the path as a content-less redirect to the kept record.
                    ContentRecord.Builder stub = duplicate.ToBuilder();
                    stub.ClearCompressedLength();
                    stub.ClearContentLength();
                    stub.ClearContentStoreId();
                    stub.ClearContentType();
                    stub.ClearHashContents();
                    stub.SetHttpStatus((uint)HttpStatusCode.Redirect);
                    stub.SetContentRedirect(pair.Value);
                    store.Add(duplicatePath, stub.Build());
                }
            }
        }