Ejemplo n.º 1
0
        /// <summary>
        ///  Save the consumer, then record the last cutoff written under the
        ///  configuration name with a ".CSV" suffix.
        /// </summary>
        /// <param name="consumer">Consumer whose Save() is invoked.</param>
        /// <param name="lastCutoffWritten">Cutoff value handed to ItemProviderUtilities.SaveLastCutoff.</param>
        private void Save(IItemConsumer consumer, DateTime lastCutoffWritten)
        {
            // Write the data first; the cutoff is only recorded once the save has returned
            Trace.WriteLine("Saving...");
            consumer.Save();
            Trace.WriteLine("Save Complete.");

            // Record the new cutoff
            var table = this.Configuration.ArribaTable;
            string cutoffName = this.ConfigurationName + ".CSV";
            ItemProviderUtilities.SaveLastCutoff(table, cutoffName, lastCutoffWritten);
        }
Ejemplo n.º 2
0
        /// <summary>
        ///  Save the consumer while timing the save, then record the last cutoff written.
        /// </summary>
        /// <param name="consumer">Consumer whose Save() is invoked.</param>
        /// <param name="saveWatch">Stopwatch that accumulates the time spent inside consumer.Save().</param>
        /// <param name="lastCutoffWritten">Cutoff value handed to ItemProviderUtilities.SaveLastCutoff.</param>
        private void Save(IItemConsumer consumer, Stopwatch saveWatch, DateTime lastCutoffWritten)
        {
            // Save the data itself
            Trace.WriteLine("Saving...");
            saveWatch.Start();
            try
            {
                consumer.Save();
            }
            finally
            {
                // Stop even when Save() throws; otherwise the watch keeps running
                // and every later reading of saveWatch.Elapsed is inflated.
                saveWatch.Stop();
            }
            Trace.WriteLine("Save Complete.");

            // Record the new last cutoff written (only reached when the save succeeded)
            ItemProviderUtilities.SaveLastCutoff(this.Configuration.ArribaTable, this.ConfigurationName, lastCutoffWritten);
        }
Ejemplo n.º 3
0
        /// <summary>
        ///  Read blocks from a CsvReaderItemProvider starting at the last saved cutoff,
        ///  append each block to the consumer, and save whenever more than
        ///  WriteAfterMinutes have elapsed since the previous save. On the way out
        ///  (success or failure) the provider is disposed, a final save is attempted,
        ///  and the consumer is disposed.
        /// </summary>
        /// <param name="consumer">Consumer that receives the blocks; it is disposed before this method returns.</param>
        public void Import(IItemConsumer consumer)
        {
            DateTime lastCutoffWritten = ItemProviderUtilities.LoadLastCutoff(this.Configuration.ArribaTable, this.ConfigurationName + ".CSV", false);
            Stopwatch saveWatch = null;

            CsvReaderItemProvider provider = null;

            try
            {
                provider = new CsvReaderItemProvider(this.Configuration.ArribaTable, this.ChangedDateColumn, lastCutoffWritten, DateTime.UtcNow);

                while (true)
                {
                    // Get another batch of items
                    Console.Write("[");
                    DataBlock block = provider.GetNextBlock(BatchSize);
                    if (block == null || block.RowCount == 0)
                    {
                        break;
                    }

                    // Append them
                    Console.Write("]");
                    consumer.Append(block);

                    // Track the last item changed date; ignore the value when the
                    // conversion to DateTime fails rather than trusting the out parameter
                    DateTime lastItemInBlock;
                    bool converted = Value.Create(block[block.RowCount - 1, block.IndexOfColumn(this.ChangedDateColumn)]).TryConvert<DateTime>(out lastItemInBlock);
                    if (converted && lastItemInBlock > lastCutoffWritten)
                    {
                        lastCutoffWritten = lastItemInBlock;
                    }

                    // Start timing after the first block; save once enough time has passed
                    if (saveWatch == null)
                    {
                        saveWatch = Stopwatch.StartNew();
                    }
                    if (saveWatch.Elapsed.TotalMinutes > WriteAfterMinutes)
                    {
                        Save(consumer, lastCutoffWritten);
                        saveWatch.Restart();
                    }
                }
            }
            finally
            {
                try
                {
                    // provider is still null when its constructor threw; disposing it
                    // unconditionally would replace the real error with a NullReferenceException
                    if (provider != null)
                    {
                        provider.Dispose();
                    }
                }
                finally
                {
                    try
                    {
                        Save(consumer, lastCutoffWritten);
                    }
                    finally
                    {
                        // Always release the consumer, even if disposing the provider or saving failed
                        consumer.Dispose();
                    }
                }
            }
        }
        /// <summary>
        ///  Build a ProductStudioFullHistory from raw history text.
        ///  Null, empty, or whitespace input yields an empty history. Otherwise the text
        ///  is wrapped in root elements and deserialized with xmlSerializer; each record's
        ///  Value is passed through ItemProviderUtilities.ConvertLineBreaksToHtml.
        ///  If an InvalidOperationException is thrown along the way, the whole raw text is
        ///  returned as the Value of a single record.
        /// </summary>
        /// <param name="rawHistory">History text without a root node.</param>
        /// <returns>The deserialized history, an empty history, or a single-record fallback.</returns>
        public static ProductStudioFullHistory LoadFullHistory(string rawHistory)
        {
            // Nothing to parse: hand back an empty history
            if (String.IsNullOrWhiteSpace(rawHistory))
            {
                return new ProductStudioFullHistory();
            }

            Debug.Assert(xmlSerializer != null, "xmlSerializer must be initialized in the static constructor.");

            // The entries arrive without a root node, so wrap them in one before
            // handing the text to the XmlSerializer.
            string wrappedXml =
                "<ProductStudioFullHistory><ProductStudioChangeHistoryRecords>"
                + rawHistory
                + "</ProductStudioChangeHistoryRecords></ProductStudioFullHistory>";

            using (TextReader xmlReader = new StringReader(wrappedXml))
            {
                try
                {
                    ProductStudioFullHistory parsed = (ProductStudioFullHistory)xmlSerializer.Deserialize(xmlReader);

                    foreach (ProductStudioChangeHistory record in parsed.ProductStudioChangeHistoryRecords)
                    {
                        record.Value = ItemProviderUtilities.ConvertLineBreaksToHtml(record.Value);
                    }

                    return parsed;
                }
                catch (InvalidOperationException)
                {
                    // The field held data that could not be read as change history records.
                    // Keep the data anyway: return it whole as the Value of one record.
                    ProductStudioFullHistory fallback = new ProductStudioFullHistory();

                    ProductStudioChangeHistory wholeField = new ProductStudioChangeHistory();
                    wholeField.Value = rawHistory;

                    fallback.ProductStudioChangeHistoryRecords.Add(wholeField);
                    return fallback;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        ///  Entry point. Expects a configuration name and a mode argument, takes a
        ///  per-configuration file lock, loads Databases\&lt;configurationName&gt;\config.json,
        ///  and then either runs TfsItemProvider.EncryptPassword ("-password" mode) or
        ///  builds a consumer, a provider and a DefaultCrawler and crawls.
        /// </summary>
        /// <param name="args">args[0] is the configuration name, args[1] is the mode.</param>
        /// <returns>
        ///  -1 when fewer than two arguments are given; -2 when the lock could not be
        ///  obtained or an exception was caught; the result of EncryptPassword in
        ///  "-password" mode; otherwise 0.
        /// </returns>
        private static int Main(string[] args)
        {
            // Both a configuration name and a mode are required
            if (args.Length < 2)
            {
                Usage();
                return -1;
            }

            string configurationName = args[0];
            string mode = args[1].ToLowerInvariant();

            string lockName = String.Format("Arriba.TfsWorkItemCrawler.{0}.lock", configurationName);
            using (FileLock locker = FileLock.TryGet(lockName))
            {
                try
                {
                    // No lock means another crawler holds it (no duplicate crawlers)
                    if (locker == null)
                    {
                        Console.WriteLine("Another instance running. Stopping.");
                        return -2;
                    }

                    // Locate config.json: look two folders up for Databases\<configurationName>,
                    // and fall back to three folders up when that file does not exist
                    string exeFolder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);
                    string twoUpPath = Path.Combine(exeFolder, @"..\..\Databases", configurationName, "config.json");
                    string configJsonPath = File.Exists(twoUpPath)
                        ? twoUpPath
                        : Path.Combine(exeFolder, @"..\..\..\Databases", configurationName, "config.json");

                    string configJson = File.ReadAllText(configJsonPath);
                    CrawlerConfiguration config = JsonConvert.DeserializeObject<CrawlerConfiguration>(configJson);
                    config.ConfigurationName = configurationName;

                    // Password storage mode
                    if (String.Equals(mode, "-password", StringComparison.OrdinalIgnoreCase))
                    {
                        return TfsItemProvider.EncryptPassword(config);
                    }

                    // Build the consumer first, then the provider
                    IItemConsumer consumer = ItemConsumerUtilities.Build(config);
                    IItemProvider provider = ItemProviderUtilities.Build(config);

                    // Narrow the provider's columns by the include list, then the exclude list
                    IEnumerable<ColumnDetails> selected = provider.GetColumns();
                    if (config.ColumnsToInclude.Count > 0)
                    {
                        selected = selected.Where(column => config.ColumnsToInclude.Contains(column.Name));
                    }
                    if (config.ColumnsToExclude.Count > 0)
                    {
                        selected = selected.Where(column => !config.ColumnsToExclude.Contains(column.Name));
                    }
                    List<ColumnDetails> columnsToAdd = selected.ToList();

                    // Create the target table (if it doesn't already exist)
                    consumer.CreateTable(columnsToAdd, config.LoadPermissions());

                    // Build a crawler and crawl the items in restartable order
                    DefaultCrawler crawler = new DefaultCrawler(config, columnsToAdd.Select(column => column.Name), configurationName, mode != "-i");
                    crawler.Crawl(provider, consumer);

                    return 0;
                }
                catch (Exception ex)
                {
                    // An AggregateException is reported one inner exception at a time;
                    // anything else is reported as-is
                    AggregateException aggregate = ex as AggregateException;
                    if (aggregate == null)
                    {
                        Trace.TraceError(String.Format("ERROR: {0}\r\n{1}", Environment.CommandLine, ex));
                    }
                    else
                    {
                        foreach (Exception inner in aggregate.InnerExceptions)
                        {
                            Trace.TraceError(String.Format("ERROR: {0}\r\n{1}", Environment.CommandLine, inner));
                        }
                    }

                    return -2;
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        ///  Crawl items changed since the last saved cutoff and append them to the consumer.
        ///  Time is walked in windows from the loaded cutoff up to now. For each window the
        ///  changed items are ordered by ChangedDate, split into pages of BatchSize, read
        ///  through the provider in parallel groups of up to CrawlMaxParallelism pages, and
        ///  appended serially. The consumer is saved whenever more than WriteAfterMinutes
        ///  have elapsed since the previous save. Read, write and save failures are traced
        ///  and tolerated until more than 10 have occurred, after which the exception is rethrown.
        ///  On the way out the provider is disposed, the consumer is saved (if any items
        ///  were appended) and disposed, and a summary line is traced.
        /// </summary>
        /// <param name="provider">Source of items; disposed before this method returns.</param>
        /// <param name="consumer">Destination for items; disposed before this method returns.</param>
        public void Crawl(IItemProvider provider, IItemConsumer consumer)
        {
            ParallelOptions parallelOptions = new ParallelOptions()
            {
                MaxDegreeOfParallelism = CrawlMaxParallelism
            };
            object locker = new object();

            int       exceptionCount          = 0;
            int       itemCount               = 0;
            DateTime  lastChangedItemAppended = DateTime.MinValue;
            Stopwatch readWatch               = new Stopwatch();
            Stopwatch writeWatch              = new Stopwatch();
            Stopwatch saveWatch               = new Stopwatch();
            Stopwatch sinceLastWrite          = null;

            try
            {
                DateTime previousLastChangedItem = ItemProviderUtilities.LoadLastCutoff(this.Configuration.ArribaTable, this.ConfigurationName, this.Rebuild);
                DateTime now = DateTime.UtcNow;
                lastChangedItemAppended = previousLastChangedItem;

                Trace.WriteLine(string.Format("Last Updated item was updated at '{0}'...", previousLastChangedItem));

                // For clean crawl, get more than a day at a time until first items found
                int intervalDays = ((now - previousLastChangedItem).TotalDays > 365 ? 365 : 1);

                DateTime end;
                for (DateTime start = previousLastChangedItem; start <= now; start = end)
                {
                    end = start.AddDays(intervalDays);

                    // Find the set of items to retrieve
                    Trace.WriteLine(string.Format("Identifying items changed between '{0}' and '{1}'...", start, end));
                    IList<ItemIdentity> itemsToGet = provider.GetItemsChangedBetween(start, end);

                    // If many items are returned, crawl by day. Otherwise crawl by week,
                    // unless still in the initial 365-day stride.
                    if (itemsToGet != null && itemsToGet.Count > 1000)
                    {
                        intervalDays = 1;
                    }
                    else if (intervalDays != 365)
                    {
                        intervalDays = 7;
                    }

                    // If no items in this batch, get the next batch
                    if (itemsToGet == null || itemsToGet.Count == 0)
                    {
                        continue;
                    }

                    // Start the save timer once the first item list has been found
                    if (sinceLastWrite == null)
                    {
                        sinceLastWrite = Stopwatch.StartNew();
                    }

                    // Get the items in blocks in ascending order by Changed Date [restartability]
                    Trace.WriteLine(string.Format("Downloading {0:n0} items...", itemsToGet.Count));

                    List<IList<ItemIdentity>> pages = new List<IList<ItemIdentity>>(itemsToGet.OrderBy(ii => ii.ChangedDate).Page(BatchSize));

                    for (int nextPageIndex = 0; nextPageIndex < pages.Count; nextPageIndex += CrawlMaxParallelism)
                    {
                        try
                        {
                            int pageCountThisIteration = Math.Min(CrawlMaxParallelism, pages.Count - nextPageIndex);

                            // Get items in parallel, honoring the configured degree of parallelism
                            readWatch.Start();
                            DataBlock[] blocks = new DataBlock[pageCountThisIteration];
                            Parallel.For(0, pageCountThisIteration, parallelOptions, (relativeIndex) =>
                            {
                                try
                                {
                                    // Read the next page of items
                                    Console.Write("[");
                                    blocks[relativeIndex] = provider.GetItemBlock(pages[nextPageIndex + relativeIndex], this.ColumnNames);
                                }
                                catch (Exception e)
                                {
                                    // Several readers can fail at once; count under the lock so no failure is lost
                                    int failuresSoFar;
                                    lock (locker)
                                    {
                                        failuresSoFar = ++exceptionCount;
                                    }

                                    Trace.WriteLine(string.Format("Exception when fetching {0} items. Error: {1}\r\nItem IDs: {2}", ConfigurationName, e.ToString(), String.Join(", ", pages[nextPageIndex + relativeIndex].Select(r => r.ID))));
                                    if (failuresSoFar > 10)
                                    {
                                        throw;
                                    }
                                }
                            });
                            readWatch.Stop();

                            // Append items serially
                            writeWatch.Start();
                            for (int relativeIndex = 0; relativeIndex < pageCountThisIteration; ++relativeIndex)
                            {
                                try
                                {
                                    // Write the next page of items
                                    Console.Write("]");

                                    DataBlock block = blocks[relativeIndex];
                                    if (block == null)
                                    {
                                        // No block was produced for this page (a failed read was already
                                        // counted and traced above); do not count the same failure again
                                        continue;
                                    }

                                    consumer.Append(block);

                                    // Track total count appended
                                    itemCount += block.RowCount;

                                    // Track last changed date written
                                    DateTime latestCutoffInGroup = pages[nextPageIndex + relativeIndex].Max(ii => ii.ChangedDate);
                                    if (latestCutoffInGroup > lastChangedItemAppended)
                                    {
                                        lastChangedItemAppended = latestCutoffInGroup;
                                    }
                                }
                                catch (Exception e)
                                {
                                    exceptionCount++;
                                    Trace.WriteLine(string.Format("Exception when writing {0} items. Error: {1}\r\nItem IDs: {2}", ConfigurationName, e.ToString(), String.Join(", ", pages[nextPageIndex + relativeIndex].Select(r => r.ID))));
                                    if (exceptionCount > 10)
                                    {
                                        throw;
                                    }
                                }
                            }
                            writeWatch.Stop();

                            // Save table if enough time has elapsed
                            if (sinceLastWrite.Elapsed.TotalMinutes > WriteAfterMinutes)
                            {
                                Console.WriteLine();

                                try
                                {
                                    Save(consumer, saveWatch, lastChangedItemAppended);
                                    sinceLastWrite.Restart();
                                }
                                catch (Exception e)
                                {
                                    exceptionCount++;
                                    Trace.WriteLine(string.Format("Exception saving {0} batch. Error: {1}", ConfigurationName, e.ToString()));

                                    if (exceptionCount > 10)
                                    {
                                        throw;
                                    }
                                }
                            }
                        }
                        catch (Exception)
                        {
                            // The "u" format appends 'Z' without converting, so the value must already be UTC
                            Trace.WriteLine(String.Format("Crawler Failed. At {1:u}, {2:n0} items, {3} read, {4} write, {5} save for '{0}'.", this.ConfigurationName, DateTime.UtcNow, itemCount, readWatch.Elapsed.ToFriendlyString(), writeWatch.Elapsed.ToFriendlyString(), saveWatch.Elapsed.ToFriendlyString()));
                            throw;
                        }
                    }

                    Console.WriteLine();
                }
            }
            finally
            {
                try
                {
                    // Disconnect from the source
                    if (provider != null)
                    {
                        provider.Dispose();
                    }
                }
                finally
                {
                    try
                    {
                        // Save (if any items were added), even if disposing the provider failed
                        if (consumer != null && itemCount > 0)
                        {
                            Save(consumer, saveWatch, lastChangedItemAppended);
                        }
                    }
                    finally
                    {
                        // Disconnect from the consumer, even if the final save failed
                        if (consumer != null)
                        {
                            consumer.Dispose();
                        }

                        Console.WriteLine();

                        // Old tracing logic. The "u" format appends 'Z' without converting,
                        // so the value must already be UTC.
                        Trace.WriteLine(String.Format("Crawler Done. At {1:u}, {2:n0} items, {3} read, {4} write, {5} save for '{0}'.", this.ConfigurationName, DateTime.UtcNow, itemCount, readWatch.Elapsed.ToFriendlyString(), writeWatch.Elapsed.ToFriendlyString(), saveWatch.Elapsed.ToFriendlyString()));
                    }
                }
            }
        }