Пример #1
0
        /// <summary>
        /// Appends a <see cref="ParsingError"/> for the given line to the parsing report and tells
        /// the caller whether parsing may go on.
        /// </summary>
        /// <param name="error">The reason the line could not be parsed</param>
        /// <param name="parsingReport">The report the error is appended to</param>
        /// <param name="usageFile">Path of the file being parsed; only its file name is stored in the error</param>
        /// <param name="lineNumber">The line number to store in the error</param>
        /// <returns>
        /// <c>false</c> once the number of recorded errors is greater than <c>_maximumParsingErrorsCount</c>
        /// (the report is then also flagged as not completed successfully); otherwise <c>true</c>.
        /// </returns>
        private bool ShouldContinueAfterError(ParsingErrorReason error, FileParsingReport parsingReport, string usageFile, long lineNumber)
        {
            string fileName = Path.GetFileName(usageFile);
            var recordedError = new ParsingError(fileName, lineNumber, error);
            parsingReport.Errors.Add(recordedError);

            // The limit is checked after the new error has been counted.
            bool errorLimitExceeded = parsingReport.Errors.Count > _maximumParsingErrorsCount;
            if (errorLimitExceeded)
            {
                parsingReport.IsCompletedSuccessfuly = false;
            }

            return !errorLimitExceeded;
        }
Пример #2
0
        /// <summary>
        /// Parse a usage events file into usage events items.
        /// Expected format is userId,itemId,timestamp[,event-type[,event-weight]]
        /// </summary>
        /// <param name="usageFile">Path of the comma-delimited usage file to read</param>
        /// <param name="parsingReport">
        /// Report updated while reading: total and successful line counters, errors and warnings
        /// </param>
        /// <param name="defaultEventTimestamp">Timestamp forwarded to <c>ParseUsageEvent</c> for every record</param>
        /// <returns>
        /// A lazily evaluated sequence of the records that parsed without error or warning.
        /// The sequence ends early when <c>ShouldContinueAfterError</c> returns <c>false</c>.
        /// </returns>
        private IEnumerable<SarUsageEvent> ParseUsageEventsFile(string usageFile, FileParsingReport parsingReport, DateTime defaultEventTimestamp)
        {
            using (var reader = new TextFieldParser(usageFile)
            {
                Delimiters = new[] { "," }
            })
            {
                while (!reader.EndOfData)
                {
                    string[] fields;
                    parsingReport.TotalLinesCount++;

                    // Remember where this read starts. It is used as a fallback below because
                    // TextFieldParser.LineNumber reports -1 once the last record has been consumed.
                    long lineNumberBeforeRead = reader.LineNumber;

                    try
                    {
                        fields = reader.ReadFields();
                    }
                    catch (MalformedLineException ex)
                    {
                        if (ShouldContinueAfterError(ParsingErrorReason.MalformedLine,
                                                     parsingReport, usageFile, ex.LineNumber))
                        {
                            continue;
                        }

                        yield break;
                    }

                    // After a successful read the parser points at the next line, so the record
                    // that was just read ended on the previous one. At the end of the stream the
                    // parser returns -1, in which case "LineNumber - 1" would yield -2; fall back
                    // to the position captured before the read instead.
                    long nextLineNumber = reader.LineNumber;
                    long recordLineNumber = nextLineNumber == -1 ? lineNumberBeforeRead : nextLineNumber - 1;

                    ParsingErrorReason? parsingError;
                    ParsingErrorReason? parsingWarning;
                    SarUsageEvent usageEvent = ParseUsageEvent(fields, defaultEventTimestamp, out parsingError, out parsingWarning);
                    if (parsingError.HasValue)
                    {
                        if (ShouldContinueAfterError(parsingError.Value,
                                                     parsingReport, usageFile, recordLineNumber))
                        {
                            continue;
                        }

                        yield break;
                    }

                    if (parsingWarning.HasValue)
                    {
                        // A record with a warning is reported and skipped; it is not counted as successful.
                        parsingReport.Warnings.Add(new ParsingError(Path.GetFileName(usageFile), recordLineNumber,
                                                                    parsingWarning.Value));

                        continue;
                    }

                    parsingReport.SuccessfulLinesCount++;
                    yield return usageEvent;
                }
            }
        }
        /// <summary>
        /// Parse a catalog file to <see cref="SarCatalogItem"/> items.
        /// </summary>
        /// <param name="catalogFilePath">The file to parse</param>
        /// <param name="cancellationToken">A cancellation token used to abort the operation</param>
        /// <param name="catalogItems">The parsed catalog items; an empty list if parsing did not complete successfully</param>
        /// <param name="featureNames">
        /// The parsed names of the catalog items features, in the same order as the feature values in the catalog;
        /// an empty array if parsing did not complete successfully
        /// </param>
        /// <returns>The parsing report</returns>
        /// <exception cref="ArgumentNullException"><paramref name="catalogFilePath"/> is null, empty or whitespace</exception>
        /// <exception cref="ArgumentException">No file exists at <paramref name="catalogFilePath"/></exception>
        /// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/></exception>
        public FileParsingReport ParseCatalogFile(
            string catalogFilePath,
            CancellationToken cancellationToken,
            out IList<SarCatalogItem> catalogItems,
            out string[] featureNames)
        {
            if (string.IsNullOrWhiteSpace(catalogFilePath))
            {
                throw new ArgumentNullException(nameof(catalogFilePath));
            }

            if (!File.Exists(catalogFilePath))
            {
                throw new ArgumentException($"Failed to find catalog file under '{catalogFilePath}'", nameof(catalogFilePath));
            }

            _tracer.TraceInformation("Starting catalog file parsing");

            // parse the catalog file into catalog items
            var featureNamesIndex = new ConcurrentDictionary<string, uint>();
            var parsingReport = new FileParsingReport();
            var parsedCatalogItems = new List<SarCatalogItem>();
            foreach (SarCatalogItem parsedItem in ParseCatalogFile(catalogFilePath, featureNamesIndex, parsingReport))
            {
                // Observe cancellation while the file is being read rather than only after the
                // whole file has been parsed, so a large catalog can actually be aborted.
                cancellationToken.ThrowIfCancellationRequested();
                parsedCatalogItems.Add(parsedItem);
            }

            if (!parsingReport.IsCompletedSuccessfuly)
            {
                catalogItems = new List<SarCatalogItem>(0);
                featureNames = new string[0];
                _tracer.TraceError("Failed parsing catalog file");
                return parsingReport;
            }

            cancellationToken.ThrowIfCancellationRequested();

            // extract the feature names ordered by their index value
            featureNames = featureNamesIndex.OrderBy(kvp => kvp.Value).Select(kvp => kvp.Key).ToArray();

            // clear the feature index as it is no longer needed
            featureNamesIndex.Clear();

            // inflate feature vectors
            int numberOfFeatures = featureNames.Length;

            catalogItems = parsedCatalogItems.Select(item => InflateFeaturesVector(item, numberOfFeatures)).ToList();

            _tracer.TraceInformation("Finished catalog file parsing");
            return parsingReport;
        }
        /// <summary>
        /// Parses the usage files found in the given folder and collects the resulting usage events.
        /// </summary>
        /// <param name="usageFolder">A folder containing the usage files to parse</param>
        /// <param name="cancellationToken">A cancellation token forwarded to the internal parsing routine</param>
        /// <param name="usageEvents">The parsed usage events</param>
        /// <returns>The parsing report</returns>
        /// <exception cref="ArgumentNullException"><paramref name="usageFolder"/> is null, empty or whitespace</exception>
        /// <exception cref="ArgumentException">The folder <paramref name="usageFolder"/> does not exist</exception>
        public FileParsingReport ParseUsageEventFiles(string usageFolder, CancellationToken cancellationToken, out IList<SarUsageEvent> usageEvents)
        {
            bool folderNameMissing = string.IsNullOrWhiteSpace(usageFolder);
            if (folderNameMissing)
            {
                throw new ArgumentNullException(nameof(usageFolder));
            }

            bool folderExists = Directory.Exists(usageFolder);
            if (!folderExists)
            {
                throw new ArgumentException($"Failed to find usage files folder: '{usageFolder}'", nameof(usageFolder));
            }

            _tracer.TraceInformation("Starting usage files parsing");

            // Both timestamps start out at the moment parsing begins.
            DefaultEventTimestamp = DateTime.UtcNow;
            MostRecentEventTimestamp = DefaultEventTimestamp;

            var report = new FileParsingReport();

            // Materialise the lazily produced events so that all files are read before returning.
            var parsedEvents = ParseUsageEventFilesInternal(usageFolder, report, cancellationToken);
            usageEvents = parsedEvents.ToList();

            _tracer.TraceInformation("Finished usage files parsing");
            return report;
        }
        /// <summary>
        /// Parse the input catalog file into catalog items.
        /// </summary>
        /// <param name="catalogFilePath">Path of the comma-delimited catalog file to read</param>
        /// <param name="featureNamesIndex">Feature name index forwarded to <c>ParseCatalogItem</c> for every record</param>
        /// <param name="parsingReport">
        /// Report updated while reading: total and successful line counters, errors, warnings and,
        /// once enumeration ends, the completion flag
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of the records that parsed without error or warning.
        /// The sequence ends early, with the report flagged as failed, once the number of errors
        /// is greater than <c>_maximumParsingErrorsCount</c>.
        /// </returns>
        private IEnumerable<SarCatalogItem> ParseCatalogFile(
            string catalogFilePath,
            ConcurrentDictionary<string, uint> featureNamesIndex,
            FileParsingReport parsingReport)
        {
            using (var reader = new TextFieldParser(catalogFilePath)
            {
                Delimiters = new[] { "," }
            })
            {
                while (!reader.EndOfData)
                {
                    // Remember where this read starts. It is used as a fallback below because
                    // TextFieldParser.LineNumber reports -1 once the last record has been consumed.
                    long lineNumberBeforeRead = reader.LineNumber;

                    string[] fields;
                    try
                    {
                        parsingReport.TotalLinesCount++;
                        fields = reader.ReadFields();
                    }
                    catch (MalformedLineException)
                    {
                        parsingReport.Errors.Add(
                            new ParsingError(Path.GetFileName(catalogFilePath), reader.ErrorLineNumber,
                                             ParsingErrorReason.MalformedLine));
                        if (parsingReport.Errors.Count > _maximumParsingErrorsCount)
                        {
                            parsingReport.IsCompletedSuccessfuly = false;
                            yield break;
                        }

                        continue;
                    }

                    // After a successful read the parser points at the next line, so the record
                    // that was just read ended on the previous one. At the end of the stream the
                    // parser returns -1, in which case "LineNumber - 1" would yield -2; fall back
                    // to the position captured before the read instead.
                    long nextLineNumber = reader.LineNumber;
                    long recordLineNumber = nextLineNumber == -1 ? lineNumberBeforeRead : nextLineNumber - 1;

                    ParsingErrorReason? parsingError;
                    ParsingErrorReason? parsingWarning;
                    SarCatalogItem catalogItem = ParseCatalogItem(fields, featureNamesIndex, out parsingError, out parsingWarning);
                    if (parsingError.HasValue)
                    {
                        parsingReport.Errors.Add(
                            new ParsingError(Path.GetFileName(catalogFilePath), recordLineNumber, parsingError.Value));
                        if (parsingReport.Errors.Count > _maximumParsingErrorsCount)
                        {
                            parsingReport.IsCompletedSuccessfuly = false;
                            yield break;
                        }

                        continue;
                    }

                    if (parsingWarning.HasValue)
                    {
                        // A record with a warning is reported and skipped; it is not counted as successful.
                        parsingReport.Warnings.Add(
                            new ParsingError(Path.GetFileName(catalogFilePath), recordLineNumber, parsingWarning.Value));

                        continue;
                    }

                    parsingReport.SuccessfulLinesCount++;
                    yield return catalogItem;
                }
            }

            // no more lines to parse - mark the parsing as successful
            parsingReport.IsCompletedSuccessfuly = true;
        }
Пример #6
0
        /// <summary>
        /// Lazily parses every file in the given folder and streams the resulting usage events.
        /// </summary>
        /// <param name="usageFolder">The folder whose files are parsed</param>
        /// <param name="parsingReport">Report shared by all files; its completion flag is set when enumeration ends</param>
        /// <param name="defaultEventTimestamp">Timestamp forwarded to <c>ParseUsageEventsFile</c> for every file</param>
        /// <param name="cancellationToken">Checked before each file is opened</param>
        /// <returns>
        /// The usage events of all files, in the order the files are returned by <c>Directory.GetFiles</c>.
        /// Enumeration stops after a file once the number of errors is greater than
        /// <c>_maximumParsingErrorsCount</c>, with the report flagged as failed.
        /// </returns>
        private IEnumerable<SarUsageEvent> ParseUsageEventFilesInternal(string usageFolder, FileParsingReport parsingReport, DateTime defaultEventTimestamp, CancellationToken cancellationToken)
        {
            string[] usageFiles = Directory.GetFiles(usageFolder);
            for (int fileIndex = 0; fileIndex < usageFiles.Length; fileIndex++)
            {
                string currentFile = usageFiles[fileIndex];
                cancellationToken.ThrowIfCancellationRequested();

                // Trace the file name together with its size in megabytes.
                string fileName = Path.GetFileName(currentFile);
                double sizeInMegabytes = (double)new FileInfo(currentFile).Length / (1024 * 1024);
                _tracer.TraceInformation($"Parsing file {fileName} ({sizeInMegabytes:F2} MB)");

                foreach (SarUsageEvent parsedEvent in ParseUsageEventsFile(currentFile, parsingReport, defaultEventTimestamp))
                {
                    yield return parsedEvent;
                }

                // Errors accumulate across files; give up on the remaining files once over the limit.
                bool errorLimitExceeded = parsingReport.Errors.Count > _maximumParsingErrorsCount;
                if (errorLimitExceeded)
                {
                    parsingReport.IsCompletedSuccessfuly = false;
                    yield break;
                }
            }

            // every file was processed without exceeding the error limit - mark the parsing as successful
            parsingReport.IsCompletedSuccessfuly = true;
        }