예제 #1
0
        /// <summary>
        /// Scans a bzip2-compressed changeset file for <c>&lt;changeset id=</c> lines whose
        /// <c>created_at</c> date falls into the period described by <paramref name="year"/>,
        /// tracking day by day which rows appear on consecutive days.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the line that starts with <c>if (splits.Length &gt;= 10 ...</c> is damaged in this
        /// copy of the source (a <c>******</c> redaction fuses the uid/user check with a later
        /// <c>string.Format</c> call). The code that fills the per-day set and computes <c>dir</c> and
        /// <c>fileCsv</c> is missing, so the method does not compile as shown and must be restored from
        /// the original file.
        /// </remarks>
        /// <param name="fileChangesetAll">Path of the compressed changeset file; opened with <see cref="FileMode.Open"/>.</param>
        /// <param name="year">
        /// Period selector, interpreted by its length: 4 characters = year, 7 = year and month,
        /// 10 = year, month and day. The characters at positions 4 and 7 are separators and are not inspected.
        /// </param>
        static void AnalizeYearByChangesetAll(string fileChangesetAll, string year)
        {
            // NOTE(review): the early returns below leave this stream open; only the normal
            // path at the end of the method calls Close().
            var readerOsmBZip2Raw = new FileStream(fileChangesetAll, FileMode.Open);

            string dayStart   = string.Empty;
            string dayEnd     = string.Empty;
            var    dayCurrent = default(DateTime);

            // Derive the start day, the end bound and the first day to process from `year`.
            // NOTE(review): dayStart/dayEnd/curDay come from ToShortDateString() and are later compared
            // as strings with the first 10 characters of created_at - this presumably relies on the
            // current culture's short date format being yyyy-MM-dd; confirm.
            try
            {
                switch (year.Length)
                {
                case 4:     // year
                    var daySY = new DateTime(int.Parse(year.Substring(0, 4)), 1, 1);
                    var dayEY = daySY.AddYears(1);
                    dayStart   = daySY.ToShortDateString();
                    dayEnd     = dayEY.ToShortDateString();
                    dayCurrent = daySY;
                    break;

                case 7:     // month
                    var daySM = new DateTime(int.Parse(year.Substring(0, 4)), int.Parse(year.Substring(5, 2)), 1);
                    var dayEM = daySM.AddMonths(1);
                    dayStart   = daySM.ToShortDateString();
                    dayEnd     = dayEM.ToShortDateString();
                    dayCurrent = daySM;
                    break;

                case 10:     // day
                    var daySD = new DateTime(int.Parse(year.Substring(0, 4)), int.Parse(year.Substring(5, 2)), int.Parse(year.Substring(8, 2)));
                    // NOTE(review): here dayEnd equals dayStart, while the collection step further down
                    // requires date < dayEnd after dates < dayStart were skipped - confirm a single day can match.
                    dayStart   = dayEnd = daySD.ToShortDateString();
                    dayCurrent = daySD;
                    break;

                default:
                    Console.WriteLine("Wrong YEAR: {0}", year);
                    return;
                }
            }
            catch (Exception ex)
            {
                // Covers both non-numeric parts (int.Parse) and out-of-range dates (DateTime constructor).
                Console.WriteLine("Error parse YEAR: {0}\n{1}", year, ex.Message);
                return;
            }

            if (dayCurrent < new DateTime(2009, 4, 21))
            {
                Console.WriteLine("Changesets before 2009-04-21 not supported. Exit.");
                return;
            }

            // Last line of the previous compressed stream; it is prepended to the first line of the next one.
            string backupLine = string.Empty;
            string curDay     = dayCurrent.ToShortDateString();
            int    chainDays  = 1;

            var searchState = SearchState.No;

            var indexBZip2 = GetOrCreateIndexBZip2Achives(readerOsmBZip2Raw);

            Console.Write("Progress: {0} - ", curDay);

            var prevDaySet         = default(HashSet <RowAnalizeStore>);
            var curDaySet          = new HashSet <RowAnalizeStore>();
            var yearSet            = new HashSet <RowAnalizeStore>();
            var hashTableByUid     = new Hashtable();
            var prevHashTableByUid = new Hashtable();

            //var bIndex = indexBZip2.BinarySearch(new BZip2OsmIndex { StartDateTime = dayCurrent });
            //while (indexBZip2[--bIndex].StartDateTime == dayCurrent)
            //{ }
            //readerOsmBZip2Raw.Position = indexBZip2[bIndex + 1].Position;
            // Start reading one index entry before the first entry whose StartDateTime equals dayCurrent.
            // NOTE(review): FindIndex returns -1 when nothing matches and 0 when the first entry matches;
            // both make prevBlockIndex negative and the indexer below throw - confirm this cannot happen.
            var prevBlockIndex = indexBZip2.FindIndex(el => el.StartDateTime == dayCurrent) - 1;

            readerOsmBZip2Raw.Position = indexBZip2[prevBlockIndex].Position;

            // Each pass decompresses one bzip2 stream starting at the raw stream's current position.
            while (readerOsmBZip2Raw.Length > readerOsmBZip2Raw.Position && searchState != SearchState.Complete)
            {
                var readerOsmBZip2 = new BZip2InputStream(readerOsmBZip2Raw);
                // Keep the underlying FileStream open when this decompressor is disposed.
                readerOsmBZip2.IsStreamOwner = false;
                var readerOsm = new StreamReader(readerOsmBZip2, Encoding.UTF8);

                while (!readerOsm.EndOfStream && searchState != SearchState.Complete)
                {
                    var line = readerOsm.ReadLine();
                    if (readerOsm.EndOfStream)
                    {
                        // The final line of a stream is not processed here; it is kept and
                        // joined with the first line read from the next stream.
                        backupLine = line;
                        continue;
                    }
                    if (backupLine.Length > 0)
                    {
                        line       = backupLine + line;
                        backupLine = string.Empty;
                    }

                    if (line.Contains("<changeset id="))
                    {
                        // Splitting on '"' puts attribute names at even indexes and their values at odd indexes.
                        var splits = line.Split('"');
                        if (splits[2] == " created_at=")
                        {
                            var date = splits[3].Substring(0, 10);

                            // earlier than the requested start
                            if (date.CompareTo(dayStart) < 0)
                            {
                                continue;
                            }
                            if (searchState == SearchState.No)
                            {
                                searchState = SearchState.Found;
                            }

                            // The line belongs to a different day than the one being collected: close the current day.
                            if (date != curDay)
                            {
                                if (prevDaySet != null)
                                {
                                    // Rows present on only one of the two days go to yearSet; rows present
                                    // on both stay in curDaySet. Membership depends on RowAnalizeStore
                                    // equality, which is defined outside this view.
                                    var forYear = new HashSet <RowAnalizeStore>(curDaySet);
                                    curDaySet.IntersectWith(prevDaySet);

                                    forYear.ExceptWith(curDaySet);
                                    yearSet.UnionWith(forYear);

                                    prevDaySet.ExceptWith(curDaySet);
                                    yearSet.UnionWith(prevDaySet);


                                    // Extend the chain: record its length and add the previous day's changeset count.
                                    foreach (var row in curDaySet)
                                    {
                                        row.ChainDays        = chainDays;
                                        row.ChangesetsCount += ((RowAnalizeStore)prevHashTableByUid[row.Uid]).ChangesetsCount;
                                    }

                                    prevDaySet.Clear();
                                    prevHashTableByUid.Clear();

                                    // No row appears on both days, so the scan is marked complete.
                                    if (curDaySet.Count == 0)
                                    {
                                        searchState = SearchState.Complete;
                                    }

                                    // Move the cursor back 10 columns, presumably so the next Write overwrites the previously printed day.
                                    Console.SetCursorPosition(Console.CursorLeft - 10, Console.CursorTop);
                                }

                                chainDays++;

                                Console.Write(curDay);

                                if (searchState == SearchState.Found)
                                {
                                    prevDaySet         = curDaySet;
                                    prevHashTableByUid = hashTableByUid;
                                }

                                curDaySet      = new HashSet <RowAnalizeStore>();
                                hashTableByUid = new Hashtable();

                                dayCurrent = dayCurrent.AddDays(1);
                                curDay     = dayCurrent.ToShortDateString();
                            }

                            if (searchState == SearchState.Found && date == curDay && date.CompareTo(dayEnd) < 0)
                            {
                                // otherwise these are anonymous edits
                                // NOTE(review): the next line is corrupted in this copy ("******"); the original
                                // code between the uid/user check and the string.Format call is missing.
                                if (splits.Length >= 10 && splits[10] == " uid=" && splits[8] == " user="******"{0}_{1}.csv", Path.GetFileNameWithoutExtension(fileChangesetAll), year);
                                string fileResult = Path.Combine(dir, fileCsv);
                                // An existing result file is never overwritten.
                                if (!File.Exists(fileResult))
                                {
                                    ExportToCsv(fileResult, yearSet);
                                    Console.WriteLine();
                                    Console.WriteLine("Exporting... -> {0} Records: {1}", fileCsv, yearSet.Count);
                                }
                                else
                                {
                                    Console.WriteLine("Skip: {0}", fileResult);
                                }
                                searchState = SearchState.Complete;
                            }
                        }
                        else
                        {
                            //Console.WriteLine();
                            Console.WriteLine("WARNING: bad line - {0}", line);
                        }
                    }
                }
                readerOsm.Dispose();
                readerOsmBZip2.Dispose();
            }

            readerOsmBZip2Raw.Close();
        }
예제 #2
0
        /// <summary>
        /// Reads the source CSV files of <paramref name="pathToDir"/> in sorted order and, for every file
        /// after the first, exports the rows that are also present in all preceding files.
        /// </summary>
        /// <remarks>
        /// Files whose name contains '-' are treated as earlier analysis results and are ignored.
        /// Each result is written to <c>{first}-{current}.csv</c> inside <paramref name="pathToDir"/>
        /// unless that file already exists. Row matching relies on the equality semantics of
        /// <c>RowAnalizeStore</c>, which are defined outside this method.
        /// </remarks>
        /// <param name="pathToDir">Directory that holds the source CSV files and receives the result files.</param>
        static void Analize(string pathToDir)
        {
            string[] filesCsv;

            try
            {
                filesCsv = Directory.GetFiles(pathToDir, "*.csv");
            }
            catch (IOException)
            {
                Console.Error.WriteLine("Dir not found: {0}", pathToDir);
                return;
            }

            // Exclude analysis result files. Only the file name is tested, so a '-' in the
            // directory part of the path does not filter out every file.
            filesCsv = filesCsv.Where(f => !Path.GetFileName(f).Contains("-")).ToArray();
            Array.Sort(filesCsv);

            var setPrev = default(HashSet<RowAnalizeStore>);
            var setCur  = default(HashSet<RowAnalizeStore>);

            // NOTE(review): the first intersection (two files) is stored with ChainDays = 1 - confirm
            // this is the intended numbering.
            int chainDays = 1;

            foreach (var fileCsv in filesCsv)
            {
                setPrev = setCur;
                setCur  = new HashSet<RowAnalizeStore>();

                // Directory.GetFiles already returns paths that include pathToDir, so the entry is
                // opened as is; combining it with pathToDir again would double a relative directory.
                using (var readerCsv = new StreamReader(fileCsv, Encoding.UTF8))
                {
                    var header = readerCsv.ReadLine();

                    // An empty file has no header and contributes no rows.
                    if (header != null)
                    {
                        var columnsHeader   = header.Split(';');
                        int cUid            = Array.IndexOf(columnsHeader, "uid");
                        int cUser           = Array.IndexOf(columnsHeader, "user");
                        int cChangesetCount = Array.IndexOf(columnsHeader, "changesets_count");

                        while (!readerCsv.EndOfStream)
                        {
                            var line = readerCsv.ReadLine();

                            // Blank lines (e.g. a trailing newline) must not produce a default row.
                            if (line.Length == 0)
                            {
                                continue;
                            }

                            var columns = line.Split(';');
                            var row     = new RowAnalizeStore();

                            if (columns.Length == columnsHeader.Length)
                            {
                                row.Uid             = int.Parse(columns[cUid]);
                                row.User            = columns[cUser];
                                row.ChangesetsCount = int.Parse(columns[cChangesetCount]);
                            }
                            else
                            {
                                // The plain split produced a different column count than the header,
                                // so some field contains a quoted ';' - split again honouring quotes.
                                var fields = SplitQuotedCsvLine(line);

                                if (cUid >= 0 && cUid < fields.Count)
                                {
                                    row.Uid = int.Parse(fields[cUid]);
                                }
                                if (cUser >= 0 && cUser < fields.Count)
                                {
                                    row.User = fields[cUser];
                                }
                                if (cChangesetCount >= 0 && cChangesetCount < fields.Count)
                                {
                                    row.ChangesetsCount = int.Parse(fields[cChangesetCount]);
                                }
                            }

                            setCur.Add(row);
                        }
                    }
                }

                if (setPrev != null)
                {
                    // Keep only rows that were present in every file so far and accumulate their counts.
                    setCur.IntersectWith(setPrev);
                    var prevByUid = setPrev.ToDictionary(el => el.Uid);

                    foreach (var row in setCur)
                    {
                        row.ChainDays        = chainDays;
                        row.ChangesetsCount += prevByUid[row.Uid].ChangesetsCount;
                    }

                    var prev       = Path.GetFileNameWithoutExtension(filesCsv[0]);
                    var cur        = Path.GetFileNameWithoutExtension(fileCsv);
                    var fileResult = string.Format("{0}-{1}.csv", prev, cur);
                    var pathResult = Path.Combine(pathToDir, fileResult);

                    // Check the same path that is written, not a bare file name relative to the current directory.
                    if (!File.Exists(pathResult))
                    {
                        ExportToCsv(pathResult, setCur);
                        Console.WriteLine("Exporting... -> {0} Records: {1}", fileResult, setCur.Count);
                    }
                    chainDays++;
                }
            }
        }

        // Splits one ';'-separated CSV line into fields; a ';' inside double quotes is literal text and the quotes are dropped.
        private static List<string> SplitQuotedCsvLine(string line)
        {
            var fields    = new List<string>();
            var field     = new StringBuilder(128);
            bool openQuote = false;

            foreach (char c in line)
            {
                if (c == '"')
                {
                    openQuote = !openQuote;
                }
                else if (c == ';' && !openQuote)
                {
                    fields.Add(field.ToString());
                    field.Length = 0;
                }
                else
                {
                    field.Append(c);
                }
            }

            // Flush the last field after the loop so a closing quote at the end of the line is not kept as data.
            fields.Add(field.ToString());
            return fields;
        }