/// <summary>
/// Scans a changeset dump (a file of concatenated BZip2 streams) for the period given by
/// <paramref name="year"/>, accumulates per-user rows into a set and exports that set to
/// a CSV file named "{dump name without extension}_{year}.csv".
/// </summary>
/// <param name="fileChangesetAll">Path of the dump; opened with <c>FileMode.Open</c>.</param>
/// <param name="year">
/// Period selector, interpreted by its length: 4 characters = a year (digits 0-3),
/// 7 = a month (digits 0-3 and 5-6), 10 = a single day (digits 0-3, 5-6 and 8-9).
/// The separator characters at positions 4 and 7 are not checked. Any other length prints
/// "Wrong YEAR" and returns; a parse failure prints "Error parse YEAR" and returns.
/// </param>
/// <remarks>
/// Flow, as far as the visible code shows:
/// 1. Start dates before 2009-04-21 are rejected with a message.
/// 2. The index from GetOrCreateIndexBZip2Achives is searched for the first entry whose
///    StartDateTime equals the start date, and the raw stream is positioned at the entry
///    just before it.
/// 3. Each BZip2 stream is decoded (IsStreamOwner = false keeps the raw stream open) and read
///    line by line as UTF-8. The last line of a stream is held in backupLine and glued to the
///    front of the first line of the next stream.
/// 4. For lines containing a changeset element, the line is split on double quotes and the
///    first 10 characters of the created_at value are used as the date. Dates that compare
///    below dayStart are skipped.
/// 5. When the date differs from curDay, the current and previous day sets are merged:
///    rows present on both days stay in curDaySet (ChainDays set, ChangesetsCount summed with
///    the previous day's entry from prevHashTableByUid), all other rows go to yearSet. An
///    empty intersection sets the state to Complete. Then the day counter advances.
/// 6. The result is exported via ExportToCsv unless the target file already exists, in which
///    case "Skip" is printed.
/// NOTE(review): a span of this method is masked in this copy of the source (the "******"
/// between the user attribute check and the CSV file name format), so the uid/user handling
/// and the definition of <c>dir</c> are not visible here.
/// NOTE(review): the early returns (wrong/failed year, date before 2009-04-21) happen after
/// the FileStream is created and do not close it.
/// NOTE(review): prevBlockIndex is FindIndex(...) - 1, so it is negative when no index entry
/// matches or when the first entry matches; the following indexer access would then fail.
/// NOTE(review): dates from the file are compared as strings with ToShortDateString() output;
/// presumably this relies on the current culture producing yyyy-MM-dd - confirm.
/// </remarks>
static void AnalizeYearByChangesetAll(string fileChangesetAll, string year) { var readerOsmBZip2Raw = new FileStream(fileChangesetAll, FileMode.Open); string dayStart = string.Empty; string dayEnd = string.Empty; var dayCurrent = default(DateTime); try { switch (year.Length) { case 4: // year var daySY = new DateTime(int.Parse(year.Substring(0, 4)), 1, 1); var dayEY = daySY.AddYears(1); dayStart = daySY.ToShortDateString(); dayEnd = dayEY.ToShortDateString(); dayCurrent = daySY; break; case 7: // month var daySM = new DateTime(int.Parse(year.Substring(0, 4)), int.Parse(year.Substring(5, 2)), 1); var dayEM = daySM.AddMonths(1); dayStart = daySM.ToShortDateString(); dayEnd = dayEM.ToShortDateString(); dayCurrent = daySM; break; case 10: // day var daySD = new DateTime(int.Parse(year.Substring(0, 4)), int.Parse(year.Substring(5, 2)), int.Parse(year.Substring(8, 2))); dayStart = dayEnd = daySD.ToShortDateString(); dayCurrent = daySD; break; default: Console.WriteLine("Wrong YEAR: {0}", year); return; } } catch (Exception ex) { Console.WriteLine("Error parse YEAR: {0}\n{1}", year, ex.Message); return; } if (dayCurrent < new DateTime(2009, 4, 21)) { Console.WriteLine("Changesets before 2009-04-21 not supported. 
Exit."); return; } string backupLine = string.Empty; string curDay = dayCurrent.ToShortDateString(); int chainDays = 1; var searchState = SearchState.No; var indexBZip2 = GetOrCreateIndexBZip2Achives(readerOsmBZip2Raw); Console.Write("Progress: {0} - ", curDay); var prevDaySet = default(HashSet <RowAnalizeStore>); var curDaySet = new HashSet <RowAnalizeStore>(); var yearSet = new HashSet <RowAnalizeStore>(); var hashTableByUid = new Hashtable(); var prevHashTableByUid = new Hashtable(); //var bIndex = indexBZip2.BinarySearch(new BZip2OsmIndex { StartDateTime = dayCurrent }); //while (indexBZip2[--bIndex].StartDateTime == dayCurrent) //{ } //readerOsmBZip2Raw.Position = indexBZip2[bIndex + 1].Position; var prevBlockIndex = indexBZip2.FindIndex(el => el.StartDateTime == dayCurrent) - 1; readerOsmBZip2Raw.Position = indexBZip2[prevBlockIndex].Position; while (readerOsmBZip2Raw.Length > readerOsmBZip2Raw.Position && searchState != SearchState.Complete) { var readerOsmBZip2 = new BZip2InputStream(readerOsmBZip2Raw); readerOsmBZip2.IsStreamOwner = false; var readerOsm = new StreamReader(readerOsmBZip2, Encoding.UTF8); while (!readerOsm.EndOfStream && searchState != SearchState.Complete) { var line = readerOsm.ReadLine(); if (readerOsm.EndOfStream) { backupLine = line; continue; } if (backupLine.Length > 0) { line = backupLine + line; backupLine = string.Empty; } if (line.Contains("<changeset id=")) { var splits = line.Split('"'); if (splits[2] == " created_at=") { var date = splits[3].Substring(0, 10); // earlier than the requested start if (date.CompareTo(dayStart) < 0) { continue; } if (searchState == SearchState.No) { searchState = SearchState.Found; } if (date != curDay) { if (prevDaySet != null) { var forYear = new HashSet <RowAnalizeStore>(curDaySet); curDaySet.IntersectWith(prevDaySet); forYear.ExceptWith(curDaySet); yearSet.UnionWith(forYear); prevDaySet.ExceptWith(curDaySet); yearSet.UnionWith(prevDaySet); foreach (var row in curDaySet) { row.ChainDays = chainDays; 
row.ChangesetsCount += ((RowAnalizeStore)prevHashTableByUid[row.Uid]).ChangesetsCount; } prevDaySet.Clear(); prevHashTableByUid.Clear(); if (curDaySet.Count == 0) { searchState = SearchState.Complete; } Console.SetCursorPosition(Console.CursorLeft - 10, Console.CursorTop); } chainDays++; Console.Write(curDay); if (searchState == SearchState.Found) { prevDaySet = curDaySet; prevHashTableByUid = hashTableByUid; } curDaySet = new HashSet <RowAnalizeStore>(); hashTableByUid = new Hashtable(); dayCurrent = dayCurrent.AddDays(1); curDay = dayCurrent.ToShortDateString(); } if (searchState == SearchState.Found && date == curDay && date.CompareTo(dayEnd) < 0) { // otherwise these are anonymous edits if (splits.Length >= 10 && splits[10] == " uid=" && splits[8] == " user="******"{0}_{1}.csv", Path.GetFileNameWithoutExtension(fileChangesetAll), year); string fileResult = Path.Combine(dir, fileCsv); if (!File.Exists(fileResult)) { ExportToCsv(fileResult, yearSet); Console.WriteLine(); Console.WriteLine("Exporting... -> {0} Records: {1}", fileCsv, yearSet.Count); } else { Console.WriteLine("Skip: {0}", fileResult); } searchState = SearchState.Complete; } } else { //Console.WriteLine(); Console.WriteLine("WARNING: bad line - {0}", line); } } } readerOsm.Dispose(); readerOsmBZip2.Dispose(); } readerOsmBZip2Raw.Close(); }
/// <summary>
/// Chains the per-period CSV reports found in <paramref name="pathToDir"/>.
/// The reports are processed in sorted order; for every report after the first one the
/// rows are intersected with the previous report's (already chained) rows, the changeset
/// counts are summed, and the result is exported as "{first}-{current}.csv" into the same
/// directory unless that file already exists.
/// </summary>
/// <param name="pathToDir">Directory that holds the source "*.csv" reports.</param>
/// <remarks>
/// Files whose name contains '-' are treated as earlier analysis results and ignored.
/// An I/O failure while listing the directory is reported on stderr and the method returns.
/// </remarks>
static void Analize(string pathToDir)
{
    string[] filesCsv;
    try
    {
        filesCsv = Directory.GetFiles(pathToDir, "*.csv");
    }
    catch (IOException)
    {
        Console.Error.WriteLine("Dir not found: {0}", pathToDir);
        return;
    }

    // Exclude analysis result files. Only the file name is tested: GetFiles returns paths
    // that include the directory, and a '-' in the directory name must not hide every file.
    filesCsv = filesCsv.Where(f => !Path.GetFileName(f).Contains("-")).ToArray();
    Array.Sort(filesCsv);

    HashSet<RowAnalizeStore> setPrev;
    HashSet<RowAnalizeStore> setCur = null;
    int chainDays = 1;

    foreach (var fileCsv in filesCsv)
    {
        setPrev = setCur;

        // GetFiles already prefixes the directory; re-combining the full entry with a
        // relative pathToDir would duplicate it, so combine with the bare file name.
        setCur = ReadAnalizeCsvRows(Path.Combine(pathToDir, Path.GetFileName(fileCsv)));

        if (setPrev == null)
        {
            // First report: nothing to chain with yet.
            continue;
        }

        setCur.IntersectWith(setPrev);
        var prevByUid = setPrev.ToDictionary(el => el.Uid);
        foreach (var row in setCur)
        {
            row.ChainDays = chainDays;
            row.ChangesetsCount += prevByUid[row.Uid].ChangesetsCount;
        }

        var prev = Path.GetFileNameWithoutExtension(filesCsv[0]);
        var cur = Path.GetFileNameWithoutExtension(fileCsv);
        var fileResult = string.Format("{0}-{1}.csv", prev, cur);

        // Check the same location the export writes to, not the current working directory.
        var pathResult = Path.Combine(pathToDir, fileResult);
        if (!File.Exists(pathResult))
        {
            ExportToCsv(pathResult, setCur);
            Console.WriteLine("Exporting... -> {0} Records: {1}", fileResult, setCur.Count);
        }

        chainDays++;
    }
}

/// <summary>
/// Reads one ';'-separated report and returns its rows. The header line supplies the
/// positions of the "uid", "user" and "changesets_count" columns. An empty file yields an
/// empty set; blank lines are skipped.
/// </summary>
private static HashSet<RowAnalizeStore> ReadAnalizeCsvRows(string filePath)
{
    var rows = new HashSet<RowAnalizeStore>();

    // The using block releases the file handle even when a row fails to parse.
    using (var readerCsv = new StreamReader(filePath, Encoding.UTF8))
    {
        var header = readerCsv.ReadLine();
        if (header == null)
        {
            return rows;
        }

        var columnsHeader = header.Split(';');
        int cUid = Array.IndexOf(columnsHeader, "uid");
        int cUser = Array.IndexOf(columnsHeader, "user");
        int cChangesetCount = Array.IndexOf(columnsHeader, "changesets_count");
        var buffer = new StringBuilder(128);

        string line;
        while ((line = readerCsv.ReadLine()) != null)
        {
            if (line.Length == 0)
            {
                continue;
            }

            rows.Add(ParseAnalizeCsvLine(line, columnsHeader.Length, cUid, cUser, cChangesetCount, buffer));
        }
    }

    return rows;
}

/// <summary>
/// Parses a single report line. When a plain split on ';' yields exactly
/// <paramref name="columnCount"/> fields they are used directly; otherwise the line is
/// scanned character by character so that ';' inside double quotes does not split a field
/// (the quote characters themselves are dropped).
/// </summary>
private static RowAnalizeStore ParseAnalizeCsvLine(string line, int columnCount, int cUid, int cUser, int cChangesetCount, StringBuilder buffer)
{
    var row = new RowAnalizeStore();

    var columns = line.Split(';');
    if (columns.Length == columnCount)
    {
        row.Uid = int.Parse(columns[cUid]);
        row.User = columns[cUser];
        row.ChangesetsCount = int.Parse(columns[cChangesetCount]);
        return row;
    }

    buffer.Length = 0;
    int curColumn = 0;
    bool openQuote = false;

    // One extra iteration (i == line.Length) flushes the final field, so the last
    // character is classified like any other instead of being appended blindly.
    for (int i = 0; i <= line.Length; i++)
    {
        bool atEnd = i == line.Length;
        char c = atEnd ? ';' : line[i];

        if (atEnd || (c == ';' && !openQuote))
        {
            // A trailing empty field is ignored so that a dangling ';' leaves the
            // remaining members at their defaults rather than failing to parse.
            if (!atEnd || buffer.Length > 0)
            {
                var value = buffer.ToString();
                if (curColumn == cUid)
                {
                    row.Uid = int.Parse(value);
                }
                else if (curColumn == cUser)
                {
                    row.User = value;
                }
                else if (curColumn == cChangesetCount)
                {
                    row.ChangesetsCount = int.Parse(value);
                }
            }

            buffer.Length = 0;
            curColumn++;
        }
        else if (c == '"')
        {
            openQuote = !openQuote;
        }
        else
        {
            buffer.Append(c);
        }
    }

    return row;
}