/// <summary>
/// Enables core performance counters and programs all six PMCs on every thread
/// with branch-prediction related events.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();

    // Build the event selects once; they are identical for every thread.
    ulong retiredBranches = GetPerfCtlValue(0xC2, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong mispredictedBranches = GetPerfCtlValue(0xC3, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong retiredInstructions = GetPerfCtlValue(0xC0, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong unhaltedCycles = GetPerfCtlValue(0x76, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong decoderOverrides = GetPerfCtlValue(0x91, 0, true, true, false, false, true, false, 0, 0, false, false);
    // event 0xD0 = retired fused branch instructions; note the 10th argument is 1
    // here unlike the other events — presumably required for this event, confirm against PPR
    ulong fusedBranches = GetPerfCtlValue(0xD0, 0, true, true, false, false, true, false, 0, 1, false, false);

    for (int tIdx = 0; tIdx < cpu.GetThreadCount(); tIdx++)
    {
        // pin to the target thread so the MSR writes land on its core
        ThreadAffinity.Set(1UL << tIdx);
        Ring0.WriteMsr(MSR_PERF_CTL_0, retiredBranches);
        Ring0.WriteMsr(MSR_PERF_CTL_1, mispredictedBranches);
        Ring0.WriteMsr(MSR_PERF_CTL_2, retiredInstructions);
        Ring0.WriteMsr(MSR_PERF_CTL_3, unhaltedCycles);
        Ring0.WriteMsr(MSR_PERF_CTL_4, decoderOverrides);
        Ring0.WriteMsr(MSR_PERF_CTL_5, fusedBranches);
    }
}
/// <summary>
/// Reads and clears the four data fabric perf counters and formats the results.
/// The events being counted are undocumented; the metric labels reflect guesses.
/// </summary>
/// <returns>Per-counter metrics, overall metrics, and raw counter values</returns>
public MonitoringUpdateResults Update()
{
    float normalizationFactor = dataFabric.GetNormalizationFactor(ref lastUpdateTime);
    MonitoringUpdateResults results = new MonitoringUpdateResults();
    // read the DF counters from the same thread that programmed them
    ThreadAffinity.Set(1UL << monitoringThread);
    ulong ctr0 = ReadAndClearMsr(MSR_DF_PERF_CTR_0);
    ulong ctr1 = ReadAndClearMsr(MSR_DF_PERF_CTR_1);
    ulong ctr2 = ReadAndClearMsr(MSR_DF_PERF_CTR_2);
    ulong ctr3 = ReadAndClearMsr(MSR_DF_PERF_CTR_3);
    dataFabric.ReadPackagePowerCounter();
    results.unitMetrics = new string[4][];
    // counts are scaled by 64 bytes to get bandwidth
    results.unitMetrics[0] = new string[] { "CCM0 DRAM BW?", FormatLargeNumber(ctr0 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr0 * normalizationFactor), "N/A" };
    // NOTE(review): ctr1 is scaled by 16 while the others use 64 — presumably a
    // different transfer size for this event; confirm against the PPR
    results.unitMetrics[1] = new string[] { "CCM1 DRAM BW?", FormatLargeNumber(ctr1 * normalizationFactor * 16) + "B/s", FormatLargeNumber(ctr1 * normalizationFactor), "N/A" };
    results.unitMetrics[2] = new string[] { "Mem BW related?", FormatLargeNumber(ctr2 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr2 * normalizationFactor), "N/A" };
    results.unitMetrics[3] = new string[] { "Wat Dis?", FormatLargeNumber(ctr3 * normalizationFactor * 64) + "B/s", FormatLargeNumber(ctr3 * normalizationFactor), "N/A" };
    // NOTE(review): the third overall column omits normalizationFactor, unlike the
    // per-unit rows — verify whether the raw sum is intended here
    results.overallMetrics = new string[] { "Overall",
        FormatLargeNumber((ctr0 + ctr1 + ctr2 + ctr3) * normalizationFactor * 64) + "B/s",
        FormatLargeNumber(ctr0 + ctr1 + ctr2 + ctr3),
        string.Format("{0:F2} W", dataFabric.NormalizedTotalCounts.watts) };
    results.overallCounterValues = new Tuple <string, float> [5];
    results.overallCounterValues[0] = new Tuple <string, float>("Package Power", dataFabric.NormalizedTotalCounts.watts);
    results.overallCounterValues[1] = new Tuple <string, float>("Evt 0x07 Umask 2", ctr0);
    results.overallCounterValues[2] = new Tuple <string, float>("Evt 0x47 Umask 2", ctr1);
    results.overallCounterValues[3] = new Tuple <string, float>("Mem BW? 0x87 Umask 1", ctr2);
    results.overallCounterValues[4] = new Tuple <string, float>("Evt 0xC7 Umask 4", ctr3);
    return(results);
}
/// <summary>
/// Creates a CPUID instance for the given processor group and thread, temporarily
/// switching affinity to that thread so the CPUID instructions execute on it.
/// </summary>
/// <param name="group">Processor group index</param>
/// <param name="thread">Thread index within the group</param>
/// <returns>The CPUID data, or null if the thread index is out of range or affinity could not be set</returns>
public static CPUID Get(int group, int thread)
{
    // a processor group can hold at most 64 logical processors
    if (thread >= 64)
        return null;

    GroupAffinity affinity = GroupAffinity.Single((ushort)group, thread);
    GroupAffinity previousAffinity = ThreadAffinity.Set(affinity);

    // Undefined means the switch failed, so CPUID would run on the wrong thread
    if (previousAffinity == GroupAffinity.Undefined)
        return null;

    try
    {
        return new CPUID(group, thread, affinity);
    }
    finally
    {
        // always restore the caller's original affinity
        ThreadAffinity.Set(previousAffinity);
    }
}
/// <summary>
/// Reads and clears the six L3 perf counters for a CCX, stores normalized values,
/// and accumulates them into the running totals. Sets thread affinity.
/// </summary>
/// <param name="ccxIdx">CCX index to record data under</param>
/// <param name="threadIdx">Thread to read the L3 counters from</param>
public void UpdateCcxL3CounterData(int ccxIdx, int threadIdx)
{
    ThreadAffinity.Set(1UL << threadIdx);
    float normalizationFactor = GetNormalizationFactor(threadIdx);
    ulong raw0 = ReadAndClearMsr(MSR_L3_PERF_CTR_0);
    ulong raw1 = ReadAndClearMsr(MSR_L3_PERF_CTR_1);
    ulong raw2 = ReadAndClearMsr(MSR_L3_PERF_CTR_2);
    ulong raw3 = ReadAndClearMsr(MSR_L3_PERF_CTR_3);
    ulong raw4 = ReadAndClearMsr(MSR_L3_PERF_CTR_4);
    ulong raw5 = ReadAndClearMsr(MSR_L3_PERF_CTR_5);

    // lazily allocate per-CCX storage
    L3CounterData ccxData = ccxCounterData[ccxIdx];
    if (ccxData == null)
    {
        ccxData = new L3CounterData();
        ccxCounterData[ccxIdx] = ccxData;
    }

    ccxData.ctr0 = raw0 * normalizationFactor;
    ccxData.ctr1 = raw1 * normalizationFactor;
    ccxData.ctr2 = raw2 * normalizationFactor;
    ccxData.ctr3 = raw3 * normalizationFactor;
    ccxData.ctr4 = raw4 * normalizationFactor;
    ccxData.ctr5 = raw5 * normalizationFactor;

    ccxTotals.ctr0 += ccxData.ctr0;
    ccxTotals.ctr1 += ccxData.ctr1;
    ccxTotals.ctr2 += ccxData.ctr2;
    ccxTotals.ctr3 += ccxData.ctr3;
    ccxTotals.ctr4 += ccxData.ctr4;
    ccxTotals.ctr5 += ccxData.ctr5;
}
/// <summary>
/// Programs the data fabric counters with undocumented events from the preliminary
/// PPR (removed in the latest revision). The preliminary PPR suggests calculating
/// DRAM bandwidth by summing these events and multiplying by 64.
/// </summary>
public void Initialize()
{
    // Four related events (0x00000001004038C7, 0x0000000100403887,
    // 0x0000000100403847, 0x0000000100403807) always read zero on the 3950X.
    // Possibly intended for quad channel platforms, so they are not programmed here.

    // These four actually produce counts
    ulong dramEvent0 = 0x0000000000403807;
    ulong dramEvent1 = 0x0000000000403847;
    ulong dramEvent2 = 0x0000000000403887;
    ulong dramEvent3 = 0x00000000004038C7;

    ThreadAffinity.Set(1UL << monitoringThread);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_0, dramEvent0);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_1, dramEvent1);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_2, dramEvent2);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_3, dramEvent3);
    lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
}
/// <summary>
/// Enables core performance counters and programs all six PMCs on every thread
/// with cycle/instruction and BTB override events.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();

    // Same event selects for every thread, so compute them once.
    ulong activeCycles = GetPerfCtlValue(0x76, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong retiredInstructions = GetPerfCtlValue(0xC0, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong retiredBranches = GetPerfCtlValue(0xC2, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong mispredictedBranches = GetPerfCtlValue(0xC3, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong l1BtbOverrides = GetPerfCtlValue(0x8A, 0, true, true, false, false, true, false, 0, 0, false, false);
    ulong l2BtbOverrides = GetPerfCtlValue(0x8B, 0, true, true, false, false, true, false, 0, 0, false, false);

    for (int tIdx = 0; tIdx < cpu.GetThreadCount(); tIdx++)
    {
        ThreadAffinity.Set(1UL << tIdx);
        Ring0.WriteMsr(MSR_PERF_CTL_0, activeCycles);
        Ring0.WriteMsr(MSR_PERF_CTL_1, retiredInstructions);
        Ring0.WriteMsr(MSR_PERF_CTL_2, retiredBranches);
        Ring0.WriteMsr(MSR_PERF_CTL_3, mispredictedBranches);
        Ring0.WriteMsr(MSR_PERF_CTL_4, l1BtbOverrides);
        Ring0.WriteMsr(MSR_PERF_CTL_5, l2BtbOverrides);
    }
}
/// <summary>
/// Enables core performance counters and programs all six PMCs on every thread
/// with cycle/instruction and data cache refill events.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // PERF_CTR0 = active cycles (event 0x76)
        Ring0.WriteMsr(MSR_PERF_CTL_0, GetPerfCtlValue(0x76, 0, true, true, false, false, true, false, 0, 0, false, false));
        // PERF_CTR1 = retired instructions (event 0xC0)
        Ring0.WriteMsr(MSR_PERF_CTL_1, GetPerfCtlValue(0xC0, 0, true, true, false, false, true, false, 0, 0, false, false));
        // PERF_CTR2 = DC refills from L2 (event 0x43, umask 1)
        Ring0.WriteMsr(MSR_PERF_CTL_2, GetPerfCtlValue(0x43, 1, true, true, false, false, true, false, 0, 0, false, false));
        // PERF_CTR3 = DC refills from another cache (L3) (umask 2)
        Ring0.WriteMsr(MSR_PERF_CTL_3, GetPerfCtlValue(0x43, 2, true, true, false, false, true, false, 0, 0, false, false));
        // PERF_CTR4 = DC refills from local dram (umask 8)
        Ring0.WriteMsr(MSR_PERF_CTL_4, GetPerfCtlValue(0x43, 8, true, true, false, false, true, false, 0, 0, false, false));
        // PERF_CTR5 = remote refills (umask 0x50)
        Ring0.WriteMsr(MSR_PERF_CTL_5, GetPerfCtlValue(0x43, 0x50, true, true, false, false, true, false, 0, 0, false, false));
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread to count
/// cycles in which uops execute on ports 0, 1, 5, and 6 respectively.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // Set PMC0 to cycles when uops are executed on port 0 (event 0xA1, umask 0x01).
        // anyThread sometimes works (i7-4712HQ) and sometimes not (E5-1620v3). It works on SNB.
        // don't set anythread for consistent behavior
        // Locals previously had misleading branch-related names; they hold port
        // utilization event selects, matching the comments above each write.
        ulong port0Cycles = GetPerfEvtSelRegisterValue(0xA1, 0x01, usr: true, os: true, edge: false, pc: false, interrupt: false, anyThread: false, enable: true, invert: false, cmask: 0);
        Ring0.WriteMsr(IA32_PERFEVTSEL0, port0Cycles);
        // Set PMC1 to count ^ for port 1 (umask 0x02)
        ulong port1Cycles = GetPerfEvtSelRegisterValue(0xA1, 0x02, true, true, false, false, false, false, true, false, 0);
        Ring0.WriteMsr(IA32_PERFEVTSEL1, port1Cycles);
        // Set PMC2 to count ^ for port 5 (umask 0x20)
        ulong port5Cycles = GetPerfEvtSelRegisterValue(0xA1, 0x20, true, true, false, false, false, false, true, false, 0);
        Ring0.WriteMsr(IA32_PERFEVTSEL2, port5Cycles);
        // Set PMC3 to count ^ for port 6 (umask 0x40)
        ulong port6Cycles = GetPerfEvtSelRegisterValue(0xA1, 0x40, true, true, false, false, false, false, true, false, 0);
        Ring0.WriteMsr(IA32_PERFEVTSEL3, port6Cycles);
    }
}
/// <summary>
/// Programs the four northbridge perf counter control registers.
/// Sets affinity to the monitoring thread before writing the MSRs.
/// </summary>
/// <param name="ctr0">NB counter 0 event select</param>
/// <param name="ctr1">NB counter 1 event select</param>
/// <param name="ctr2">NB counter 2 event select</param>
/// <param name="ctr3">NB counter 3 event select</param>
private void ProgramPerfCounters(ulong ctr0, ulong ctr1, ulong ctr2, ulong ctr3)
{
    ThreadAffinity.Set(1UL << monitoringThread);
    Ring0.WriteMsr(MSR_NB_PERF_CTL_0, ctr0);
    Ring0.WriteMsr(MSR_NB_PERF_CTL_1, ctr1);
    Ring0.WriteMsr(MSR_NB_PERF_CTL_2, ctr2);
    Ring0.WriteMsr(MSR_NB_PERF_CTL_3, ctr3);
}
/// <summary>
/// Update counter values for thread, and add to totals.
/// Will set thread affinity.
/// </summary>
/// <param name="threadIdx">thread in question</param>
public void UpdateThreadCoreCounterData(int threadIdx)
{
    ThreadAffinity.Set(1UL << threadIdx);
    float normalizationFactor = GetNormalizationFactor(threadIdx);

    // fixed counters, the six programmable counters, and core energy
    ulong aperf, mperf, tsc, instr;
    ReadFixedCounters(threadIdx, out aperf, out instr, out tsc, out mperf);
    ulong rawCtr0 = ReadAndClearMsr(MSR_PERF_CTR_0);
    ulong rawCtr1 = ReadAndClearMsr(MSR_PERF_CTR_1);
    ulong rawCtr2 = ReadAndClearMsr(MSR_PERF_CTR_2);
    ulong rawCtr3 = ReadAndClearMsr(MSR_PERF_CTR_3);
    ulong rawCtr4 = ReadAndClearMsr(MSR_PERF_CTR_4);
    ulong rawCtr5 = ReadAndClearMsr(MSR_PERF_CTR_5);
    float joules;
    ReadCorePowerCounter(threadIdx, out joules);

    // lazily allocate per-thread storage
    if (NormalizedThreadCounts == null)
    {
        NormalizedThreadCounts = new NormalizedCoreCounterData[threadCount];
    }

    NormalizedCoreCounterData counts = NormalizedThreadCounts[threadIdx];
    if (counts == null)
    {
        counts = new NormalizedCoreCounterData();
        NormalizedThreadCounts[threadIdx] = counts;
    }

    counts.aperf = aperf * normalizationFactor;
    counts.mperf = mperf * normalizationFactor;
    counts.instr = instr * normalizationFactor;
    counts.tsc = tsc * normalizationFactor;
    counts.ctr0 = rawCtr0 * normalizationFactor;
    counts.ctr1 = rawCtr1 * normalizationFactor;
    counts.ctr2 = rawCtr2 * normalizationFactor;
    counts.ctr3 = rawCtr3 * normalizationFactor;
    counts.ctr4 = rawCtr4 * normalizationFactor;
    counts.ctr5 = rawCtr5 * normalizationFactor;
    counts.watts = joules * normalizationFactor;
    counts.NormalizationFactor = normalizationFactor;

    NormalizedTotalCounts.aperf += counts.aperf;
    NormalizedTotalCounts.mperf += counts.mperf;
    NormalizedTotalCounts.instr += counts.instr;
    NormalizedTotalCounts.tsc += counts.tsc;
    NormalizedTotalCounts.ctr0 += counts.ctr0;
    NormalizedTotalCounts.ctr1 += counts.ctr1;
    NormalizedTotalCounts.ctr2 += counts.ctr2;
    NormalizedTotalCounts.ctr3 += counts.ctr3;
    NormalizedTotalCounts.ctr4 += counts.ctr4;
    NormalizedTotalCounts.ctr5 += counts.ctr5;

    // only add core power once per core. don't count it per-SMT thread
    // and always add if SMT is off (thread count == core count)
    if (threadCount == coreCount || (threadCount == coreCount * 2 && threadIdx % 2 == 0))
    {
        NormalizedTotalCounts.totalCoreWatts += counts.watts;
    }
}
/// <summary>
/// Refreshes TSC frequency estimation (when any load sensor is being evaluated)
/// and updates per-core, total, and max load sensors.
/// </summary>
public override void Update()
{
    // only do the TSC work if some load sensor is actually being evaluated
    if (sensorConfig.GetSensorEvaluate(totalLoad.IdentifierString) ||
        sensorConfig.GetSensorEvaluate(maxLoad.IdentifierString) ||
        coreLoads.Any(sensor => sensorConfig.GetSensorEvaluate(sensor.IdentifierString)))
    {
        if (HasTimeStampCounter && isInvariantTimeStampCounter)
        {
            // make sure always the same thread is used
            var previousAffinity = ThreadAffinity.Set(cpuid[0][0].Affinity);

            // read time before and after getting the TSC to estimate the error
            long firstTime = Stopwatch.GetTimestamp();
            ulong timeStampCount = Opcode.Rdtsc();
            long time = Stopwatch.GetTimestamp();

            // restore the thread affinity mask
            ThreadAffinity.Set(previousAffinity);

            double delta = ((double)(time - lastTime)) / Stopwatch.Frequency;
            double error = ((double)(time - firstTime)) / Stopwatch.Frequency;

            // only use data if they are measured accurate enough (max 0.1ms delay)
            if (error < 1E-04)
            {
                // ignore the first reading because there are no initial values
                // ignore readings with too large or too small time window
                if (lastTime != 0 && delta > 0.5 && delta < 2)
                {
                    // update the TSC frequency with the new value
                    TimeStampCounterFrequency = (timeStampCount - lastTimeStampCount) / (1e6 * delta);
                }
                lastTimeStampCount = timeStampCount;
                lastTime = time;
            }
        }
    }

    if (cpuLoad.IsAvailable)
    {
        cpuLoad.Update();
        for (int i = 0; i < coreLoads.Length; i++)
        {
            coreLoads[i].Value = cpuLoad.GetCoreLoad(i);
        }
        if (totalLoad != null)
        {
            totalLoad.Value = cpuLoad.GetTotalLoad();
        }
        if (maxLoad != null)
        {
            maxLoad.Value = cpuLoad.GetMaxLoad();
        }
    }
}
/// <summary>
/// Programs the four data fabric counters with event 0x7, umask 0x38, varying
/// only the value OR'd into bits 6+ of the event field (presumably an instance
/// select — confirm against the PPR).
/// </summary>
public void Initialize()
{
    ThreadAffinity.Set(1UL << monitoringThread);

    ulong instance0 = GetDFPerfCtlValue(0x7 | 0, 0x38, true, 0, 0);
    ulong instance1 = GetDFPerfCtlValue(0x7 | (1 << 6), 0x38, true, 0, 0);
    ulong instance2 = GetDFPerfCtlValue(0x7 | (2 << 6), 0x38, true, 0, 0);
    ulong instance3 = GetDFPerfCtlValue(0x7 | (3 << 6), 0x38, true, 0, 0);

    Ring0.WriteMsr(MSR_DF_PERF_CTL_0, instance0);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_1, instance1);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_2, instance2);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_3, instance3);
    lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
}
/// <summary>
/// Refreshes the TSC frequency estimation and updates per-thread and total load sensors.
/// </summary>
public override void Update()
{
    if (HasTimeStampCounter && _isInvariantTimeStampCounter)
    {
        // make sure always the same thread is used
        GroupAffinity previousAffinity = ThreadAffinity.Set(_cpuId[0][0].Affinity);

        // read time before and after getting the TSC to estimate the error
        long firstTime = Stopwatch.GetTimestamp();
        ulong timeStampCount = OpCode.Rdtsc();
        long time = Stopwatch.GetTimestamp();

        // restore the thread affinity mask
        ThreadAffinity.Set(previousAffinity);

        double delta = (double)(time - _lastTime) / Stopwatch.Frequency;
        double error = (double)(time - firstTime) / Stopwatch.Frequency;

        // only use data if they are measured accurate enough (max 0.1ms delay)
        if (error < 0.0001)
        {
            // ignore the first reading because there are no initial values
            // ignore readings with too large or too small time window
            if (_lastTime != 0 && delta > 0.5 && delta < 2)
            {
                // update the TSC frequency with the new value
                TimeStampCounterFrequency = (timeStampCount - _lastTimeStampCount) / (1e6 * delta);
            }
            _lastTimeStampCount = timeStampCount;
            _lastTime = time;
        }
    }

    if (_cpuLoad.IsAvailable)
    {
        _cpuLoad.Update();
        if (_threadLoads != null)
        {
            for (int i = 0; i < _threadLoads.Length; i++)
            {
                // individual thread sensors may be absent; skip the gaps
                if (_threadLoads[i] != null)
                {
                    _threadLoads[i].Value = _cpuLoad.GetThreadLoad(i);
                }
            }
        }
        if (_totalLoad != null)
        {
            _totalLoad.Value = _cpuLoad.GetTotalLoad();
        }
    }
}
/// <summary>
/// Programs the four data fabric counters and resets the core totals and
/// update timestamp used for normalization.
/// </summary>
public void Initialize()
{
    ThreadAffinity.Set(1UL << monitoringThread);

    ulong dfEvent0 = GetDFPerfCtlValue(0x0, 0x3, true, 0x6, 0);
    ulong dfEvent1 = GetDFPerfCtlValue(0x40, 0x3, true, 0x6, 0);
    ulong dfEvent2 = GetDFPerfCtlValue(0x87, 1, true, 0, 0);
    ulong dfEvent3 = GetDFPerfCtlValue(0xC0, 0x3, true, 0x6, 0);

    Ring0.WriteMsr(MSR_DF_PERF_CTL_0, dfEvent0);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_1, dfEvent1);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_2, dfEvent2);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_3, dfEvent3);

    dataFabric.InitializeCoreTotals();
    lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
}
/// <summary>
/// Builds a CPU model from CPUID data: caches identification fields, detects
/// MSR/TSC support, creates load sensors, and estimates the TSC frequency.
/// </summary>
/// <param name="processorIndex">Index of this processor package</param>
/// <param name="cpuid">CPUID data indexed [core][thread]</param>
internal Cpu(int processorIndex, Cpuid[][] cpuid) : base(cpuid[0][0].Name)
{
    Cpuid = cpuid;
    Vendor = cpuid[0][0].Vendor;
    Family = cpuid[0][0].Family;
    Model = cpuid[0][0].Model;
    Stepping = cpuid[0][0].Stepping;
    ProcessorIndex = processorIndex;
    CoreCount = cpuid.Length;

    // check if processor has MSRs (CPUID leaf 1, EDX bit 5)
    HasModelSpecificRegisters = cpuid[0][0].Data.GetLength(0) > 1 &&
                                (cpuid[0][0].Data[1, 3] & 0x20) != 0;

    // check if processor has a TSC (CPUID leaf 1, EDX bit 4)
    HasTimeStampCounter = cpuid[0][0].Data.GetLength(0) > 1 &&
                          (cpuid[0][0].Data[1, 3] & 0x10) != 0;

    // check if processor supports an invariant TSC (ext leaf 0x80000007, EDX bit 8)
    _isInvariantTimeStampCounter = cpuid[0][0].ExtData.GetLength(0) > 7 &&
                                   (cpuid[0][0].ExtData[7, 3] & 0x100) != 0;

    // a total-load sensor only makes sense with more than one core
    TotalLoad = CoreCount > 1 ? new Sensor("CPU Total", SensorType.Load) : null;
    CoreLoads = new Sensor[CoreCount];
    for (var i = 0; i < CoreLoads.Length; i++)
    {
        CoreLoads[i] = new Sensor(CoreString(i), SensorType.Load);
    }
    _cpuLoad = new CpuLoad(cpuid);

    if (HasTimeStampCounter)
    {
        // pin to the first thread while sampling the TSC, then restore affinity
        var mask = ThreadAffinity.Set(1UL << cpuid[0][0].Thread);
        EstimateTimeStampCounterFrequency(
            out _estimatedTimeStampCounterFrequency,
            out _estimatedTimeStampCounterFrequencyError);
        ThreadAffinity.Set(mask);
    }
    else
    {
        _estimatedTimeStampCounterFrequency = 0;
    }
    TimeStampCounterFrequency = _estimatedTimeStampCounterFrequency;
}
/// <summary>
/// Programs all six core perf counter control registers on every thread.
/// </summary>
/// <param name="ctr0">Counter 0 event select</param>
/// <param name="ctr1">Counter 1 event select</param>
/// <param name="ctr2">Counter 2 event select</param>
/// <param name="ctr3">Counter 3 event select</param>
/// <param name="ctr4">Counter 4 event select</param>
/// <param name="ctr5">Counter 5 event select</param>
public void ProgramPerfCounters(ulong ctr0, ulong ctr1, ulong ctr2, ulong ctr3, ulong ctr4, ulong ctr5)
{
    for (int tIdx = 0; tIdx < this.GetThreadCount(); tIdx++)
    {
        // pin to the target thread so the writes hit its core's MSRs
        ThreadAffinity.Set(1UL << tIdx);
        Ring0.WriteMsr(MSR_PERF_CTL_0, ctr0);
        Ring0.WriteMsr(MSR_PERF_CTL_1, ctr1);
        Ring0.WriteMsr(MSR_PERF_CTL_2, ctr2);
        Ring0.WriteMsr(MSR_PERF_CTL_3, ctr3);
        Ring0.WriteMsr(MSR_PERF_CTL_4, ctr4);
        Ring0.WriteMsr(MSR_PERF_CTL_5, ctr5);
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with
/// event 0xA3 stall events; each counter's cmask matches its umask.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // no execute (umask 0x4, cmask 4)
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0xA3, 0x4, true, true, false, false, false, false, true, false, cmask: 4));
        // LDM pending (umask 0x6, cmask 6)
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0xA3, 0x6, true, true, false, false, false, false, true, false, cmask: 6));
        // L1D pending, pmc2 only (umask 0xC, cmask 0xC)
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0xA3, 0xC, true, true, false, false, false, false, true, false, cmask: 0xC));
        // L2 pending (umask 0x5, cmask 5)
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0xA3, 0x5, true, true, false, false, false, false, true, false, cmask: 5));
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with
/// instruction cache hit/miss and fetch stall events.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // ic hit (event 0x80, umask 0x1)
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0x80, 0x1, true, true, false, false, false, false, true, false, 0));
        // ic miss (umask 0x2)
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0x80, 0x2, true, true, false, false, false, false, true, false, 0));
        // ifetch stall (umask 0x4)
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0x80, 0x4, true, true, false, false, false, false, true, false, 0));
        // iq full (event 0x87, umask 0x4)
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0x87, 0x4, true, true, false, false, false, false, true, false, 0));
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with
/// load and lock related memory events.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // all loads (event 0xD0, umask 0x81)
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0xD0, 0x81, true, true, false, false, false, false, true, false, cmask: 0));
        // locked loads (umask 0x41)
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0xD0, 0x41, true, true, false, false, false, false, true, false, cmask: 0));
        // SQ split locks (event 0xF4, umask 0x10)
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0xF4, 0x10, true, true, false, false, false, false, true, false, cmask: 0));
        // Snoop hit (event 0xD2, umask 0x4)
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0xD2, 0x4, true, true, false, false, false, false, true, false, cmask: 0));
    }
}
/// <summary>
/// Refreshes the TSC frequency estimation and updates per-core and total load sensors.
/// </summary>
public override void Update()
{
    if (HasTimeStampCounter && _isInvariantTimeStampCounter)
    {
        // make sure always the same thread is used
        var mask = ThreadAffinity.Set(1UL << Cpuid[0][0].Thread);

        // read time before and after getting the TSC to estimate the error
        var firstTime = Stopwatch.GetTimestamp();
        var timeStampCount = Opcode.Rdtsc();
        var time = Stopwatch.GetTimestamp();

        // restore the thread affinity mask
        ThreadAffinity.Set(mask);

        var delta = (double)(time - _lastTime) / Stopwatch.Frequency;
        var error = (double)(time - firstTime) / Stopwatch.Frequency;

        // only use data if they are measured accurate enough (max 0.1ms delay)
        if (error < 0.0001)
        {
            // ignore the first reading because there are no initial values
            // ignore readings with too large or too small time window
            if (_lastTime != 0 && delta > 0.5 && delta < 2)
            {
                // update the TSC frequency with the new value
                TimeStampCounterFrequency = (timeStampCount - _lastTimeStampCount) / (1e6 * delta);
            }
            _lastTimeStampCount = timeStampCount;
            _lastTime = time;
        }
    }

    if (!_cpuLoad.IsAvailable)
    {
        return;
    }

    _cpuLoad.Update();
    for (var i = 0; i < CoreLoads.Length; i++)
    {
        CoreLoads[i].Value = _cpuLoad.GetCoreLoad(i);
    }
    if (TotalLoad != null)
    {
        TotalLoad.Value = _cpuLoad.GetTotalLoad();
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with the
/// same uops-issued event at cmask 1-4, counting across both SMT threads (anyThread),
/// to build an issue-width utilization histogram.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // PMC0 - all uops issued across both threads, cmask 1
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0xE, 0x1, true, true, false, false, false, anyThread: true, true, false, cmask: 1));
        // PMC1 - same event, cmask 2
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0xE, 0x1, true, true, false, false, false, anyThread: true, true, false, cmask: 2));
        // PMC2 - same event, cmask 3
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0xE, 0x1, true, true, false, false, false, anyThread: true, true, false, cmask: 3));
        // PMC3 - same event, cmask 4
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0xE, 0x1, true, true, false, false, false, anyThread: true, true, false, cmask: 4));
    }
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with the
/// MITE uops event at increasing cmask thresholds.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // MITE uops (event 0x79, umask 0x4) at cmask 1, 2, 4, 5
        // (the original comment said "1,2,3,5" but cmask 4 is what is programmed below)
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0x79, 0x4, true, true, false, false, false, false, true, false, 1));
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0x79, 0x4, true, true, false, false, false, false, true, false, 2));
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0x79, 0x4, true, true, false, false, false, false, true, false, 4));
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0x79, 0x4, true, true, false, false, false, false, true, false, 5));
    }
}
/// <summary>
/// Enables uncore counters and programs both CBo perf event selects on every CBo
/// with snoop response events.
/// </summary>
public void Initialize()
{
    ThreadAffinity.Set(0x1);
    cpu.EnableUncoreCounters();
    for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
    {
        // 0x22 = Snoop response, 0xFF = all responses
        Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx,
                       GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0));
        // 0x22 = Snoop response, umask 0x12 | 0x20 | 0x40 | 0x80.
        // NOTE(review): a stale comment here referenced umask 0x44/0x48 (non-modified
        // line hit / hitm), which does not match the programmed value — verify the
        // intended umask against the uncore event documentation.
        Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx,
                       GetUncorePerfEvtSelRegisterValue(0x22, 0x12 | 0x20 | 0x40 | 0x80, false, false, true, false, 0));
    }
}
/// <summary>
/// Enables uncore counters and programs both CBo perf event selects on every CBo
/// with snoop response events.
/// </summary>
public void Initialize()
{
    ThreadAffinity.Set(0x1);
    cpu.EnableUncoreCounters();

    // 0x22 = snoop response, umask 0xFF = all responses
    ulong allSnoopResponses = GetUncorePerfEvtSelRegisterValue(0x22, 0xFF, false, false, true, false, 0);

    // 0x22 = snoop response, umask 0x4 = non-modified line hit, umask 0x8 = modified line hit.
    // High 3 bits of umask select a filter: 0x20 = external snoop,
    // 0x40 = core memory request, 0x80 = L3 eviction
    ulong snoopHits = GetUncorePerfEvtSelRegisterValue(0x22, 0x4 | 0x8 | 0x20 | 0x40 | 0x80, false, false, true, false, 0);

    for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
    {
        Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL0_base + MSR_UNC_CBO_increment * cboIdx, allSnoopResponses);
        Ring0.WriteMsr(MSR_UNC_CBO_PERFEVTSEL1_base + MSR_UNC_CBO_increment * cboIdx, snoopHits);
    }
}
/// <summary>
/// Programs the L3 perf counters on each CCX's designated sample thread with
/// access, miss, miss latency, and miss SDP request events.
/// </summary>
public void Initialize()
{
    ulong l3Access = GetL3PerfCtlValue(0x04, 0xFF, true, 0xF, 0xFF);
    ulong l3Miss = GetL3PerfCtlValue(0x04, 0x01, true, 0xF, 0xFF);
    ulong l3MissLatency = GetL3PerfCtlValue(0x90, 0, true, 0xF, 0xFF);
    ulong l3MissSdpRequest = GetL3PerfCtlValue(0x9A, 0x1F, true, 0xF, 0xFF);

    foreach (KeyValuePair<int, int> ccxThread in l3Cache.ccxSampleThreads)
    {
        // program the CCX's counters from a thread that belongs to it
        ThreadAffinity.Set(1UL << ccxThread.Value);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_0, l3Access);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_1, l3Miss);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_2, l3MissLatency);
        Ring0.WriteMsr(MSR_L3_PERF_CTL_3, l3MissSdpRequest);
    }
}
/// <summary>
/// Programs the data fabric counters with outbound-bytes event selects taken
/// from the preliminary PPR.
/// </summary>
public void Initialize()
{
    // raw event select values from the preliminary PPR; the same value is
    // listed twice there, hence counters 2 and 3 being identical
    ulong outboundBytes0 = 0x7004002C7;
    ulong outboundBytes1 = 0x800400207;
    ulong outboundBytes2 = 0x800400247;
    ulong outboundBytes3 = 0x800400247;

    ThreadAffinity.Set(1UL << monitoringThread);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_0, outboundBytes0);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_1, outboundBytes1);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_2, outboundBytes2);
    Ring0.WriteMsr(MSR_DF_PERF_CTL_3, outboundBytes3);
    lastUpdateTime = DateTimeOffset.Now.ToUnixTimeMilliseconds();
}
/// <summary>
/// Reads every CBo's counter data and builds per-CBo and overall metrics.
/// </summary>
/// <returns>Per-CBo metrics plus an overall row</returns>
public MonitoringUpdateResults Update()
{
    MonitoringUpdateResults results = new MonitoringUpdateResults();
    results.unitMetrics = new string[cpu.CboCount][];
    cpu.InitializeCboTotals();

    // read all CBo counters from the same thread (CPU 0)
    ThreadAffinity.Set(0x1);
    for (uint cboIdx = 0; cboIdx < cpu.CboCount; cboIdx++)
    {
        cpu.UpdateCboCounterData(cboIdx);
        results.unitMetrics[cboIdx] = computeMetrics("CBo " + cboIdx, cpu.cboData[cboIdx]);
    }

    results.overallMetrics = computeMetrics("Overall", cpu.cboTotals);
    return results;
}
/// <summary>
/// Updates L3 counter data for every CCX. Also reads fixed counters on every
/// thread of each CCX to estimate the CCX clock (max over its cores), and
/// aggregates fixed-counter totals for the overall counter values.
/// </summary>
/// <returns>Per-CCX metrics, overall metrics, and overall counter values</returns>
public MonitoringUpdateResults Update()
{
    MonitoringUpdateResults results = new MonitoringUpdateResults();
    results.unitMetrics = new string[l3Cache.ccxSampleThreads.Count()][];
    float[] ccxClocks = new float[l3Cache.allCcxThreads.Count()];
    l3Cache.ClearTotals();
    ulong totalAperf = 0, totalMperf = 0, totalTsc = 0, totalIrPerfCount = 0;
    foreach (KeyValuePair <int, int> ccxThread in l3Cache.ccxSampleThreads)
    {
        // Try to determine frequency, by getting max frequency of cores in ccx
        foreach (int ccxThreadIdx in l3Cache.allCcxThreads[ccxThread.Key])
        {
            ThreadAffinity.Set(1UL << ccxThreadIdx);
            // normalization slot offset by GetThreadCount() — presumably to avoid
            // colliding with per-thread slots used elsewhere; confirm
            float normalizationFactor = l3Cache.GetNormalizationFactor(l3Cache.GetThreadCount() + ccxThreadIdx);
            ulong aperf, mperf, tsc, irperfcount;
            l3Cache.ReadFixedCounters(ccxThreadIdx, out aperf, out irperfcount, out tsc, out mperf);
            totalAperf += aperf;
            totalIrPerfCount += irperfcount;
            totalTsc += tsc;
            totalMperf += mperf;
            // effective clock = tsc * (aperf/mperf), normalized to counts/sec
            float clk = tsc * ((float)aperf / mperf) * normalizationFactor;
            if (clk > ccxClocks[ccxThread.Key])
            {
                ccxClocks[ccxThread.Key] = clk;
            }
            // when we reach the CCX's designated sample thread, read its L3 counters
            if (ccxThreadIdx == ccxThread.Value)
            {
                l3Cache.UpdateCcxL3CounterData(ccxThread.Key, ccxThread.Value);
                results.unitMetrics[ccxThread.Key] = computeMetrics("CCX " + ccxThread.Key, l3Cache.ccxCounterData[ccxThread.Key], ccxClocks[ccxThread.Key]);
            }
        }
    }

    // average the per-CCX clocks for the overall row
    float avgClk = 0;
    foreach (float ccxClock in ccxClocks)
    {
        avgClk += ccxClock;
    }
    avgClk /= l3Cache.allCcxThreads.Count();
    results.overallMetrics = computeMetrics("Overall", l3Cache.ccxTotals, avgClk);
    results.overallCounterValues = l3Cache.GetOverallL3CounterValues(totalAperf, totalMperf, totalIrPerfCount, totalTsc,
        "L3Access", "L3Miss", "L3MissLat/16", "L3MissSdpReq", "Unused", "Unused");
    return(results);
}
/// <summary>
/// Enables core performance counters and programs PMC0-3 on every thread with
/// retired FP instruction events (event 0xC7) at different umasks.
/// </summary>
public void Initialize()
{
    cpu.EnablePerformanceCounters();
    for (int threadIdx = 0; threadIdx < cpu.GetThreadCount(); threadIdx++)
    {
        ThreadAffinity.Set(1UL << threadIdx);
        // scalar single (umask 0x2)
        Ring0.WriteMsr(IA32_PERFEVTSEL0, GetPerfEvtSelRegisterValue(0xC7, 0x2, true, true, false, false, false, false, true, false, cmask: 0));
        // 128-bit packed single (umask 0x8) — original comment said "scalar 128B packed"
        Ring0.WriteMsr(IA32_PERFEVTSEL1, GetPerfEvtSelRegisterValue(0xC7, 0x8, true, true, false, false, false, false, true, false, cmask: 0));
        // 256-bit packed single (umask 0x20)
        Ring0.WriteMsr(IA32_PERFEVTSEL2, GetPerfEvtSelRegisterValue(0xC7, 0x20, true, true, false, false, false, false, true, false, cmask: 0));
        // All FP instr retired (umask 0xFF)
        Ring0.WriteMsr(IA32_PERFEVTSEL3, GetPerfEvtSelRegisterValue(0xC7, 0xFF, true, true, false, false, false, false, true, false, cmask: 0));
    }
}
/// <summary>
/// Programs the six L3 perf counters on each CCX's designated sample thread with
/// L3 fill/victim events plus L3 access and miss.
/// </summary>
public void Initialize()
{
    foreach (KeyValuePair <int, int> ccxThread in l3Cache.ccxSampleThreads)
    {
        // program each CCX's counters from a thread belonging to that CCX
        ThreadAffinity.Set(1UL << ccxThread.Value);
        // L2 victims (L3 fill?) (event 0x3, umask 0x1)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_0, GetL3PerfCtlValue(0x3, 0x1, true, 0xF, 0xFF));
        // L3FillVicReq, L2 change to writeable (umask 0x2)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_1, GetL3PerfCtlValue(0x3, 0x2, true, 0xF, 0xFF));
        // L3FillVicReq, L2 miss with victim (umask 0b1010100)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_2, GetL3PerfCtlValue(0x3, 0b1010100, true, 0xF, 0xFF));
        // L3FillVicReq, L2 miss (umask 0b10101000)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_3, GetL3PerfCtlValue(0x3, 0b10101000, true, 0xF, 0xFF));
        // L3 access (event 0x04, umask 0xFF)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_4, GetL3PerfCtlValue(0x04, 0xFF, true, 0xF, 0xFF));
        // L3 miss (event 0x04, umask 0x01)
        Ring0.WriteMsr(MSR_L3_PERF_CTL_5, GetL3PerfCtlValue(0x04, 0x01, true, 0xF, 0xFF));
    }
}