diff options
Diffstat (limited to 'arch/x86/events')
| -rw-r--r-- | arch/x86/events/amd/brs.c | 3 | ||||
| -rw-r--r-- | arch/x86/events/amd/ibs.c | 282 | ||||
| -rw-r--r-- | arch/x86/events/amd/iommu.c | 2 | ||||
| -rw-r--r-- | arch/x86/events/amd/lbr.c | 3 | ||||
| -rw-r--r-- | arch/x86/events/core.c | 31 | ||||
| -rw-r--r-- | arch/x86/events/intel/bts.c | 43 | ||||
| -rw-r--r-- | arch/x86/events/intel/core.c | 296 | ||||
| -rw-r--r-- | arch/x86/events/intel/ds.c | 216 | ||||
| -rw-r--r-- | arch/x86/events/intel/lbr.c | 73 | ||||
| -rw-r--r-- | arch/x86/events/intel/p4.c | 7 | ||||
| -rw-r--r-- | arch/x86/events/intel/p6.c | 26 | ||||
| -rw-r--r-- | arch/x86/events/perf_event.h | 61 | ||||
| -rw-r--r-- | arch/x86/events/perf_event_flags.h | 2 | ||||
| -rw-r--r-- | arch/x86/events/rapl.c | 20 |
14 files changed, 794 insertions, 271 deletions
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 780acd3dff22..ec3427463382 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -381,7 +381,8 @@ static void amd_brs_poison_buffer(void) * On ctxswin, sched_in = true, called after the PMU has started * On ctxswout, sched_in = false, called before the PMU is stopped */ -void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index e7a8b8758e08..66f981865091 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -28,9 +28,6 @@ static u32 ibs_caps; #include <asm/nmi.h> #include <asm/amd-ibs.h> -#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) -#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT - /* attr.config2 */ #define IBS_SW_FILTER_MASK 1 @@ -89,6 +86,7 @@ struct perf_ibs { u64 cnt_mask; u64 enable_mask; u64 valid_mask; + u16 min_period; u64 max_period; unsigned long offset_mask[1]; int offset_max; @@ -270,11 +268,19 @@ static int validate_group(struct perf_event *event) return 0; } +static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + return perf_ibs == &perf_ibs_op && + (ibs_caps & IBS_CAPS_OPLDLAT) && + (event->attr.config1 & 0xFFF); +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; - u64 max_cnt, config; + u64 config; int ret; perf_ibs = get_ibs_pmu(event->attr.type); @@ -310,25 +316,47 @@ static int perf_ibs_init(struct perf_event *event) if (config & perf_ibs->cnt_mask) /* raw max_cnt may not be set */ return -EINVAL; - if (!event->attr.sample_freq && hwc->sample_period & 0x0f) - /* - * lower 4 bits can not be set in ibs max cnt, - * but allowing it in case we adjust the - * sample period to set a frequency. - */ - return -EINVAL; - hwc->sample_period &= ~0x0FULL; - if (!hwc->sample_period) - hwc->sample_period = 0x10; + + if (event->attr.freq) { + hwc->sample_period = perf_ibs->min_period; + } else { + /* Silently mask off lower nibble. IBS hw mandates it. */ + hwc->sample_period &= ~0x0FULL; + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; + } } else { - max_cnt = config & perf_ibs->cnt_mask; + u64 period = 0; + + if (event->attr.freq) + return -EINVAL; + + if (perf_ibs == &perf_ibs_op) { + period = (config & IBS_OP_MAX_CNT) << 4; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + period |= config & IBS_OP_MAX_CNT_EXT_MASK; + } else { + period = (config & IBS_FETCH_MAX_CNT) << 4; + } + config &= ~perf_ibs->cnt_mask; - event->attr.sample_period = max_cnt << 4; - hwc->sample_period = event->attr.sample_period; + event->attr.sample_period = period; + hwc->sample_period = period; + + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; } - if (!hwc->sample_period) - return -EINVAL; + if (perf_ibs_ldlat_event(perf_ibs, event)) { + u64 ldlat = event->attr.config1 & 0xFFF; + + if (ldlat < 128 || ldlat > 2048) + return -EINVAL; + ldlat >>= 7; + + config |= (ldlat - 1) << 59; + config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN; + } /* * If we modify hwc->sample_period, we also need to update @@ -349,7 +377,8 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, int overflow; /* ignore lower 4 bits in min count: */ - overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + overflow = perf_event_set_period(hwc, perf_ibs->min_period, + perf_ibs->max_period, period); local64_set(&hwc->prev_count, 0); return overflow; @@ -447,6 +476,9 @@ static void perf_ibs_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + perf_ibs_set_period(perf_ibs, hwc, &period); if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { config |= period & IBS_OP_MAX_CNT_EXT_MASK; @@ -554,6 +586,28 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +static int perf_ibs_check_period(struct perf_event *event, u64 value) +{ + struct perf_ibs *perf_ibs; + u64 low_nibble; + + if (event->attr.freq) + return 0; + + perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + low_nibble = value & 0xFULL; + + /* + * This contradicts with perf_ibs_init() which allows sample period + * with lower nibble bits set but silently masks them off. Whereas + * this returns error. + */ + if (low_nibble || value < perf_ibs->min_period) + return -EINVAL; + + return 0; +} + /* * We need to initialize with empty group if all attributes in the * group are dynamic. @@ -572,7 +626,10 @@ PMU_FORMAT_ATTR(cnt_ctl, "config:19"); PMU_FORMAT_ATTR(swfilt, "config2:0"); PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11"); PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1"); +PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1"); static umode_t zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) @@ -580,6 +637,18 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; } +static umode_t +ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0; +} + +static umode_t +ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0; +} + static struct attribute *fetch_attrs[] = { &format_attr_rand_en.attr, &format_attr_swfilt.attr, @@ -596,6 +665,16 @@ static struct attribute *zen4_ibs_extensions_attrs[] = { NULL, }; +static struct attribute *ibs_op_ldlat_cap_attrs[] = { + &ibs_op_ldlat_cap.attr.attr, + NULL, +}; + +static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = { + &ibs_op_dtlb_pgsize_cap.attr.attr, + NULL, +}; + static struct attribute_group group_fetch_formats = { .name = "format", .attrs = fetch_attrs, @@ -613,6 +692,18 @@ static struct attribute_group group_zen4_ibs_extensions = { .is_visible = zen4_ibs_extensions_is_visible, }; +static struct attribute_group group_ibs_op_ldlat_cap = { + .name = "caps", + .attrs = ibs_op_ldlat_cap_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + +static struct attribute_group group_ibs_op_dtlb_pgsize_cap = { + .name = "caps", + .attrs = ibs_op_dtlb_pgsize_cap_attrs, + .is_visible = ibs_op_dtlb_pgsize_is_visible, +}; + static const struct attribute_group *fetch_attr_groups[] = { &group_fetch_formats, &empty_caps_group, @@ -651,6 +742,11 @@ static struct attribute_group group_op_formats = { .attrs = op_attrs, }; +static struct attribute *ibs_op_ldlat_format_attrs[] = { + &ibs_op_ldlat_format.attr.attr, + NULL, +}; + static struct attribute_group group_cnt_ctl = { .name = "format", .attrs = cnt_ctl_attrs, @@ -669,10 +765,19 @@ static const struct attribute_group *op_attr_groups[] = { NULL, }; +static struct attribute_group group_ibs_op_ldlat_format = { + .name = "format", + .attrs = ibs_op_ldlat_format_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + static const struct attribute_group *op_attr_update[] = { &group_cnt_ctl, &group_op_l3missonly, &group_zen4_ibs_extensions, + &group_ibs_op_ldlat_cap, + &group_ibs_op_ldlat_format, + &group_ibs_op_dtlb_pgsize_cap, NULL, }; @@ -686,12 +791,14 @@ static struct perf_ibs perf_ibs_fetch = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSFETCHCTL, - .config_mask = IBS_FETCH_CONFIG_MASK, + .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN, .cnt_mask = IBS_FETCH_MAX_CNT, .enable_mask = IBS_FETCH_ENABLE, .valid_mask = IBS_FETCH_VAL, + .min_period = 0x10, .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, @@ -709,13 +816,15 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSOPCTL, - .config_mask = IBS_OP_CONFIG_MASK, + .config_mask = IBS_OP_MAX_CNT, .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, + .min_period = 0x90, .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, @@ -917,6 +1026,10 @@ static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, if (!op_data3->dc_lin_addr_valid) return; + if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) && + !op_data3->dc_phy_addr_valid) + return; + if (!op_data3->dc_l1tlb_miss) { data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; return; @@ -941,6 +1054,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, data_src->mem_lock = PERF_MEM_LOCK_LOCKED; } +/* Be careful. Works only for contiguous MSRs. */ +#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL) #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, @@ -1021,21 +1136,92 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, } } -static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, +static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return perf_ibs == &perf_ibs_op && + sample_type & (PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT_TYPE | + PERF_SAMPLE_ADDR | + PERF_SAMPLE_PHYS_ADDR); +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, + struct perf_event *event, int check_rip) { - if (sample_type & PERF_SAMPLE_RAW || - (perf_ibs == &perf_ibs_op && - (sample_type & PERF_SAMPLE_DATA_SRC || - sample_type & PERF_SAMPLE_WEIGHT_TYPE || - sample_type & PERF_SAMPLE_ADDR || - sample_type & PERF_SAMPLE_PHYS_ADDR))) + if (event->attr.sample_type & PERF_SAMPLE_RAW || + perf_ibs_is_mem_sample_type(perf_ibs, event) || + perf_ibs_ldlat_event(perf_ibs, event)) return perf_ibs->offset_max; else if (check_rip) return 3; return 1; } +static bool perf_ibs_is_kernel_data_addr(struct perf_event *event, + struct perf_ibs_data *ibs_data) +{ + u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW; + union ibs_op_data3 op_data3; + u64 dc_lin_addr; + + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + + return unlikely((event->attr.sample_type & sample_type_mask) && + op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr)); +} + +static bool perf_ibs_is_kernel_br_target(struct perf_event *event, + struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + union ibs_op_data op_data; + u64 br_target; + + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + br_target = ibs_data->regs[br_target_idx]; + + return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && + op_data.op_brn_ret && kernel_ip(br_target)); +} + +static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event, + struct pt_regs *regs, struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + if (perf_exclude_event(event, regs)) + return true; + + if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) + return false; + + if (perf_ibs_is_kernel_data_addr(event, ibs_data)) + return true; + + if (br_target_idx != -1 && + perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx)) + return true; + + return false; +} + +static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs, + struct perf_ibs_data *ibs_data) +{ + if (perf_ibs == &perf_ibs_op) { + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; + return; + } + + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -1048,6 +1234,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) int offset, size, check_rip, offset_max, throttle = 0; unsigned int msr; u64 *buf, *config, period, new_config = 0; + int br_target_idx = -1; if (!test_bit(IBS_STARTED, pcpu->state)) { fail: @@ -1084,7 +1271,7 @@ fail: offset = 1; check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); - offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip); do { rdmsrl(msr + offset, *buf++); @@ -1093,6 +1280,22 @@ fail: perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + + if (perf_ibs_ldlat_event(perf_ibs, event)) { + union ibs_op_data3 op_data3; + + op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + /* + * Opening event is errored out if load latency threshold is + * outside of [128, 2048] range. Since the event has reached + * interrupt handler, we can safely assume the threshold is + * within [128, 2048] range. + */ + if (!op_data3.ld_op || !op_data3.dc_miss || + op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF)) + goto out; + } + /* * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately * depending on their availability. @@ -1102,6 +1305,7 @@ fail: if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_BRNTRGT) { rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + br_target_idx = size; size++; } if (ibs_caps & IBS_CAPS_OPDATA4) { @@ -1129,10 +1333,19 @@ fail: } if ((event->attr.config2 & IBS_SW_FILTER_MASK) && - perf_exclude_event(event, ®s)) { + perf_ibs_swfilt_discard(perf_ibs, event, ®s, &ibs_data, br_target_idx)) { throttle = perf_event_account_interrupt(event); goto out; } + /* + * Prevent leaking physical addresses to unprivileged users. Skip + * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for + * unprivileged users. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_allow_kernel(&event->attr)) { + perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); + } if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw = (struct perf_raw_record){ @@ -1155,6 +1368,10 @@ fail: perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); + + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + out: if (throttle) { perf_ibs_stop(event, 0); @@ -1244,7 +1461,8 @@ static __init int perf_ibs_op_init(void) if (ibs_caps & IBS_CAPS_OPCNTEXT) { perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; - perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK | + IBS_OP_CUR_CNT_EXT_MASK); } if (ibs_caps & IBS_CAPS_ZEN4) diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index b15f7b950d2e..f8228d8243f7 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -30,7 +30,7 @@ #define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) #define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) -#define IOMMU_NAME_SIZE 16 +#define IOMMU_NAME_SIZE 24 struct perf_amd_iommu { struct list_head list; diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 19c7b76e21bc..c06ccca96851 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -371,7 +371,8 @@ void amd_pmu_lbr_del(struct perf_event *event) perf_sched_cb_dec(event->pmu); } -void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f218ac0d445..6866cc5acb0b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -87,13 +87,14 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); -DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter); +DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup); + /* * This one is magic, it will get called even when PMU init fails (because * there is no PMU), in which case it should simply return NULL. @@ -628,7 +629,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (event->attr.sample_period && x86_pmu.limit_period) { + if (!event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) @@ -1298,6 +1299,15 @@ static void x86_pmu_enable(struct pmu *pmu) if (cpuc->n_added) { int n_running = cpuc->n_events - cpuc->n_added; + + /* + * The late setup (after counters are scheduled) + * is required for some cases, e.g., PEBS counters + * snapshotting. Because an accurate counter index + * is needed. + */ + static_call_cond(x86_pmu_late_setup)(); + /* * apply assignment obtained either from * hw_perf_group_sched_in() or x86_pmu_enable() @@ -2028,13 +2038,14 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); - static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); static_call_update(x86_pmu_filter, x86_pmu.filter); + + static_call_update(x86_pmu_late_setup, x86_pmu.late_setup); } static void _x86_pmu_read(struct perf_event *event) @@ -2625,15 +2636,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; -static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) -{ - static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in); -} - -static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) +static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { - static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc); + static_call_cond(x86_pmu_sched_task)(pmu_ctx, task, sched_in); } void perf_check_microcode(void) @@ -2700,7 +2706,6 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, - .swap_task_ctx = x86_pmu_swap_task_ctx, .check_period = x86_pmu_check_period, .aux_output_match = x86_pmu_aux_output_match, @@ -2844,7 +2849,7 @@ static bool is_uprobe_at_func_entry(struct pt_regs *regs) return true; /* endbr64 (64-bit only) */ - if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn)) + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) return true; return false; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 8f78b0c900ef..39a987d5eb6e 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -36,7 +36,7 @@ enum { BTS_STATE_ACTIVE, }; -static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); +static struct bts_ctx __percpu *bts_ctx; #define BTS_RECORD_SIZE 24 #define BTS_SAFETY_MARGIN 4080 @@ -58,7 +58,7 @@ struct bts_buffer { local_t head; unsigned long end; void **data_pages; - struct bts_phys buf[]; + struct bts_phys buf[] __counted_by(nr_bufs); }; static struct pmu bts_pmu; @@ -231,7 +231,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); static void __bts_event_start(struct perf_event *event) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct bts_buffer *buf = perf_get_aux(&bts->handle); u64 config = 0; @@ -260,7 +260,7 @@ static void __bts_event_start(struct perf_event *event) static void bts_event_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct bts_buffer *buf; buf = perf_aux_output_begin(&bts->handle, event); @@ -290,7 +290,7 @@ fail_stop: static void __bts_event_stop(struct perf_event *event, int state) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ WRITE_ONCE(bts->state, state); @@ -305,7 +305,7 @@ static void __bts_event_stop(struct perf_event *event, int state) static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct bts_buffer *buf = NULL; int state = READ_ONCE(bts->state); @@ -338,9 +338,14 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - int state = READ_ONCE(bts->state); + struct bts_ctx *bts; + int state; + + if (!bts_ctx) + return; + bts = this_cpu_ptr(bts_ctx); + state = READ_ONCE(bts->state); /* * Here we transition from INACTIVE to ACTIVE; * if we instead are STOPPED from the interrupt handler, @@ -358,7 +363,12 @@ void intel_bts_enable_local(void) void intel_bts_disable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts; + + if (!bts_ctx) + return; + + bts = this_cpu_ptr(bts_ctx); /* * Here we transition from ACTIVE to INACTIVE; @@ -450,12 +460,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct perf_event *event = bts->handle.event; + struct bts_ctx *bts; + struct perf_event *event; struct bts_buffer *buf; s64 old_head; int err = -ENOSPC, handled = 0; + if (!bts_ctx) + return 0; + + bts = this_cpu_ptr(bts_ctx); + event = bts->handle.event; /* * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. @@ -518,7 +533,7 @@ static void bts_event_del(struct perf_event *event, int mode) static int bts_event_add(struct perf_event *event, int mode) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -605,6 +620,10 @@ static __init int bts_init(void) return -ENODEV; } + bts_ctx = alloc_percpu(struct bts_ctx); + if (!bts_ctx) + return -ENOMEM; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7601196d1d18..1ac39611fea8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -397,34 +397,28 @@ static struct event_constraint intel_lnc_event_constraints[] = { METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + INTEL_EVENT_CONSTRAINT(0x20, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x012a, 0xf), + INTEL_UEVENT_CONSTRAINT(0x012b, 0xf), INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), - /* - * Generally event codes < 0x90 are restricted to counters 0-3. - * The 0x2E and 0x3C are exception, which has no restriction. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), - INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), - INTEL_EVENT_CONSTRAINT(0xce, 0x1), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), - /* - * Generally event codes >= 0x90 are likely to have no restrictions. - * The exception are defined as above. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf), EVENT_CONSTRAINT_END }; @@ -2720,7 +2714,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots, * modify by a NMI. PMU has to be disabled before calling this function. */ -static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *other; @@ -2728,13 +2722,24 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) bool reset = true; int idx; - /* read Fixed counter 3 */ - rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); - if (!slots) - return 0; + if (!val) { + /* read Fixed counter 3 */ + rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); + if (!slots) + return 0; - /* read PERF_METRICS */ - rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + /* read PERF_METRICS */ + rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + } else { + slots = val[0]; + metrics = val[1]; + /* + * Don't reset the PERF_METRICS and Fixed counter 3 + * for each PEBS record read. Utilize the RDPMC metrics + * clear mode. + */ + reset = false; + } for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { if (!is_topdown_idx(idx)) @@ -2777,36 +2782,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) return slots; } -static u64 icl_update_topdown_event(struct perf_event *event) +static u64 icl_update_topdown_event(struct perf_event *event, u64 *val) { return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + - x86_pmu.num_topdown_events - 1); + x86_pmu.num_topdown_events - 1, + val); } -DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); -static void intel_pmu_read_topdown_event(struct perf_event *event) +static void intel_pmu_read_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) || + is_pebs_counter_event_group(event)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool pmu_enabled = cpuc->enabled; - /* Only need to call update_topdown_event() once for group read. */ - if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && - !is_slots_event(event)) - return; + /* Only need to call update_topdown_event() once for group read. */ + if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ)) + return; - perf_pmu_disable(event->pmu); - static_call(intel_pmu_update_topdown_event)(event); - perf_pmu_enable(event->pmu); -} + cpuc->enabled = 0; + if (pmu_enabled) + intel_pmu_disable_all(); -static void intel_pmu_read_event(struct perf_event *event) -{ - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_auto_reload_read(event); - else if (is_topdown_count(event)) - intel_pmu_read_topdown_event(event); - else - x86_perf_event_update(event); + /* + * If the PEBS counters snapshotting is enabled, + * the topdown event is available in PEBS records. + */ + if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + static_call(intel_pmu_update_topdown_event)(event, NULL); + else + intel_pmu_drain_pebs_buffer(); + + cpuc->enabled = pmu_enabled; + if (pmu_enabled) + intel_pmu_enable_all(0); + + return; + } + + x86_perf_event_update(event); } static void intel_pmu_enable_fixed(struct perf_event *event) @@ -2938,7 +2954,7 @@ static int intel_pmu_set_period(struct perf_event *event) static u64 intel_pmu_update(struct perf_event *event) { if (unlikely(is_topdown_count(event))) - return static_call(intel_pmu_update_topdown_event)(event); + return static_call(intel_pmu_update_topdown_event)(event, NULL); return x86_perf_event_update(event); } @@ -3076,7 +3092,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); - x86_pmu.drain_pebs(regs, &data); + static_call(x86_pmu_drain_pebs)(regs, &data); status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* @@ -3104,7 +3120,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { handled++; - static_call(intel_pmu_update_topdown_event)(NULL); + static_call(intel_pmu_update_topdown_event)(NULL, NULL); } /* @@ -3122,6 +3138,27 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) if (!test_bit(bit, cpuc->active_mask)) continue; + /* + * There may be unprocessed PEBS records in the PEBS buffer, + * which still stores the previous values. + * Process those records first before handling the latest value. + * For example, + * A is a regular counter + * B is a PEBS event which reads A + * C is a PEBS event + * + * The following can happen: + * B-assist A=1 + * C A=2 + * B-assist A=3 + * A-overflow-PMI A=4 + * C-assist-PMI (PEBS buffer) A=5 + * + * The PEBS buffer has to be drained before handling the A-PMI + */ + if (is_pebs_counter_event_group(event)) + x86_pmu.drain_pebs(regs, &data); + if (!intel_pmu_save_and_restart(event)) continue; @@ -3958,6 +3995,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. + */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3969,6 +4085,12 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; @@ -4069,6 +4191,13 @@ static int intel_pmu_hw_config(struct perf_event *event) event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; } + if ((event->attr.sample_type & PERF_SAMPLE_READ) && + (x86_pmu.intel_cap.pebs_format >= 6) && + x86_pmu.intel_cap.pebs_baseline && + is_sampling_event(event) && + event->attr.precise_ip) + event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; + if ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE)) return 0; @@ -4606,9 +4735,9 @@ static int adl_hw_config(struct perf_event *event) return -EOPNOTSUPP; } -static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) +static enum intel_cpu_type adl_get_hybrid_cpu_type(void) { - return HYBRID_INTEL_CORE; + return INTEL_CPU_TYPE_CORE; } static inline bool erratum_hsw11(struct perf_event *event) @@ -4905,20 +5034,22 @@ static inline bool intel_pmu_broken_perf_cap(void) static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { - unsigned int sub_bitmaps, eax, ebx, ecx, edx; + unsigned int cntr, fixed_cntr, ecx, edx; + union cpuid35_eax eax; + union cpuid35_ebx ebx; - cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx); + cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); - if (ebx & ARCH_PERFMON_EXT_UMASK2) + if (ebx.split.umask2) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2; - if (ebx & ARCH_PERFMON_EXT_EQ) + if (ebx.split.eq) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; - if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &eax, &ebx, &ecx, &edx); - pmu->cntr_mask64 = eax; - pmu->fixed_cntr_mask64 = ebx; + &cntr, &fixed_cntr, &ecx, &edx); + pmu->cntr_mask64 = cntr; + pmu->fixed_cntr_mask64 = fixed_cntr; } if (!intel_pmu_broken_perf_cap()) { @@ -4941,11 +5072,6 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) else pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - else - pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; - intel_pmu_check_event_constraints(pmu->event_constraints, pmu->cntr_mask64, pmu->fixed_cntr_mask64, @@ -4956,7 +5082,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - u8 cpu_type = get_this_hybrid_cpu_type(); + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + enum intel_cpu_type cpu_type = c->topo.intel_type; int i; /* @@ -4965,7 +5092,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) * on it. There should be a fixup function provided for these * troublesome CPUs (->get_hybrid_cpu_type). */ - if (cpu_type == HYBRID_INTEL_NONE) { + if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) { if (x86_pmu.get_hybrid_cpu_type) cpu_type = x86_pmu.get_hybrid_cpu_type(); else @@ -4982,16 +5109,16 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; u32 native_id; - if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big) + if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big) return &x86_pmu.hybrid_pmu[i]; - if (cpu_type == HYBRID_INTEL_ATOM) { + if (cpu_type == INTEL_CPU_TYPE_ATOM) { if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - native_id = get_this_hybrid_cpu_native_id(); - if (native_id == skt_native_id && pmu_type == hybrid_small) + native_id = c->topo.intel_native_model_id; + if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - if (native_id == cmt_native_id && pmu_type == hybrid_tiny) + if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny) return &x86_pmu.hybrid_pmu[i]; } } @@ -5023,9 +5150,6 @@ static bool init_hybrid_pmu(int cpu) pr_info("%s PMU driver: ", pmu->name); - if (pmu->intel_cap.pebs_output_pt_available) - pr_cont("PEBS-via-PT "); - pr_cont("\n"); x86_pmu_show_pmu_cap(&pmu->pmu); @@ -5048,8 +5172,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. */ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; @@ -5168,16 +5295,10 @@ static void intel_pmu_cpu_dead(int cpu) } static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, - bool sched_in) + struct task_struct *task, bool sched_in) { intel_pmu_pebs_sched_task(pmu_ctx, sched_in); - intel_pmu_lbr_sched_task(pmu_ctx, sched_in); -} - -static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc); + intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in); } static int intel_pmu_check_period(struct perf_event *event, u64 value) @@ -5348,7 +5469,6 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, - .swap_task_ctx = intel_pmu_swap_task_ctx, .check_period = intel_pmu_check_period, @@ -6370,11 +6490,9 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; if (pmu->pmu_type & hybrid_small_tiny) { pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; pmu->mid_ack = true; } else if (pmu->pmu_type & hybrid_big) { pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; pmu->late_ack = true; } } @@ -6466,15 +6584,21 @@ __init int intel_pmu_init(void) char *name; struct x86_hybrid_pmu *pmu; + /* Architectural Perfmon was introduced starting with Core "Yonah" */ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xb: + case 6: + if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) + return p6_pmu_init(); + break; + case 11: return knc_pmu_init(); - case 0xf: + case 15: return p4_pmu_init(); } + + pr_cont("unsupported CPU family %d model %d ", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -6622,7 +6746,7 @@ __init int intel_pmu_init(void) case INTEL_ATOM_SILVERMONT_D: case INTEL_ATOM_SILVERMONT_MID: case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT_MID2: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba74e1198328..1f7e1a692a7a 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -953,11 +953,11 @@ unlock: return 1; } -static inline void intel_pmu_drain_pebs_buffer(void) +void intel_pmu_drain_pebs_buffer(void) { struct perf_sample_data data; - x86_pmu.drain_pebs(NULL, &data); + static_call(x86_pmu_drain_pebs)(NULL, &data); } /* @@ -1199,7 +1199,7 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), - INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff), + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc), INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ @@ -1294,6 +1294,19 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) ds->pebs_interrupt_threshold = threshold; } +#define PEBS_DATACFG_CNTRS(x) \ + ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK) + +#define PEBS_DATACFG_CNTR_BIT(x) \ + (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT) + +#define PEBS_DATACFG_FIX(x) \ + ((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK) + +#define PEBS_DATACFG_FIX_BIT(x) \ + (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \ + << PEBS_DATACFG_FIX_SHIFT) + static void adaptive_pebs_record_size_update(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1308,10 +1321,58 @@ static void adaptive_pebs_record_size_update(void) sz += sizeof(struct pebs_xmm); if (pebs_data_cfg & PEBS_DATACFG_LBRS) sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); + if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) { + sz += sizeof(struct pebs_cntr_header); + + /* Metrics base and Metrics Data */ + if (pebs_data_cfg & PEBS_DATACFG_METRICS) + sz += 2 * sizeof(u64); + + if (pebs_data_cfg & PEBS_DATACFG_CNTR) { + sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) + + hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) * + sizeof(u64); + } + } cpuc->pebs_record_size = sz; } +static void __intel_pmu_pebs_update_cfg(struct perf_event *event, + int idx, u64 *pebs_data_cfg) +{ + if (is_metric_event(event)) { + *pebs_data_cfg |= PEBS_DATACFG_METRICS; + return; + } + + *pebs_data_cfg |= PEBS_DATACFG_CNTR; + + if (idx >= INTEL_PMC_IDX_FIXED) + *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED); + else + *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx); +} + + +static void intel_pmu_late_setup(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + u64 pebs_data_cfg = 0; + int i; + + for (i = 0; i < cpuc->n_events; i++) { + event = cpuc->event_list[i]; + if (!is_pebs_counter_event_group(event)) + continue; + __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg); + } + + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) + cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW; +} + #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_WEIGHT_TYPE | \ @@ -1914,12 +1975,89 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #endif } +static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc) +{ + int shift = 64 - x86_pmu.cntval_bits; + struct hw_perf_event *hwc; + u64 delta, prev_pmc; + + /* + * A recorded counter may not have an assigned event in the + * following cases. The value should be dropped. + * - An event is deleted. There is still an active PEBS event. + * The PEBS record doesn't shrink on pmu::del(). + * If the counter of the deleted event once occurred in a PEBS + * record, PEBS still records the counter until the counter is + * reassigned. + * - An event is stopped for some reason, e.g., throttled. + * During this period, another event is added and takes the + * counter of the stopped event. The stopped event is assigned + * to another new and uninitialized counter, since the + * x86_pmu_start(RELOAD) is not invoked for a stopped event. + * The PEBS__DATA_CFG is updated regardless of the event state. + * The uninitialized counter can be recorded in a PEBS record. + * But the cpuc->events[uninitialized_counter] is always NULL, + * because the event is stopped. The uninitialized value is + * safely dropped. + */ + if (!event) + return; + + hwc = &event->hw; + prev_pmc = local64_read(&hwc->prev_count); + + /* Only update the count when the PMU is disabled */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + local64_set(&hwc->prev_count, pmc); + + delta = (pmc << shift) - (prev_pmc << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc, + struct perf_event *event, + struct pebs_cntr_header *cntr, + void *next_record) +{ + int bit; + + for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) { + intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record); + next_record += sizeof(u64); + } + + for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) { + /* The slots event will be handled with perf_metric later */ + if ((cntr->metrics == INTEL_CNTR_METRICS) && + (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) { + next_record += sizeof(u64); + continue; + } + intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED], + *(u64 *)next_record); + next_record += sizeof(u64); + } + + /* HW will reload the value right after the overflow. */ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period); + + if (cntr->metrics == INTEL_CNTR_METRICS) { + static_call(intel_pmu_update_topdown_event) + (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS], + (u64 *)next_record); + next_record += 2 * sizeof(u64); + } +} + #define PEBS_LATENCY_MASK 0xffff /* * With adaptive PEBS the layout depends on what fields are configured. */ - static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -2049,6 +2187,28 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } } + if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) { + struct pebs_cntr_header *cntr = next_record; + unsigned int nr; + + next_record += sizeof(struct pebs_cntr_header); + /* + * The PEBS_DATA_CFG is a global register, which is the + * superset configuration for all PEBS events. + * For the PEBS record of non-sample-read group, ignore + * the counter snapshot fields. + */ + if (is_pebs_counter_event_group(event)) { + __setup_pebs_counter_group(cpuc, event, cntr, next_record); + data->sample_flags |= PERF_SAMPLE_READ; + } + + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed); + if (cntr->metrics == INTEL_CNTR_METRICS) + nr += 2; + next_record += nr * sizeof(u64); + } + WARN_ONCE(next_record != __pebs + basic->format_size, "PEBS record size %u, expected %llu, config %llx\n", basic->format_size, @@ -2094,15 +2254,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } -void intel_pmu_auto_reload_read(struct perf_event *event) -{ - WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); - - perf_pmu_disable(event->pmu); - intel_pmu_drain_pebs_buffer(); - perf_pmu_enable(event->pmu); -} - /* * Special variant of intel_pmu_save_and_restart() for auto-reload. */ @@ -2211,13 +2362,21 @@ __intel_pmu_pebs_last_event(struct perf_event *event, } if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - /* - * Now, auto-reload is only enabled in fixed period mode. - * The reload value is always hwc->sample_period. - * May need to change it, if auto-reload is enabled in - * freq mode later. - */ - intel_pmu_save_and_restart_reload(event, count); + if ((is_pebs_counter_event_group(event))) { + /* + * The value of each sample has been updated when setup + * the corresponding sample data. + */ + perf_event_update_userpage(event); + } else { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } } else intel_pmu_save_and_restart(event); } @@ -2552,6 +2711,11 @@ void __init intel_ds_init(void) break; case 6: + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; + x86_pmu.late_setup = intel_pmu_late_setup; + } + fallthrough; case 5: x86_pmu.pebs_ept = 1; fallthrough; @@ -2576,9 +2740,17 @@ void __init intel_ds_init(void) PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); } - pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual); - if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { + /* + * The PEBS-via-PT is not supported on hybrid platforms, + * because not all CPUs of a hybrid machine support it. + * The global x86_pmu.intel_cap, which only contains the + * common capabilities, is used to check the availability + * of the feature. The per-PMU pebs_output_pt_available + * in a hybrid machine should be ignored. + */ + if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index dc641b50814e..f44c3d866f24 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -422,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx) return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); } +static inline bool has_lbr_callstack_users(void *ctx) +{ + return task_context_opt(ctx)->lbr_callstack_users || + x86_pmu.lbr_callstack_users; +} + static void __intel_pmu_lbr_restore(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0 || + if (!has_lbr_callstack_users(ctx) || task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { intel_pmu_lbr_reset(); return; @@ -503,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0) { + if (!has_lbr_callstack_users(ctx)) { task_context_opt(ctx)->lbr_stack_state = LBR_NONE; return; } @@ -516,32 +522,11 @@ static void __intel_pmu_lbr_save(void *ctx) cpuc->last_log_id = ++task_context_opt(ctx)->log_id; } -void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - void *prev_ctx_data, *next_ctx_data; - - swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); - - /* - * Architecture specific synchronization makes sense in case - * both prev_epc->task_ctx_data and next_epc->task_ctx_data - * pointers are allocated. - */ - - prev_ctx_data = next_epc->task_ctx_data; - next_ctx_data = prev_epc->task_ctx_data; - - if (!prev_ctx_data || !next_ctx_data) - return; - - swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, - task_context_opt(next_ctx_data)->lbr_callstack_users); -} - -void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_ctx_data *ctx_data; void *task_ctx; if (!cpuc->lbr_users) @@ -552,14 +537,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * the task was scheduled out, restore the stack. Otherwise flush * the LBR stack. */ - task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + task_ctx = ctx_data ? ctx_data->data : NULL; if (task_ctx) { if (sched_in) __intel_pmu_lbr_restore(task_ctx); else __intel_pmu_lbr_save(task_ctx); + rcu_read_unlock(); return; } + rcu_read_unlock(); /* * Since a context switch can flip the address space and LBR entries @@ -588,9 +577,19 @@ void intel_pmu_lbr_add(struct perf_event *event) cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users++; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users++; + } /* * Request pmu::sched_task() callback, which will fire inside the * regular perf event scheduling, so that call will: @@ -664,9 +663,19 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && - event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users--; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users--; + } if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) cpuc->lbr_select = 0; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 844bc4fc4724..fb726c6fc6e7 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -10,6 +10,7 @@ #include <linux/perf_event.h> #include <asm/perf_event_p4.h> +#include <asm/cpu_device_id.h> #include <asm/hardirq.h> #include <asm/apic.h> @@ -732,9 +733,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx) { /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) + if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT && + boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M && + boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL) return false; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a6cffb4f4ef5..65b45e9d7016 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -2,6 +2,8 @@ #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/cpu_device_id.h> + #include "../perf_event.h" /* @@ -248,30 +250,8 @@ __init int p6_pmu_init(void) { x86_pmu = p6_pmu; - switch (boot_cpu_data.x86_model) { - case 1: /* Pentium Pro */ + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO) x86_add_quirk(p6_pmu_rdpmc_quirk); - break; - - case 3: /* Pentium II - Klamath */ - case 5: /* Pentium II - Deschutes */ - case 6: /* Pentium II - Mendocino */ - break; - - case 7: /* Pentium III - Katmai */ - case 8: /* Pentium III - Coppermine */ - case 10: /* Pentium III Xeon */ - case 11: /* Pentium III - Tualatin */ - break; - - case 9: /* Pentium M - Banias */ - case 13: /* Pentium M - Dothan */ - break; - - default: - pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); - return -ENODEV; - } memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 31c2771545a6..2c0ce0e9545e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -115,6 +115,11 @@ static inline bool is_branch_counters_group(struct perf_event *event) return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; } +static inline bool is_pebs_counter_event_group(struct perf_event *event) +{ + return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; +} + struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */ @@ -669,18 +674,6 @@ enum { #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) -/* - * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture - * of the core. Bits 31-24 indicates its core type (Core or Atom) - * and Bits [23:0] indicates the native model ID of the core. - * Core type and native model ID are defined in below enumerations. - */ -enum hybrid_cpu_type { - HYBRID_INTEL_NONE, - HYBRID_INTEL_ATOM = 0x20, - HYBRID_INTEL_CORE = 0x40, -}; - #define X86_HYBRID_PMU_ATOM_IDX 0 #define X86_HYBRID_PMU_CORE_IDX 1 #define X86_HYBRID_PMU_TINY_IDX 2 @@ -697,11 +690,6 @@ enum hybrid_pmu_type { hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; -enum atom_native_id { - cmt_native_id = 0x2, /* Crestmont */ - skt_native_id = 0x3, /* Skymont */ -}; - struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -800,6 +788,7 @@ struct x86_pmu { u64 (*update)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + void (*late_setup)(void); unsigned eventsel; unsigned perfctr; unsigned fixedctr; @@ -869,7 +858,7 @@ struct x86_pmu { void (*check_microcode)(void); void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, - bool sched_in); + struct task_struct *task, bool sched_in); /* * Intel Arch Perfmon v2+ @@ -914,6 +903,7 @@ struct x86_pmu { const int *lbr_sel_map; /* lbr_select mappings */ int *lbr_ctl_map; /* LBR_CTL mappings */ }; + u64 lbr_callstack_users; /* lbr callstack system wide users */ bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ @@ -952,14 +942,6 @@ struct x86_pmu { int num_topdown_events; /* - * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data) - * switch helper to bridge calls from perf/core to perf/x86. - * See struct pmu::swap_task_ctx() usage for examples; - */ - void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - - /* * AMD bits */ unsigned int amd_nb_constraints : 1; @@ -994,7 +976,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); + enum intel_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { @@ -1107,6 +1089,8 @@ extern struct x86_pmu x86_pmu __read_mostly; DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); +DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup); static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) { @@ -1148,6 +1132,12 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); +static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val) +{ + return x86_perf_event_update(event); +} +DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); + static inline unsigned int x86_pmu_config_addr(int index) { return x86_pmu.eventsel + (x86_pmu.addr_offset ? @@ -1394,7 +1384,8 @@ void amd_pmu_lbr_reset(void); void amd_pmu_lbr_read(void); void amd_pmu_lbr_add(struct perf_event *event); void amd_pmu_lbr_del(struct perf_event *event); -void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); void amd_pmu_lbr_enable_all(void); void amd_pmu_lbr_disable_all(void); int amd_pmu_lbr_hw_config(struct perf_event *event); @@ -1448,7 +1439,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event) perf_sched_cb_dec(event->pmu); } -void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); #else static inline int amd_brs_init(void) { @@ -1473,7 +1465,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event) { } -static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { } @@ -1643,7 +1636,7 @@ void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); -void intel_pmu_auto_reload_read(struct perf_event *event); +void intel_pmu_drain_pebs_buffer(void); void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); @@ -1653,10 +1646,8 @@ void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, struct cpu_hw_events *cpuc, struct perf_event *event); -void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - -void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); u64 lbr_from_signext_quirk_wr(u64 val); diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h index 6c977c19f2cd..1d9e385649b5 100644 --- a/arch/x86/events/perf_event_flags.h +++ b/arch/x86/events/perf_event_flags.h @@ -9,7 +9,7 @@ PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */ PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */ PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */ PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */ - /* 0x00080 */ +PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */ PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */ PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */ PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */ diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index d3bb3865c1b1..043f0a0b1e00 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -370,6 +370,10 @@ static int rapl_pmu_event_init(struct perf_event *event) unsigned int rapl_pmu_idx; struct rapl_pmus *rapl_pmus; + /* only look at RAPL events */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* unsupported modes and filters */ if (event->attr.sample_period) /* no sampling */ return -EINVAL; @@ -387,10 +391,6 @@ static int rapl_pmu_event_init(struct perf_event *event) rapl_pmus_scope = rapl_pmus->pmu.scope; if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { - /* only look at RAPL package events */ - if (event->attr.type != rapl_pmus_pkg->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; @@ -398,10 +398,6 @@ static int rapl_pmu_event_init(struct perf_event *event) bit = cfg - 1; event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { - /* only look at RAPL core events */ - if (event->attr.type != rapl_pmus_core->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; @@ -734,6 +730,7 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_ { int nr_rapl_pmu = topology_max_packages(); struct rapl_pmus *rapl_pmus; + int ret; /* * rapl_pmu_scope must be either PKG, DIE or CORE @@ -765,7 +762,11 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_ rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return init_rapl_pmu(rapl_pmus); + ret = init_rapl_pmu(rapl_pmus); + if (ret) + kfree(rapl_pmus); + + return ret; } static struct rapl_model model_snb = { @@ -883,6 +884,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; |