performance - rdpmc in user mode does not work even with PCE set -
Based on the Wikipedia entry and the Intel manual, rdpmc should be available to user-mode processes as long as bit 8 of CR4 is set. However, I am still running into a general protection error when trying to run rdpmc from userspace, even with that bit set.
I am running on an 8-core Intel X3470 on kernel 2.6.32-279.el6.x86_64.
Here is the user-mode program I am trying to execute:
#define _GNU_SOURCE /* must precede every #include: exposes cpu_set_t, CPU_*() and sched_setaffinity() */
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sched.h>
#include <assert.h>

/*
 * Read performance-monitoring counter number `ecx` with the RDPMC instruction.
 *
 * RDPMC takes the counter index in ECX and returns the counter value split
 * across EDX (high half) and EAX (low half); the halves are merged here into
 * a single 64-bit result.
 *
 * Executing RDPMC outside ring 0 while CR4.PCE is clear raises a
 * general-protection fault, which is the failure this program demonstrates.
 */
uint64_t read_pmc(int ecx)
{
    unsigned int a, d;

    __asm __volatile("rdpmc" : "=a"(a), "=d"(d) : "c"(ecx));
    return ((uint64_t)a) | (((uint64_t)d) << 32);
}

/*
 * Usage: prog cpu-id pmc-num
 *
 * Pins the calling process to CPU `cpu-id`, reads counter `pmc-num` once on
 * that CPU and prints its value in decimal.
 */
int main(int ac, char **av)
{
    cpu_set_t cpuset;
    unsigned int c;
    int i;

    if (ac != 3) {
        fprintf(stderr, "usage: %s cpu-id pmc-num\n", av[0]);
        exit(EXIT_FAILURE);
    }

    i = atoi(av[1]);
    c = atoi(av[2]);

    /* CPU_SET() indexes a fixed-size bitmap; reject ids that fall outside it. */
    if (i < 0 || i >= CPU_SETSIZE) {
        fprintf(stderr, "invalid cpu-id: %s\n", av[1]);
        exit(EXIT_FAILURE);
    }

    CPU_ZERO(&cpuset);
    CPU_SET(i, &cpuset);

    /*
     * Checked explicitly rather than inside assert(): with NDEBUG defined the
     * assert() argument is not evaluated, so the affinity call would vanish.
     */
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity");
        exit(EXIT_FAILURE);
    }

    printf("%" PRIu64 "\n", read_pmc(c));
    return 0;
}

/* Here is the kernel module that sets the bit and then reads CR4 back out, so that I can manually verify that the bit has been set. */
/* * enable pmc in user mode. */ #include <linux/module.h> #include <linux/kernel.h> int init_module(void) { typedef long unsigned int uint64_t; uint64_t output; // set cr4, bit 8 enable pmc __asm__("push %rax\n\t" "mov %cr4,%rax;\n\t" "or $(1 << 7),%rax;\n\t" "mov %rax,%cr4;\n\t" "wbinvd\n\t" "pop %rax" ); // read cr4 check bit. __asm__("\t mov %%cr4,%0" : "=r"(output)); printk(kern_info "%lu", output); return 0; } void cleanup_module(void) { __asm__("push %rax\n\t" "push %rbx\n\t" "mov %cr4,%rax;\n\t" "mov $(1 << 7), %rbx\n\t" "not %rbx\n\t" "and %rbx, %rax;\n\t" "mov %rax,%cr4;\n\t" "wbinvd\n\t" "pop %rbx\n\t" "pop %rax\n\t" ); }
Apparently, when Intel says bit 8, they are referring to the 9th bit from the right, since their indexing begins at 0. Replacing $(1 << 7) with $(1 << 8) globally resolves the issue and allows rdpmc to be called from user mode.
Here is the updated kernel module, which also uses on_each_cpu to make sure that the bit is set on every core.
/* * read pmc in kernel mode. */ #include <linux/module.h> /* needed modules */ #include <linux/kernel.h> /* needed kern_info */ static void printc4(void) { typedef long unsigned int uint64_t; uint64_t output; // read cr4 check bit. __asm__("\t mov %%cr4,%0" : "=r"(output)); printk(kern_info "%lu", output); } static void setc4b8(void * info) { // set cr4, bit 8 (9th bit right) enable __asm__("push %rax\n\t" "mov %cr4,%rax;\n\t" "or $(1 << 8),%rax;\n\t" "mov %rax,%cr4;\n\t" "wbinvd\n\t" "pop %rax" ); // check cpu on: printk(kern_info "ran on processor %d", smp_processor_id()); printc4(); } static void clearc4b8(void * info) { printc4(); __asm__("push %rax\n\t" "push %rbx\n\t" "mov %cr4,%rax;\n\t" "mov $(1 << 8), %rbx\n\t" "not %rbx\n\t" "and %rbx, %rax;\n\t" "mov %rax,%cr4;\n\t" "wbinvd\n\t" "pop %rbx\n\t" "pop %rax\n\t" ); printk(kern_info "ran on processor %d", smp_processor_id()); } int init_module(void) { on_each_cpu(setc4b8, null, 0); return 0; } void cleanup_module(void) { on_each_cpu(clearc4b8, null, 0); }
Comments
Post a Comment