×
FPGA/DSP > DSP系统 > 详情

普通变量与寄存器变量速度对比

发布时间:2020-05-15 发布时间:
|

寄存器变量速度比普通变量存取速度快。对于C程序,寄存器变量不能取地址,编译器会报错。对于C++程序,可以对寄存器变量进行取址操作,编译器不会报错,但是取出来的地址似乎不是寄存器地址,而是内存地址,不知道是不是C++编译器在涉及取址运算时将寄存器变量自动转换成普通变量来处理。

1、只有普通运算

对于上述的普通累加运算而言,采用普通变量耗时0.7177秒,采用寄存器变量耗时0.111秒,速度上确实有明显的差别。

2、涉及取址运算

如果涉及取址运算,采用普通变量耗时0.7867秒,采用寄存器变量耗时0.4792秒,速度上的差别就没有那么显著了。大家可以发现两种变量取出的地址分别是0x6ffe38和0x6ffe3c,是连续的两个地址,那都是内存地址。不能确定,是不是C++编译器在涉及取址运算时自动将寄存器变量当成普通变量来处理。

实际使用时,底层硬件环境的实际情况对寄存器变量的使用会有一些限制。每个函数中只有很少的变量可以保存在寄存器中,且只允许某些类型的变量。但是,过量的寄存器声明并没有什么害处,这是因为编译器可以忽略过量的或者不支持的寄存器变量声明。另外,无论寄存器变量实际上是不是存放在寄存器中,它的地址都是不能访问的。在不同的机器中,对寄存器变量的数目和类型的具体限制也是不同的。 ——《C程序设计语言(第二版)》,Brian W. Kernighan & Dennis M. Ritchie

对于C程序,寄存器变量是不能取址的:

几种线程本地存储变量和普通变量的性能比较

God一直致力于研究高并发服务端的开发,这次要优化的是libGod库中的线程本地存储变量,线程本地存储变量访问非常频繁,优化后库的性能应该会提高不少。已知的线程本地存储方法有boost中的thread_specific_ptr类,gcc中的__thread关键字,pthread中的pthread_getspecific函数。这次测试这3种本地存储以及普通变量之间的性能差别,代码如下:

#include <pthread.h>
#include <stdio.h>
#include <sys/time.h>

#include <iostream>

#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>

using namespace std;

/**
 * Small tracer object stored in the storage slot under test.
 *
 * It holds one integer tag and prints that tag on construction and on
 * destruction, so the program output shows when the object is created
 * and whether it is ever destroyed.
 */
class C {
public:
    /** Stores the tag @p a and prints "C() <tag>". */
    C(int a) : m_a(a) {
        printf("C() %d\n", m_a);
    }

    /** Prints "~C() <tag>". */
    ~C() {
        printf("~C() %d\n", m_a);
    }

private:
    int m_a;  // tag echoed by the constructor and destructor
};

// Selects which storage flavour the benchmark exercises:
//   1 = boost::thread_specific_ptr, 2 = GCC __thread, 3 = pthread key,
//   any other value = plain (non-thread-local) global pointer.
// Defaults to 3; the guard lets a build override it with -DTM=<n>.
#ifndef TM
#define TM 3
#endif

// `pc` is the storage slot being read in the timed loop; `testType` is the
// label printed in the benchmark report.
#if TM == 1
boost::thread_specific_ptr<C> pc;
const char *testType = "boost";
#elif TM == 2
__thread C *pc;
const char *testType = "__thread";
#elif TM == 3
pthread_key_t pc;
const char *testType = "pthread";
#else
C *pc;
const char *testType = "normal";
#endif

void boostthreadFunc() {

#if TM == 1

pc.reset(new C(10));

#elif TM == 2

pc = new C(20);

#elif TM == 3

if (pthread_key_create(&pc, NULL)) {

cout 《《 “pthread_key_create” 《《 endl;

return;

}

if (pthread_setspecific(pc, new C(30))) {

cout 《《 “pthread_setspecific” 《《 endl;

return;

}

#else

pc = new C(20);

#endif

int switches = 5000000;

int i = switches;

struct timeval tm_start, tm_end;

gettimeofday(&tm_start, NULL);

while (i--) {

#if TM == 1

C *c1 = pc.get();

C *c2 = pc.get();

C *c3 = pc.get();

C *c4 = pc.get();

C *c5 = pc.get();

C *c6 = pc.get();

C *c7 = pc.get();

C *c8 = pc.get();

C *c9 = pc.get();

C *c10 = pc.get();

C *c11 = pc.get();

C *c12 = pc.get();

C *c13 = pc.get();

C *c14 = pc.get();

C *c15 = pc.get();

C *c16 = pc.get();

C *c17 = pc.get();

C *c18 = pc.get();

C *c19 = pc.get();

C *c20 = pc.get();

C *c21 = pc.get();

C *c22 = pc.get();

C *c23 = pc.get();

C *c24 = pc.get();

C *c25 = pc.get();

C *c26 = pc.get();

C *c27 = pc.get();

C *c28 = pc.get();

C *c29 = pc.get();

C *c30 = pc.get();

C *c31 = pc.get();

C *c32 = pc.get();

C *c33 = pc.get();

C *c34 = pc.get();

C *c35 = pc.get();

C *c36 = pc.get();

C *c37 = pc.get();

C *c38 = pc.get();

C *c39 = pc.get();

C *c40 = pc.get();

#elif TM == 2

C *c1 = pc;

C *c2 = pc;

C *c3 = pc;

C *c4 = pc;

C *c5 = pc;

C *c6 = pc;

C *c7 = pc;

C *c8 = pc;

C *c9 = pc;

C *c10 = pc;

C *c11 = pc;

C *c12 = pc;

C *c13 = pc;

C *c14 = pc;

C *c15 = pc;

C *c16 = pc;

C *c17 = pc;

C *c18 = pc;

C *c19 = pc;

C *c20 = pc;

C *c21 = pc;

C *c22 = pc;

C *c23 = pc;

C *c24 = pc;

C *c25 = pc;

C *c26 = pc;

C *c27 = pc;

C *c28 = pc;

C *c29 = pc;

C *c30 = pc;

C *c31 = pc;

C *c32 = pc;

C *c33 = pc;

C *c34 = pc;

C *c35 = pc;

C *c36 = pc;

C *c37 = pc;

C *c38 = pc;

C *c39 = pc;

C *c40 = pc;

#elif TM == 3

C *c1 = (C *)pthread_getspecific(pc);

C *c2 = (C *)pthread_getspecific(pc);

C *c3 = (C *)pthread_getspecific(pc);

C *c4 = (C *)pthread_getspecific(pc);

C *c5 = (C *)pthread_getspecific(pc);

C *c6 = (C *)pthread_getspecific(pc);

C *c7 = (C *)pthread_getspecific(pc);

C *c8 = (C *)pthread_getspecific(pc);

C *c9 = (C *)pthread_getspecific(pc);

C *c10 = (C *)pthread_getspecific(pc);

C *c11 = (C *)pthread_getspecific(pc);

C *c12 = (C *)pthread_getspecific(pc);

C *c13 = (C *)pthread_getspecific(pc);

C *c14 = (C *)pthread_getspecific(pc);

C *c15 = (C *)pthread_getspecific(pc);

C *c16 = (C *)pthread_getspecific(pc);

C *c17 = (C *)pthread_getspecific(pc);

C *c18 = (C *)pthread_getspecific(pc);

C *c19 = (C *)pthread_getspecific(pc);

C *c20 = (C *)pthread_getspecific(pc);

C *c21 = (C *)pthread_getspecific(pc);

C *c22 = (C *)pthread_getspecific(pc);

C *c23 = (C *)pthread_getspecific(pc);

C *c24 = (C *)pthread_getspecific(pc);

C *c25 = (C *)pthread_getspecific(pc);

C *c26 = (C *)pthread_getspecific(pc);

C *c27 = (C *)pthread_getspecific(pc);

C *c28 = (C *)pthread_getspecific(pc);

C *c29 = (C *)pthread_getspecific(pc);

C *c30 = (C *)pthread_getspecific(pc);

C *c31 = (C *)pthread_getspecific(pc);

C *c32 = (C *)pthread_getspecific(pc);

C *c33 = (C *)pthread_getspecific(pc);

C *c34 = (C *)pthread_getspecific(pc);

C *c35 = (C *)pthread_getspecific(pc);

C *c36 = (C *)pthread_getspecific(pc);

C *c37 = (C *)pthread_getspecific(pc);

C *c38 = (C *)pthread_getspecific(pc);

C *c39 = (C *)pthread_getspecific(pc);

C *c40 = (C *)pthread_getspecific(pc);

#else

C *c1 = pc;

C *c2 = pc;

C *c3 = pc;

C *c4 = pc;

C *c5 = pc;

C *c6 = pc;

C *c7 = pc;

C *c8 = pc;

C *c9 = pc;

C *c10 = pc;

C *c11 = pc;

C *c12 = pc;

C *c13 = pc;

C *c14 = pc;

C *c15 = pc;

C *c16 = pc;

C *c17 = pc;

C *c18 = pc;

C *c19 = pc;

C *c20 = pc;

C *c21 = pc;

C *c22 = pc;

C *c23 = pc;

C *c24 = pc;

C *c25 = pc;

C *c26 = pc;

C *c27 = pc;

C *c28 = pc;

C *c29 = pc;

C *c30 = pc;

C *c31 = pc;

C *c32 = pc;

C *c33 = pc;

C *c34 = pc;

C *c35 = pc;

C *c36 = pc;

C *c37 = pc;

C *c38 = pc;

C *c39 = pc;

C *c40 = pc;

#endif

}

gettimeofday(&tm_end, NULL);

switches *= 40;

long long ns = (tm_end.tv_sec - tm_start.tv_sec) * 1000LL * 1000LL * 1000LL +

(tm_end.tv_usec - tm_start.tv_usec) * 1000LL;

std::cout 《《 “####Benchmark result#### ” 《《 testType 《《 std::endl;

std::cout 《《 “Totol switches : ” 《《 switches 《《 std::endl;

std::cout 《《 “Cost per switch(ns) : ” 《《 (double)ns/switches 《《 std::endl;

std::cout 《《 “All cost switch(ns) : ” 《《 ns 《《 std::endl;

std::cout 《《 “####Benchmark result####” 《《 std::endl;

}

/**
 * Runs boostthreadFunc on a dedicated boost::thread, waits for it to
 * finish, then prints an exit message.
 *
 * @return 0 always.
 */
int main() {
    boost::thread bt(&boostthreadFunc);
    bt.join();

    printf("main exit.。\n");
    return 0;
}


『本文转载自网络,版权归原作者所有,如有侵权请联系删除』

热门文章 更多
DSP的数码望远相机的研究与设计