Answer the question
In order to leave comments, you need to log in
Why is a coprocessor a thousand times slower than a processor?
I study interest for the sake of the work of the coprocessor.
And now, I ran into an unexpected result.
#include "stdafx.h"
#include <conio.h>
#include <ctime>
#include <thread>
#define MAX_I 4000000
unsigned __int32 rezult[64];
std::thread* thr[8];
void StartPR(int str)
{
int i = MAX_I;
int x = str;
__asm {
mov eax, MAX_I
xor ebx, ebx
xor ecx, ecx
xor edx, edx
circle:
dec eax
mov i, eax
add ebx, i
add ecx, i
add edx, i
cmp eax, 0
jnz circle
mov eax, x
mov esi, eax
mov [rezult + esi], eax
add esi, 4
mov [rezult + esi], ebx
add esi, 4
mov [rezult + esi], ecx
add esi, 4
mov [rezult + esi], edx
}
}
void StartMMX(int str)
{
int i = MAX_I;
int x = str;
_asm {
mov eax, MAX_I
fldz
xor ebx, ebx
fldz
xor ecx, ecx
circle2:
dec eax
mov i, eax
add ecx, i
add ebx, i
fincstp
fld i
fadd st(1), st(0)
fadd st(2), st(0)
cmp eax, 0
jnz circle2
mov eax, x
mov esi, eax
mov dword ptr[rezult + esi], ebx
add esi, 4
fstp [rezult + esi]
add esi, 4
mov dword ptr[rezult + esi], ecx
add esi, 4
fstp [rezult + esi]
}
}
void stardThreads(int numOfThread)
{
printf("threads: %i", numOfThread);
int i = 0;
clock_t strt = clock();
for (int i = 1; i < numOfThread; i++)
{
thr[i] = new std::thread(StartPR, i * 32);
}
StartPR(0);
clock_t strt2 = clock();
clock_t bk1 = strt2 - strt;
i = 16;
for (int i = 1; i < numOfThread; i++)
{
thr[i] = new std::thread(StartMMX, i * 32 + 16);
}
StartMMX(i);
clock_t bk2 = clock() - strt2;
printf("time block1:%i, block2:%i, tics per second:%i\n", (__int32)bk1, (__int32)bk2, CLOCKS_PER_SEC);
//for (int i = 0; i < numOfThread * 8; i++)
//printf("rezult[%i] = %u \n", i, rezult[i]);
}
int _tmain(int argc, _TCHAR* argv[])
{
for (int i = 1; i < 9; i++)
stardThreads(i);
_getch();
return 0;
}
Answer the question
In order to leave comments, you need to log in
Who can tell me what I'm doing wrong?
clock_t strt = clock(); ... clock_t bk1 = clock() - strt; ... clock_t bk2 = clock() - bk1;
#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#define MAX_I 4000000
int32_t rezult[64];
void StartPR(int str)
{
asm volatile ("mov $4000000, %%eax\n\t"
"xor %%ebx, %%ebx\n\t"
"xor %%ecx, %%ecx\n\t"
"xor %%edx, %%edx\n"
"circle:\n\t"
"add %%eax, %%ebx\n\t"
"add %%eax, %%ecx\n\t"
"add %%eax, %%edx\n\t"
"dec %%eax\n\t"
"jnz circle\n\t" ::: "memory");
}
void StartMMX(int str)
{
double v = 4000000;
asm volatile ("mov $4000000, %%eax\n\t"
"fldz\n\t"
"circle2:\n\t"
"fld %0\n\t"
"faddp \n\t"
"fld %0\n\t"
"faddp \n\t"
"fld %0\n\t"
"faddp \n\t"
"fld %0\n\t"
"faddp \n\t"
"sub $4, %%eax\n\t"
"jnz circle2\n\t"
"fstp %0" :"+m"(v):: "memory");
}
void stardThreads(int numOfThread)
{
printf("threads: %i", numOfThread);
int i = 0;
clock_t strt = clock();
StartPR(0);
clock_t strt2 = clock();
clock_t bk1 = strt2 - strt;
StartMMX(i);
clock_t bk2 = clock() - strt2;
printf("time block1:%i, block2:%i, tics per second:%i\n", (int32_t)bk1, (int32_t)bk2, CLOCKS_PER_SEC);
}
int main(int argc, char *argv[])
{
int i;
for (i = 1; i < 9; i++)
stardThreads(i);
return 0;
}
threads: 1time block1:6949, block2:9311, tics per second:1000000
threads: 2time block1:4061, block2:7872, tics per second:1000000
threads: 3time block1:3901, block2:7398, tics per second:1000000
threads: 4time block1:3615, block2:7045, tics per second:1000000
threads: 5time block1:3389, block2:6716, tics per second:1000000
threads: 6time block1:3250, block2:6342, tics per second:1000000
threads: 7time block1:3189, block2:6036, tics per second:1000000
threads: 8time block1:3032, block2:5885, tics per second:1000000
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question