#include #include #include #include #define NELEMENTS 1024*1024*16 #define THREADS 256 // deklariram strukturo za prenašanje argumentov v niti struct argumenti { int myID; // ID posamezne niti //float myDotProd; }; pthread_t thread[THREADS]; float DPs[THREADS]; float *vecA; float *vecB; float *vecC; float dotProd; // prepreka: pthread_barrier_t barrier; /* timespec is a struct defined in ctime as * struct timespec { * time_t tv_sec; // seconds * long tv_nsec; // nanoseconds * }; */ struct timespec timeStart, timeEnd; // instance struktur - vsak anit dobi svojo struct argumenti args[THREADS]; void *funkcijaNiti (void* arg){ int mojID; struct argumenti *args ; args = (struct argumenti *) arg; // vrni mi element myID iz strukture, ki je v pomnilniku shranjena na naslovu args: mojID = args->myID; printf("Nit %d racuna... \n", mojID); // 1. vsaka nit najprej računa skalarni pordukt iz svojih rezin vektorjev: for (int i = 0; i < NELEMENTS/THREADS; i++) { vecC[mojID*(NELEMENTS/THREADS)+i] = *(vecA + (mojID*(NELEMENTS/THREADS) + i)) * vecB[mojID*(NELEMENTS/THREADS)+i]; DPs[mojID] += vecC[mojID*(NELEMENTS/THREADS)+i]; } // 2. Redukcija: int j = THREADS/2; // ================ ZAPORA ================= // === DO TUKAJ MORAJO PRITI VSE NITI ====== pthread_barrier_wait(&barrier); while (j > 0) { if( mojID < j ) { DPs[mojID] += DPs[mojID+j]; } j = j/2; // ================ ZAPORA ================= // === DO TUKAJ MORAJO PRITI VSE NITI ====== pthread_barrier_wait(&barrier); } } int main () { vecA = (float *) malloc(NELEMENTS * sizeof(float)); vecB = (float *) malloc(NELEMENTS * sizeof(float)); vecC = (float *) malloc(NELEMENTS * sizeof(float)); // init vhodnih vektorjev: for (int i = 0; i < NELEMENTS; i++) { vecA[i] = 1.0; //*(vecA + i) = 1.0; vecB[i] = 1.0; } // init prepreke: pthread_barrier_init(&barrier, NULL, THREADS); dotProd = 0.0; clock_t start = clock(); clock_gettime(CLOCK_REALTIME, &timeStart); // ustvarimo THREADS niti: for (int i = 0; i < THREADS; i++) { // inicializiraj IDje: args[i].myID = i; //args[i].myDotProd = 0.0; DPs[i] = 0.0; // ustvari niti: pthread_create( &thread[i], NULL, funkcijaNiti, (void *) &args[i]); } // pocakaj, da vse niti zaključijo z delom: for (int i = 0; i < THREADS; i++) { pthread_join(thread[i], NULL); //dotProd += args[i].myDotProd; } clock_gettime(CLOCK_REALTIME, &timeEnd); clock_t end = clock(); double time_taken = ((double)(end-start))/CLOCKS_PER_SEC; // calculate the elapsed time double elapsed_time = (timeEnd.tv_sec - timeStart.tv_sec) + (timeEnd.tv_nsec - timeStart.tv_nsec) / 1e9; // in seconds dotProd = DPs[0]; printf("Skalarni produkt = %f \n", dotProd); printf("Čas izvajanja: %f sekund \n", time_taken); printf("Čas izvajanja: %f sekund \n", elapsed_time); // clean-up: pthread_barrier_destroy(&barrier); return 0; }