Review of Processes
To understand threads, it's important to review what a process is.Processes
#include <stdio.h> /* printf */
#include <unistd.h> /* fork, getpid */
#include <stdlib.h> /* exit */
#include <sys/wait.h> /* wait */
int main(void)
{
int pid = fork();
if (pid == 0) /* child */
{
printf(" child pid: %i\n", getpid());
/* do child stuff */
exit(0);
}
else /* parent */
{
printf("parent pid: %i\n", getpid());
/* do parent stuff */
wait(NULL);
}
return 0;
}
_______________________________________________
Output:
parent pid: 32491
child pid: 32492
Threads Overview
Types of threads
With a single CPU/core:
With multiple CPUs/cores
No parallelism Pseudo-parallelism ![]()
![]()
Pseudo-parallelism (detail) ![]()
Parallelism ![]()
Thread States
There are several states in which a thread can be, and they are very similar to process states. They are mutually-exclusive, so a thread can only be in exactly one of these states at any given time:
Thread Libraries
POSIX Pthreads
This program simply creates a thread which prints the word Hello! to the screen and then exits.
/* To compile under Linux, use -pthread or -lpthread option */
#include <stdio.h> /* printf */
#include <pthread.h> /* thread create/join/exit */
void *hello(void *p)
{
printf("Hello!\n");
return NULL;
}
int main(void)
{
pthread_t tid;
pthread_create(&tid, NULL, hello, NULL);
pthread_join(tid, NULL);
return 0;
}
int pthread_create(pthread_t *thread, pthread_attr_t *attr, void * (*start_routine)(void *), void *arg);
int pthread_join(pthread_t thread, void **retval);
void pthread_exit(void *retval);
This program uses a global array of NUL-terminated strings (character pointers). The main function creates several threads. Each thread runs the same function palindrome, which simply chooses a number of random palindromes (from the global array) to print to the screen. There is only one process, but many threads.
#include <stdio.h> /* printf */
#include <unistd.h> /* sleep, getpid */
#include <pthread.h> /* thread create/join/exit */
char const *table[]
= { "A man, a plan, a canal: Panama!",
"No lemons, no melon.",
"Step on no pets.",
"Was it a rat I saw?",
"Dog as a devil deified lived as a god.",
"Able was I ere I saw Elba.",
"Yawn! Madonna fan? No damn way!",
"Go hang a salami. I'm a lasagna hog!"
};
const int table_size = sizeof(table) / sizeof(*table);
void *palindrome(void *p) /* p is unused */
{
pthread_t tid = pthread_self();
int max = 5;
int i, pid;
pid = getpid(); /* all threads have the same pid */
for (i = 0; i < max; ++i)
{
int index = rand() % table_size;
printf("[p:%i][t:%u]: %s\n", pid, (unsigned)tid / 1000000, table[index]);
}
return NULL;
}
int main(void)
{
#define COUNT 8
int i;
pthread_t thread_id[COUNT];
srand(0);
printf("Creating threads...\n");
for (i = 0; i < COUNT; ++i)
pthread_create(&thread_id[i], NULL, palindrome, NULL);
printf("Done creating threads.\n");
printf("Waiting on threads...\n");
for (i = 0; i < COUNT; ++i)
{
printf("Joining thread %i\n", i);
pthread_join(thread_id[i], NULL);
}
printf("Done waiting on threads.\n");
return 0;
}
Output from above
This program is similar to the one above but instead of using global data, the data is passed to each thread. The data is a user-defined struct that contains a pointer to the palindrome strings, the size of the table (array), and a count of how many strings to print. Although each thread receives its own copy of the pointer to the strings, they are all reading from the same array. This is completely safe. As before, there is only one process, but many threads.
#include <stdio.h> /* printf */
#include <unistd.h> /* sleep, getpid */
#include <stdlib.h> /* rand, srand */
#include <pthread.h> /* thread create/join/exit */
/* User-defined data for threads */
typedef struct
{
const char **table;
int table_size;
int to_do_count;
}data_struct; /* an unimaginative name for our structure ... */
void *palindrome(void *data)
{
data_struct *ds = (data_struct *)data; /* typical use when providing arguments */
pthread_t tid = pthread_self(); /* get this thread's tid */
int pid = getpid(); /* all threads have the same pid */
int i;
for (i = 0; i < ds->to_do_count; ++i)
{
int index = rand() % ds->table_size;
printf("[p:%i][t:%u]: %s\n", pid, (unsigned)tid / 1000000, ds->table[index]);
}
return NULL;
}
int main(void)
{
#define COUNT 8
int i;
const char *table[]
= { "A man, a plan, a canal: Panama!",
"No lemons, no melon.",
"Step on no pets.",
"Was it a rat I saw?",
"Dog as a devil deified lived as a god.",
"Able was I ere I saw Elba.",
"Yawn! Madonna fan? No damn way!",
"Go hang a salami. I'm a lasagna hog!"
};
const int table_size = sizeof(table) / sizeof(*table);
srand(0);
data_struct ds[COUNT];
pthread_t thread_id[COUNT];
printf("Creating threads...\n");
for (i = 0; i < COUNT; ++i)
{
ds[i].table = table;
ds[i].table_size = table_size;
ds[i].to_do_count = 5; /*1 + rand() % 10;*/
pthread_create(&thread_id[i], NULL, palindrome, &ds[i]);
}
printf("Done creating threads.\n");
printf("Waiting on threads...\n");
for (i = 0; i < COUNT; ++i)
{
printf("Joining thread %i\n", i);
pthread_join(thread_id[i], NULL);
}
printf("Done waiting on threads.\n");
return 0;
}
Output: (Like before, each run will produce a different ordering.)
Creating threads... [p:20030][t:791]: Go hang a salami. I'm a lasagna hog! [p:20030][t:791]: Was it a rat I saw? [p:20030][t:791]: No lemons, no melon. [p:20030][t:791]: Step on no pets. [p:20030][t:766]: Go hang a salami. I'm a lasagna hog! [p:20030][t:783]: Yawn! Madonna fan? No damn way! [p:20030][t:783]: Able was I ere I saw Elba. [p:20030][t:783]: Step on no pets. [p:20030][t:783]: Step on no pets. [p:20030][t:783]: Was it a rat I saw? [p:20030][t:741]: Was it a rat I saw? [p:20030][t:758]: No lemons, no melon. [p:20030][t:758]: Yawn! Madonna fan? No damn way! [p:20030][t:758]: Step on no pets. [p:20030][t:758]: Dog as a devil deified lived as a god. [p:20030][t:758]: A man, a plan, a canal: Panama! Done creating threads. [p:20030][t:733]: A man, a plan, a canal: Panama! [p:20030][t:733]: Go hang a salami. I'm a lasagna hog! [p:20030][t:766]: Dog as a devil deified lived as a god. [p:20030][t:766]: Yawn! Madonna fan? No damn way! [p:20030][t:766]: Step on no pets. [p:20030][t:766]: Yawn! Madonna fan? No damn way! [p:20030][t:733]: Able was I ere I saw Elba. [p:20030][t:733]: Was it a rat I saw? [p:20030][t:733]: Was it a rat I saw? [p:20030][t:741]: Was it a rat I saw? [p:20030][t:741]: Go hang a salami. I'm a lasagna hog! [p:20030][t:791]: Dog as a devil deified lived as a god. [p:20030][t:749]: Was it a rat I saw? [p:20030][t:749]: No lemons, no melon. [p:20030][t:749]: Step on no pets. [p:20030][t:749]: Yawn! Madonna fan? No damn way! [p:20030][t:749]: Step on no pets. Waiting on threads... Joining thread 0 [p:20030][t:775]: No lemons, no melon. [p:20030][t:775]: Able was I ere I saw Elba. [p:20030][t:775]: No lemons, no melon. [p:20030][t:775]: A man, a plan, a canal: Panama! [p:20030][t:775]: Was it a rat I saw? Joining thread 1 Joining thread 2 Joining thread 3 [p:20030][t:741]: Go hang a salami. I'm a lasagna hog! [p:20030][t:741]: Step on no pets. Joining thread 4 Joining thread 5 Joining thread 6 Joining thread 7 Done waiting on threads.Notes:
A More Real-World™ Example
The previous examples were a good introduction to how threads work, but they were contrived. There was little to be gained by having a seperate thread print out those strings at the same time to the screen. In fact, it may have been worse using threads because the output appeared chaotic. Let's do something that actually makes sense.
In this example, we have 3 different ways to calculate the value of pi: (pi.c)Remember, the goal of multi-threading is to keep the CPU busy. We never want it to be idle if there is work to be done.
Method Time Circle method 2.2 seconds Leibniz method 2.4 seconds atan method 1.5 seconds
To give you an idea of how these function work, this is what the atan method looks like:
The details are not important. What is important is that this function is going to take a while to perform 200,000,000 iterations. The other two functions are similar in that they are going to loop 200,000,000 times.double atan_pi(int rectangles) { double width = 1.0 / rectangles; double length; double area = 0.0; int i; double midpoint = width / 2; for (i = 0; i < rectangles; i++) { double area_of_rectangle; length = 4.0 / (1 + midpoint * midpoint); area_of_rectangle = length * width; area += area_of_rectangle; midpoint += width; } return area; }
Before we learned about processes, threads, and multi-core CPUs, we would have programmed it like this: (pi-nothread.c)
/* To compile under Linux, use -lm linker option */
#include <stdio.h> /* printf */
#include <stdlib.h> /* atoi */
double circle_pi(int rectangles); /* Calculates PI using a quarter circle */
double leibniz_pi(int iterations); /* Calculates PI using a series */
double atan_pi(int rectangles); /* Calculates PI using a curve */
int main(int argc, char **argv)
{
int iterations = 1000 * 1000 * 100;
double pi_circle, pi_leibniz, pi_atan;
if (argc > 1)
iterations = 1000 * 1000 * atoi(argv[1]);
pi_circle = circle_pi(iterations);
pi_leibniz = leibniz_pi(iterations);
pi_atan = atan_pi(iterations);
printf("Iterations: %10i\n", iterations);
printf(" circle:%20.16f\n", pi_circle);
printf("leibniz:%20.16f\n", pi_leibniz);
printf(" atan:%20.16f\n", pi_atan);
return 0;
}
Then, run from the command line and timing it:
the output:time ./pi-nothread 200
Reminder of the times for each function:Iterations: 200000000 circle: 3.1415926568498080 leibniz: 3.1415926485894077 atan: 3.1415926536631549 real 0m6.168s user 0m6.150s sys 0m0.010s
As expected, since we ran each function serially (one after the other), the total time is the sum of the times of each function. (The time to run the program without calling any pi function was 0.001 s.)
Method Time Circle method 2.2 seconds Leibniz method 2.4 seconds atan method 1.5 seconds
On a single-core CPU, that's the best we can do. Even putting each function in its own process or own thread would likely make the timings worse. Why?
BUT, on a multi-core system, we can certainly improve things. Let's run each function in its own process and hope that the operating system will give each process its own core to run on simultaneously. Now we program it like this: (pi-mpsh.c)
/* To compile under Linux, use -lm linker option */
#include <stdio.h> /* printf */
#include <stdlib.h> /* exit, atoi */
#include <string.h> /* strcpy */
#include <unistd.h> /* sleep, fork */
#include <sys/shm.h> /* shmget, shmat, shmdt, shmctl */
#include <sys/wait.h> /* waitpid */
double circle_pi(int rectangles); /* Calculates PI using a quarter circle */
double leibniz_pi(int iterations); /* Calculates PI using a series */
double atan_pi(int rectangles); /* Calculates PI using a curve */
int main(int argc, char **argv)
{
int iterations = 1000 * 1000 * 100;
int child1, child2, child3;
int shmid;
double *buffer; /* shared buffer */
key_t key = 123; /* arbitrary key */
if (argc > 1)
iterations = 1000 * 1000 * atoi(argv[1]);
/* Buffer will hold 3 doubles; one from each child process */
shmid = shmget(key, 3 * sizeof(double), 0600 | IPC_CREAT);
buffer = (double *) shmat(shmid, NULL, 0);
/* Initialize the buffer */
*buffer = 0.0;
*(buffer + 1) = 0.0;
*(buffer + 2) = 0.0;
if ( (child1 = fork()) == 0 ) /* first child */
{
*buffer = circle_pi(iterations);
shmdt(buffer); /* detach memory from child process */
exit(0);
}
/* parent */
if ( (child2 = fork()) == 0 ) /* second child */
{
*(buffer + 1) = leibniz_pi(iterations);
shmdt(buffer); /* detach memory from second process */
exit(0);
}
/* parent */
if ( (child3 = fork()) == 0 ) /* third child */
{
*(buffer + 2) = atan_pi(iterations);
shmdt(buffer); /* detach memory from third child process */
exit(0);
}
/* parent */
waitpid(child1, NULL, 0);
waitpid(child2, NULL, 0);
waitpid(child3, NULL, 0);
printf("Iterations: %10i\n", iterations);
printf(" circle:%20.16f\n", *buffer);
printf("leibniz:%20.16f\n", *(buffer + 1));
printf(" atan:%20.16f\n", *(buffer + 2));
shmdt(buffer); /* detach memory from parent process */
shmctl(shmid, IPC_RMID, 0); /* delete memory block */
return 0;
}
Then, run from the command line and timing it:
the output:time ./pi-mpsh 200
Reminder:Iterations: 200000000 circle: 3.1415926568498080 leibniz: 3.1415926485894077 atan: 3.1415926536631549 real 0m2.405s user 0m6.140s sys 0m0.000s
The single-process version took 6.15 seconds. This is a clearly significant improvement. Why do you think the time is 2.4 seconds? (It was run on a machine with 4 cores.)
Method Time Circle method 2.2 seconds Leibniz method 2.4 seconds atan method 1.5 seconds
Now, let's run each function in its own thread and see how that goes. We program it like this: (pi-mt.c)
/* To compile under Linux, use -lm linker option */
#include <stdio.h> /* printf */
#include <stdlib.h> /* exit, atoi */
#include <pthread.h> /* thread create/join/exit */
double circle_pi(int rectangles); /* Find PI using a quarter circle */
double leibniz_pi(int iterations); /* Find PI using a series */
double atan_pi(int rectangles); /* Find PI using a curve */
double pi_circle;
double pi_leibniz;
double pi_atan;
void *th1(void *p)
{
int it = *(int *)p;
pi_circle = circle_pi(it);
return NULL;
}
void *th2(void *p)
{
int it = *(int *)p;
pi_leibniz = leibniz_pi(it);
return NULL;
}
void *th3(void *p)
{
int it = *(int *)p;
pi_atan = atan_pi(it);
return NULL;
}
int main(int argc, char **argv)
{
int iterations = 1000 * 1000 * 100;
pthread_t thread1, thread2, thread3;
if (argc > 1)
iterations = 1000 * 1000 * atoi(argv[1]);
/* Create threads with data passing */
pthread_create(&thread1, NULL, th1, &iterations);
pthread_create(&thread2, NULL, th2, &iterations);
pthread_create(&thread3, NULL, th3, &iterations);
/* Wait on the threads */
pthread_join(thread1, NULL);
pthread_join(thread2, NULL);
pthread_join(thread3, NULL);
printf("Iterations: %10i\n", iterations);
printf(" circle:%20.16f\n", pi_circle);
printf("leibniz:%20.16f\n", pi_leibniz);
printf(" atan:%20.16f\n", pi_atan);
return 0;
}
Then, run from the command line and timing it:
the output:time ./pi-mt 200
Another version using arrays for scaling. This example also shows how to return data from one thread to another thread via the thread-function argument.Iterations: 200000000 circle: 3.1415926568498080 leibniz: 3.1415926485894077 atan: 3.1415926536631549 real 0m2.398s user 0m6.130s sys 0m0.000s
Some interesting benchmarks: Various computer specs for testing in this class.
Below are timings for calculating pi using the methods and systems described above. There are two primary factors that determine the overall performance of the calculations. One is the version of the GNU compiler used, and the other is the architecture of the processor, which includes the speed of the CPU itself, as well as the size, speed, and architecture of the caches. We will learn more about the details of cache memory later. Don't forget Mead's informal rule of thumb: Cache is King™
Finally, realize that, although this is not quite a synthetic benchmark, (WhetstoneWhetstone for floating point and DrhystoneDrhystone for integers ) it really doesn't represent all of the tasks and programs a system is going to run during an average day, so you can't just look at this one benchmark and conclude which system is best.
Some real stress testing: (Do try this at home! pi-mtv.c)
Computer
(200,000,000 iterations)Individual
timings (secs)
pi-nothread.cOne process
One thread
pi-nothread.cMultiple processes
One thread each
pi-mpsh.cOne process
Multiple Threads
pi-mt.cMaya (3.4 GHz) 4 cores
Linux 64-bit (3.2.0-23)
gcc 4.6.3 circle: 0.8 Leibniz: 1.7 atan: 0.8 3.3 real 0m3.299s user 0m3.288s sys 0m0.000s real 0m1.717s user 0m3.420s sys 0m0.008s real 0m1.713s user 0m3.680s sys 0m0.000sClara (3.3 GHz) 4 cores
Linux 64-bit (3.2.0-23)
gcc 4.6.3 circle: 1.0 Leibniz: 1.7 atan: 1.1 3.8 real 0m3.730s user 0m3.724s sys 0m0.000s real 0m1.673s user 0m3.724s sys 0m0.000s real 0m1.699s user 0m3.764s sys 0m0.000sServer (3.1 GHz) 4 cores
Linux 64-bit
gcc 4.6.3 circle: 1.7 Leibniz: 1.0 atan: 1.7 4.4 real 0m4.324s user 0m4.304s sys 0m0.000s real 0m1.743s user 0m4.292s sys 0m0.000s real 0m1.704s user 0m4.252s sys 0m0.000sAthena (2.83 GHz) 4 cores
Linux 32-bit (2.6.18-2)
gcc 4.1.2 circle: 2.1 Leibniz: 1.6 atan: 1.6 5.3 real 0m5.200s user 0m5.196s sys 0m0.004s real 0m2.105s user 0m5.252s sys 0m0.000s real 0m2.100s user 0m5.256s sys 0m0.000sLulu (2.7 GHz) 6 cores
Linux 64-bit
gcc 4.6.3 circle: 2.0 Leibniz: 2.1 atan: 1.9 6.0 real 0m6.087s user 0m6.084s sys 0m0.000s real 0m2.224s user 0m6.288s sys 0m0.000s real 0m2.209s user 0m6.244s sys 0m0.000sOlga (2.8 GHz) 4 cores
Linux 64-bit (2.6.32-24)
gcc 4.4.3 circle: 2.2 Leibniz: 2.4 atan: 1.5 6.1 real 0m6.135s user 0m6.130s sys 0m0.000s real 0m2.405s user 0m6.150s sys 0m0.000s real 0m2.408s user 0m6.120s sys 0m0.000sLisa (2.67 GHz) 2 core
Linux 64-bit (2.6.38)
gcc 4.5.2 circle: 2.2 Leibniz: 2.4 atan: 1.5 6.1 real 0m6.131s user 0m6.120s sys 0m0.010s real 0m3.007s user 0m8.360s sys 0m0.000s real 0m2.984s user 0m8.400s sys 0m0.000sGina (3.0 GHz) 2 cores
Linux 64-bit
gcc 4.8.2 circle: 4.8 Leibniz: 2.2 atan: 1.0 8.0 real 0m7.879s user 0m7.813s sys 0m0.008s real 0m3.011s user 0m4.026s sys 0m0.000s real 0m4.105s user 0m5.550s sys 0m0.021sDigipen (2.4 GHz) 2 cores
Windows 32-bit (XP)
gcc 4.3.2 circle: 6.3 Leibniz: 3.2 atan: 3.2 12.7 real 0m12.516s user 0m12.499s sys 0m0.046s real 0m7.406s user 0m12.341s sys 0m0.015s real 0m5.281s user 0m10.374s sys 0m0.031sJessica (2.8 GHz) 1 core
Windows 32-bit (Server 2003)
gcc 4.3.2 circle: 6.2 Leibniz: 3.4 atan: 3.7 13.3 real 0m6.141s user 0m5.953s sys 0m0.015s real 0m13.547s user 0m23.046s sys 0m0.015s real 0m14.718s user 0m26.093s sys 0m0.015sSabrina (2.4 GHz) 1 core
Linux 32-bit (2.4.20)
gcc 3.3 circle:10.7 Leibniz: 3.9 atan: 4.0 18.6 real 0m18.645s user 0m18.200s sys 0m0.000s real 0m18.610s user 0m18.180s sys 0m0.000s real 0m19.046s user 0m18.590s sys 0m0.000sMaria (1.66 GHz) 2 cores
Linux 32-bit (2.6.31-16)
gcc 4.4.1 circle:17.7 Leibniz:12.9 atan:16.3 46.9 real 0m46.806s user 0m46.759s sys 0m0.048s real 0m27.072s user 0m52.855s sys 0m0.052s real 0m27.306s user 0m52.091s sys 0m0.024s
This program runs many threads (up to 1024, but you can set it higher) and executes the function atan_pi for calculating pi. Under Windows, you can watch the program with Task Manager or Process Explorer and see all of the threads. The command line takes two parameters: The number of threads (default 1) and the number of iterations (times one million) for the function (default 100).
/* To compile under Linux, use -lm linker option */
#include <stdio.h> /* printf */
#include <stdlib.h> /* atoi */
#include <pthread.h> /* thread create/join/exit */
#define MAX_THREADS 1024
double atan_pi(int rectangles); /* Find PI using a curve */
void *th1(void *p)
{
int it = *(int *)p;
atan_pi(it);
return NULL;
}
int main(int argc, char **argv)
{
int iterations = 1000 * 1000 * 100;
int thread_count = 1;
int i;
pthread_t threads[MAX_THREADS];
if (argc > 1)
{
thread_count = atoi(argv[1]);
thread_count = thread_count > MAX_THREADS ? MAX_THREADS : thread_count;
}
if (argc > 2)
iterations = 1000 * 1000 * atoi(argv[2]);
printf(" Threads: %i\n", thread_count);
printf("Iterations: %i\n", iterations);
for (i = 0; i < thread_count; i++)
pthread_create(&threads[i], NULL, th1, &iterations);
for (i = 0; i < thread_count; i++)
pthread_join(threads[i], NULL);
return 0;
}
Win32 Threads
Windows has its own API for threads:
This program simply creates a thread which prints the word Hello! to the screen and then exits.
#include <stdio.h> /* printf */
#include <windows.h> /* Windows stuff */
DWORD WINAPI hello(LPVOID)
{
printf("Hello!\n");
return 0;
}
int main(void)
{
HANDLE thread_handle;
thread_handle = CreateThread(0, /* Default attributes */
0, /* Default stack size */
hello, /* Start function */
0, /* No parameters */
0, /* Creation flags */
0 /* Returned thread id */
);
/* Wait for the thread (no timeout) */
WaitForSingleObject(thread_handle, INFINITE);
/* Release the thread */
CloseHandle(thread_handle);
return 0;
}
This program uses a global array of strings (character pointers). The main function creates several threads. Each thread runs the same function palindrome, which simply chooses a random number of palindromes (from the global array) to print to the screen. There is only one process, but many threads.
#include <stdio.h> /* printf */
#include <stdlib.h> /* rand, srand */
#include <windows.h> /* Windows stuff */
char const *table[]
= { "A man, a plan, a canal: Panama!",
"No lemons, no melon.",
"Step on no pets.",
"Was it a rat I saw?",
"Dog as a devil deified lived as a god.",
"Able was I ere I saw Elba.",
"Yawn! Madonna fan? No damn way!",
"Go hang a salami. I'm a lasagna hog!"
};
const int table_size = sizeof(table) / sizeof(*table);
DWORD WINAPI palindrome(LPVOID)
{
DWORD tid = GetCurrentThreadId(); /* each thread has unique tid */
DWORD pid = GetCurrentProcessId(); /* all threads have the same pid */
int max = 5;
int i;
for (i = 0; i < max; ++i)
{
int index = rand() % table_size;
printf("[p:%i][t:%u]: %s\n", pid, tid, table[index]);
}
return 0;
}
int main(void)
{
#define COUNT 8
int i;
HANDLE th_handles[COUNT];
srand(0);
printf("Creating threads...\n");
for (i = 0; i < COUNT; ++i)
th_handles[i] = CreateThread(0, /* Default attributes */
0, /* Default stack size */
palindrome, /* Start function */
0, /* No parameters */
0, /* Default flags */
0 /* Returned thread id */
);
printf("Done creating threads.\n");
printf("Waiting on threads...\n");
WaitForMultipleObjects(COUNT, th_handles, TRUE, INFINITE);
printf("Done waiting on threads.\n");
for (i = 0; i < COUNT; ++i)
CloseHandle(th_handles[i]);
return 0;
}
This program is similar to the one above but instead of using global data, the data is passed to each thread. The data is a user-defined struct that contains a pointer to the palindrome strings, the size of the table (array), and a count of how many strings to print. Although each thread receives its own copy of the pointer to the strings, they are all reading from the same array. This is completely safe. As before, there is only one process, but many threads.
#include <stdio.h> /* printf */
#include <stdlib.h> /* rand, srand */
#include <windows.h> /* Windows stuff */
typedef struct
{
const char **table;
int table_size;
int to_do_count;
}data_struct;
DWORD WINAPI palindrome(LPVOID data)
{
data_struct *ds = (data_struct *)data;
DWORD tid = GetCurrentThreadId();
DWORD pid = GetCurrentProcessId();; /* all threads have the same pid */
int i;
for (i = 0; i < ds->to_do_count; ++i)
{
int index = rand() % ds->table_size;
printf("[p:%i][t:%u]: %s\n", pid, tid, ds->table[index]);
}
return 0;
}
int main(void)
{
#define COUNT 8
int i;
char const *table[]
= { "A man, a plan, a canal: Panama!",
"No lemons, no melon.",
"Step on no pets.",
"Was it a rat I saw?",
"Dog as a devil deified lived as a god.",
"Able was I ere I saw Elba.",
"Yawn! Madonna fan? No damn way!",
"Go hang a salami. I'm a lasagna hog!"
};
const int table_size = sizeof(table) / sizeof(*table);
srand(0);
data_struct ds[COUNT];
HANDLE th_handles[COUNT];
printf("Creating threads...\n");
for (i = 0; i < COUNT; ++i)
{
ds[i].table = table;
ds[i].table_size = table_size;
ds[i].to_do_count = 5; /*1 + rand() % 10;*/
th_handles[i] = CreateThread(0, 0, palindrome, &ds[i], 0, 0);
}
printf("Done creating threads.\n");
printf("Waiting on threads...\n");
WaitForMultipleObjects(COUNT, th_handles, TRUE, INFINITE);
printf("Done waiting on threads.\n");
for (i = 0; i < COUNT; ++i)
CloseHandle(th_handles[i]);
return 0;
}
The threaded stress test using the Win32 API. Use Windows Task Manager or Process Explorer to see the processes and threads. (wpi-mtv.c)
#include <stdio.h> /* printf */ #include <stdlib.h> /* exit, atoi */ #include <windows.h> /* Windows stuff */ #define MAX_THREADS 1024 double atan_pi(int rectangles); /* Find PI using a curve */ DWORD WINAPI th1(LPVOID p) { int it = *(int *)p; atan_pi(it); return 0; } int main(int argc, char **argv) { int iterations = 1000 * 1000 * 100; int thread_count = 1; int i; HANDLE threads[MAX_THREADS]; if (argc > 1) { thread_count = atoi(argv[1]); thread_count = thread_count > MAX_THREADS ? MAX_THREADS : thread_count; } if (argc > 2) { iterations = 1000 * 1000 * atoi(argv[2]); } printf(" Threads: %i\n", thread_count); printf("Iterations: %i\n", iterations); for (i = 0; i < thread_count; i++) threads[i] = CreateThread(0, 0, th1, &iterations, 0, 0); WaitForMultipleObjects(thread_count, threads, TRUE, INFINITE); for (i = 0; i < thread_count; i++) CloseHandle(threads[i]); return 0; }
Multi-threading Problems
So far, everything seems to be working out just fine. However, consider the following two programs:Single-threaded version: (thread-safe.c)
#include <stdio.h> /* printf */
int Count = 0;
void Increment(void)
{
int i;
int loops = 1000;
for (i = 0; i < loops; i++)
Count++;
}
int main(void)
{
#define N 10
int i;
for (i = 0; i < N; i++)
Increment();
printf("Count = %i\n", Count);
return 0;
}
Output:
Multi-threaded version: (thread-unsafe.c)Count = 10000
#include <stdio.h> /* printf */
#include <pthread.h> /* thread create/join/exit */
int Count;
void *Increment(void *p)
{
int i;
int loops = 1000;
for (i = 0; i < loops; i++)
Count++;
return NULL;
}
int main(void)
{
#define N 10
int i;
pthread_t thread_id[N];
for (i = 0; i < N; i++)
pthread_create(thread_id + i, NULL, Increment, NULL);
for (i = 0; i < N; i++)
pthread_join(thread_id[i], NULL);
printf("Count = %i\n", Count);
return 0;
}
Output:
Count = 10000
to this:int loops = 1000;
Results:int loops = 1000 * 100;
Single-threaded Multi-threaded Run #1 Count = 1000000 Count = 591274 Run #2 Count = 1000000 Count = 636767 Run #3 Count = 1000000 Count = 591146 Run #4 Count = 1000000 Count = 611036 Run #5 Count = 1000000 Count = 625463
This is the fundamental problem with multi-threaded programs accessing a shared resource. Later, we'll see various ways of dealing with this issue and others when we talk about synchronization.
And the Windows API has the exact same problem:
#include <stdio.h> /* printf */
#include <windows.h> /* Windows stuff */
int Count;
DWORD WINAPI Increment(LPVOID p)
{
int i;
int loops = 1000 * 100;
for (i = 0; i < loops; i++)
Count++;
return 0;
}
int main(void)
{
#define N 10
int i;
HANDLE thread[N];
for (i = 0; i < N; i++)
thread[i] = CreateThread(0, 0, Increment, 0, 0, 0);
WaitForMultipleObjects(N, thread, TRUE, INFINITE);
for (i = 0; i < N; i++)
CloseHandle(thread[i]);
printf("Count = %i\n", Count);
return 0;
}
Output:
Other points to make:Count = 629261 Count = 652001 Count = 628380 Count = 743270 Count = 653191