''' ========================================================================== Python for Parallelism in Introductory Computer Science Education SC '13 HPC Educators Program Steven Bogaerts, Wittenberg University Joshua Stough, Washington and Lee http://www.joshuastough.com/SC13 MIT License: see README_LICENSE.txt file: speedup.py author: bogaerts Summary:These examples demonstrate how to conduct basic timing in Python. This is then applied to examine the speedup of the use of multiple processors on a problem. ========================================================================== ''' from multiprocessing import * ''' -------------------------------------- Timing the Summation of Random Numbers -------------------------------------- This introduces students to the notion of timing the execution of a part of a program, and the generation of random numbers in Python. ''' import random import time def timeSum(): numNumbers = 10000 startTime = time.time() s = 0 for i in range(numNumbers): s = s + random.randint(1, 100) # randint(x, y) gives a random integer between x and y, inclusive stopTime = time.time() elapsedTime = stopTime - startTime print "The summation took", elapsedTime, "seconds." # This version just shows a small shortcut in the timing code. def timeSum2(): numNumbers = 10000 startTime = time.time() s = 0 for i in range(numNumbers): s = s + random.randint(1, 100) elapsedTime = time.time() - startTime print "The summation took", elapsedTime, "seconds." ''' ----------------------------------------------- Using Join to Know When a Process is Terminated ----------------------------------------------- This demonstrates how to use the join() method to wait for a process to terminate. ''' def addEmUp(maxNum): s = 0 for i in range(maxNum): s = s + i def joinEx(): # Compare execution times of the two processes below # p1 = Process(target=addEmUp, args=(50,)) p1 = Process(target=addEmUp, args=(5000000,)) p1.start() print "Still working..." p1.join() # The main process waits here until p1 "joins" the main process. # That is, the main process waits until p1 is done. print "Done!" # This version times the process. def joinEx2(): startTime = time.time() # Compare execution times of the two processes below p1 = Process(target=addEmUp, args=(50,)) # p1 = Process(target=addEmUp, args=(5000000,)) p1.start() print "Still working..." p1.join() print "Done!" elapsedTime = time.time() - startTime print "The process took", elapsedTime, "seconds." ''' --------------------------------------------- Comparing Parallel and Sequential Performance --------------------------------------------- This example demonstrates and allows students to experiment with the speedup that having multiple processors allows. This example is set up to make use of two processors. Interprocess communication is avoided in this example. The spawned processes do not merge their results into a final computed sum, and therefore one less addition is carried out. Students can be reminded that the same numbers are not being summed, but this will not make a difference in the total times. To sum the exact same numbers would require accumulation of numbers in a list, which is not assumed to have been covered at this point in the course. This problem can make a good base for an exercise in or out of the classroom. Students can be instructed to experiment with the code and perhaps write a report with graphs showing performance across multiple variables: - Adjusing totalNumNumbers - Modifying difficulty of the operation to be performed (e.g. summing square roots of numbers) In such an exercise, students should observe the following key points: - When operations are longer and/or more complex, working in parallel has the potential to bring more significant speedups. (This is definitely true in embarassingly parallel problems like this summation.) - Speedup of n processors over 1 on the same algorithm will never exceed n ''' from multiprocessing import * def addNumbers(numNumbers): s = 0 for i in range(numNumbers): s = s + random.randint(1, 100) print s def compareParSeq(): totalNumNumbers = 1000000 # START TIMING PARALLEL startTime = time.time() p1 = Process(target=addNumbers, args=(totalNumNumbers/2,)) p2 = Process(target=addNumbers, args=(totalNumNumbers/2,)) p1.start() p2.start() # Wait until processes are done p1.join() p2.join() parTime = time.time() - startTime # DONE TIMING PARALLEL print "It took", parTime, "seconds to compute in parallel." # START TIMING SEQUENTIAL startTime = time.time() s = 0 for i in range(totalNumNumbers): s = s + random.randint(1, 100) seqTime = time.time() - startTime # DONE TIMING SEQUENTIAL print "It took", seqTime, "seconds to compute sequentially." print "Speedup: ", seqTime / parTime ''' This version uses n processors, instead of a hardcoded 2. It requires the use of accumulation in lists. In experimenting with this code, students should see that simply adding more processes, when there are no dedicated processors for them, doesn't speed up the computation. In fact, the extra overhead may slow things down further. ''' def compareParSeqN(): totalNumNumbers = 1000000 numProcesses = 5 # START TIMING PARALLEL startTime = time.time() pList = [] for i in range(numProcesses): pList.append(Process(target = addNumbers, args = (totalNumNumbers/numProcesses,))) pList[-1].start() # Wait until processes are done for i in range(numProcesses): pList[i].join() parTime = time.time() - startTime # DONE TIMING PARALLEL print "It took", parTime, "seconds to compute in parallel." # START TIMING SEQUENTIAL startTime = time.time() s = 0 for i in range(totalNumNumbers): s = s + random.randint(1, 100) seqTime = time.time() - startTime # DONE TIMING SEQUENTIAL print "It took", seqTime, "seconds to compute sequentially." print "Speedup: ", seqTime / parTime ''' ------------------ Classroom Exercise ------------------ Add timers to the code below to determine whether the long list and short calculations or the short list with long calculations runs faster in parallel. Analyze your results and come up with a reason why they show the results that they show ''' # Give these to students def addNumbers(inQ, outQ): ls = inQ.get() total = 0 for i in ls: total += i outQ.put(total) def longOperation(inQ, outQ): ls = inQ.get() total = 0 for i in ls: total += i time.sleep(1) # Simulate long calculations outQ.put(total) def makeRandomLs(n): ls = [] for i in range(n): ls.append(random.randint(0, 100)) return ls # Give this to students def testSpeedup(): inQ = Queue() outQ = Queue() longLs = makeRandomLs(1000) shortLs = makeRandomLs(10) # Long List chunkSize = len(longLs) / 4 procs = [] for i in range(4): procs.append(Process(target=addNumbers, args=(inQ, outQ))) procs[i].start() for i in range(3): inQ.put(longLs[i*chunkSize:(i+1)*chunkSize]) inQ.put(longLs[chunkSize*3:]) for i in range(4): procs[i].join() total = 0 for i in range(4): total += outQ.get() print "(Long List Parallel) The total is:", total total = 0 for i in longLs: total += i print "(Long List Sequential) The total is:", total # Short List chunkSize = len(shortLs) / 4 procs = [] for i in range(4): procs.append(Process(target=longOperation, args=(inQ, outQ))) procs[i].start() for i in range(3): inQ.put(shortLs[i*chunkSize:(i+1)*chunkSize]) inQ.put(shortLs[chunkSize*3:]) for i in range(4): procs[i].join() total = 0 for i in range(4): total += outQ.get() print "\n(Short List Parallel) The total is:", total total = 0 for i in shortLs: total += i time.sleep(1) print "(Short List Sequential) The total is:", total # Finished Exercise def testSpeedupSolution(): inQ = Queue() outQ = Queue() longLs = makeRandomLs(100000) shortLs = makeRandomLs(10) # Long List chunkSize = len(longLs) / 4 procs = [] startTime = time.time() for i in range(4): procs.append(Process(target=addNumbers, args=(inQ, outQ))) procs[i].start() for i in range(3): inQ.put(longLs[i*chunkSize:(i+1)*chunkSize]) inQ.put(longLs[chunkSize*3:]) for i in range(4): procs[i].join() total = 0 for i in range(4): total += outQ.get() stopTime = time.time() print "(Long List Parallel) The total is:", total print "Time:", stopTime - startTime, "\n" startTime = time.time() total = 0 for i in longLs: total += i stopTime = time.time() print "(Long List Sequential) The total is:", total print "Time:", stopTime - startTime, "\n" # Short List chunkSize = len(shortLs) / 4 procs = [] startTime = time.time() for i in range(4): procs.append(Process(target=longOperation, args=(inQ, outQ))) procs[i].start() for i in range(3): inQ.put(shortLs[i*chunkSize:(i+1)*chunkSize]) inQ.put(shortLs[chunkSize*3:]) for i in range(4): procs[i].join() total = 0 for i in range(4): total += outQ.get() stopTime = time.time() print "\n(Short List Parallel) The total is:", total print "Time:", stopTime - startTime, "\n" startTime = time.time() total = 0 for i in shortLs: total += i time.sleep(1) stopTime = time.time() print "(Short List Sequential) The total is:", total print "Time:", stopTime - startTime # --------------------------------------------------------------------- #if __name__ == '__main__': # timeSum() # timeSum2() # joinEx() # joinEx2() # compareParSeq() # compareParSeqN()