'''
==========================================================================
Python for Parallelism in Introductory Computer Science Education
SC '13 HPC Educators Program
Steven Bogaerts, Wittenberg University
Joshua Stough, Washington and Lee
http://www.joshuastough.com/SC13
MIT License: see README_LICENSE.txt

file: parallelHello.py
author: bogaerts
Summary: pools of processes
==========================================================================
'''

'''
-----------------------
Creating a Process Pool
-----------------------
This example demonstrates how to create and use a pool of processes.
When apply is called, the pool calls the function using one of the
processes. The advantage of process pools is that they have a specified
number of processes, and manage process spawning for you.
'''

from multiprocessing import *

# NOTE: every print call below passes exactly one parenthesized argument,
# so the file produces the same output under Python 2 and Python 3.


def _formatLine(label, values):
    '''Return label followed by the str() of each value, space separated.'''
    return " ".join([label] + [str(value) for value in values])


def square(number):
    '''Return number multiplied by itself.'''
    return number * number


def squareNumbers(numbers):
    '''
    Print numbers and their squares, computing each square with a
    blocking pool.apply call on a pool of 4 processes.
    '''
    print("Numbers: %s" % (numbers,))
    pool = Pool(processes=4)
    try:
        squares = []
        for i in numbers:
            # Evaluates square(i) in a worker process; apply() blocks
            # until that single result is available.
            squares.append(pool.apply(square, (i,)))
    finally:
        # Release the worker processes instead of leaking them.
        pool.close()
        pool.join()
    print(_formatLine("Squares:", squares))


'''
--------------------
Improving Efficiency
--------------------
This example shows how to speed up the above program by using
asynchronous operations. The apply() function blocks until it has
completed its result, so even though the function is sent to another
process, it still waits at the end. apply_async() does not block for the
result to be calculated. The get blocks until the result is obtained.
'''


def squareNumbers2(numbers):
    '''
    Print numbers and their squares, submitting every square with
    apply_async before collecting any result.
    '''
    print("Numbers: %s" % (numbers,))
    pool = Pool(processes=4)
    try:
        pending = [pool.apply_async(square, (i,)) for i in numbers]
        # Get is a blocking operation so it must happen only after every
        # task has been submitted. Otherwise the same problem would occur.
        squares = [handle.get() for handle in pending]
    finally:
        pool.close()
        pool.join()
    print(_formatLine("Squares:", squares))


'''
---------------------
Parallel Map Function
---------------------
This is the same as the above example except that it uses the built-in
parallel map function in the Pool class.
'''


def squareNumbers3(numbers):
    '''Print numbers and the list of their squares computed by pool.map.'''
    print("Numbers: %s" % (numbers,))
    pool = Pool(processes=4)
    try:
        squares = pool.map(square, numbers)
    finally:
        pool.close()
        pool.join()
    print("Squares: %s" % (squares,))


'''
------------------
Classroom Exercise
------------------
Change the code below so that instead of using apply_async, map is used.
Use the map function to complete the same thing that is completed by the
function with the apply_async functions.
'''


def _chunkList(ls, nChunks):
    '''
    Split ls into at most nChunks consecutive slices and return only the
    non-empty ones. The last slice absorbs any leftover elements.
    '''
    # Floor division keeps the chunk size an int on Python 2 and 3.
    chunkSize = len(ls) // nChunks
    chunks = [ls[chunkSize * i:chunkSize * (i + 1)]
              for i in range(nChunks - 1)]
    chunks.append(ls[chunkSize * (nChunks - 1):])
    # Short lists yield empty slices; dropping them keeps a placeholder
    # result from being compared against real data.
    return [chunk for chunk in chunks if chunk]


# Give students this
def findMaxPart(ls):
    '''
    Return the largest element of ls.

    An empty list returns 0, which is what the original version returned.
    '''
    if not ls:
        return 0
    # Seed with a real element rather than 0 so that lists containing
    # only negative values are handled correctly.
    maxVal = ls[0]
    for i in ls:
        if i > maxVal:
            maxVal = i
    return maxVal


# Give students this
def findMax(ls):
    '''
    Return the largest element of ls, searching up to 4 chunks with
    apply_async on a pool of 4 processes. An empty list returns 0.
    '''
    chunks = _chunkList(ls, 4)
    if not chunks:
        return 0
    pool = Pool(processes=4)
    try:
        results = [pool.apply_async(findMaxPart, (chunk,))
                   for chunk in chunks]
        maxValue = results[0].get()
        for handle in results[1:]:
            temp = handle.get()
            if temp > maxValue:
                maxValue = temp
    finally:
        pool.close()
        pool.join()
    return maxValue


def findMaxMap(ls):
    '''
    Return the largest element of ls, searching up to 4 chunks with
    pool.map on a pool of 4 processes. An empty list returns 0.
    '''
    sendLs = _chunkList(ls, 4)
    if not sendLs:
        return 0
    pool = Pool(processes=4)
    try:
        results = pool.map(findMaxPart, sendLs)
    finally:
        pool.close()
        pool.join()
    maxValue = results[0]
    for temp in results[1:]:
        if temp > maxValue:
            maxValue = temp
    return maxValue


'''
--------
Add List
--------
This example compares adding a list in parallel by using Processes and
Process Pools.
'''

################################################################
# Without Process Pool


def addSectionToQueue(ls, low, high, q):
    '''
    Sum ls[low] through ls[high - 1] and put the total on q.

    This worker has its own name because the file also defines a
    three-argument addSection further down; sharing the name made the
    later definition replace this one.
    '''
    result = 0
    for i in range(low, high):
        result += ls[i]
    q.put(result)


def addList(ls, nProcs):
    '''
    Print the sum of ls, computed by nProcs Process objects that report
    their partial sums through a Queue.
    '''
    q = Queue()
    size = len(ls)
    inc = size // nProcs
    p = []
    for i in range(nProcs - 1):
        p.append(Process(target=addSectionToQueue,
                         args=(ls, i * inc, (i + 1) * inc, q)))
    # Add the last process separately to account for leftover numbers.
    p.append(Process(target=addSectionToQueue,
                     args=(ls, (nProcs - 1) * inc, size, q)))
    for proc in p:
        proc.start()
    result = 0
    for _ in range(nProcs):
        result += q.get()
    # Join only after draining the queue: a child that still has queued
    # data to flush may not exit until that data has been read.
    for proc in p:
        proc.join()
    print(result)


################################################################
# With Process Pool


def addSection(ls, low, high):
    '''Return the sum of ls[low] through ls[high - 1].'''
    result = 0
    for i in range(low, high):
        result += ls[i]
    return result


def addListPool(ls, nProcs):
    '''
    Print the sum of ls, computed by nProcs apply_async tasks on a pool
    of nProcs processes.
    '''
    pool = Pool(processes=nProcs)
    try:
        size = len(ls)
        inc = size // nProcs
        resultArr = []
        for i in range(nProcs - 1):
            resultArr.append(
                pool.apply_async(addSection, (ls, i * inc, (i + 1) * inc)))
        # The last task takes the leftover numbers as well.
        resultArr.append(
            pool.apply_async(addSection, (ls, (nProcs - 1) * inc, size)))
        result = 0
        for handle in resultArr:
            result += handle.get()
    finally:
        pool.close()
        pool.join()
    print(result)


#if __name__ == '__main__':
#    ls = [1, 2, 3, 4, 5]
#    squareNumbers(ls)
#    squareNumbers2(ls)
#    squareNumbers3(ls)
#    addList(ls, 4)
#    addListPool(ls, 4)