Erlang (programming language)/Tutorials/Linda Sieve

From Citizendium
Jump to navigation Jump to search

Prime sieve with Linda coordination

Linda

Linda uses a tuple space to store data. Tuple space is a way to mix message passing with shared memory to coordinate computation between parallel processes.

Prime sieve

How many processes can this program use? This program creates as many sieves as the square root of the numbers in the matrix. If we are looking for the primes below 100 then there are ~10 parallel sieve processes. Actually, most of the sieve processes are halted and only (the number of prime numbers under the square root of Max) processes are left at the end. This allows an easy parallelism of 10 for 100 and 100 for 10000 with little modification.

An aside about the soft realtime nature of erlang: 
One definition of real time programming is: has totally deterministic operation.

The P2 process should have the most work to do. It starts first and should end last if each sieve is given an equal time-slice. This primes program will generate the correct list of prime numbers about 9 out of 10 times. The P2 sieve is hard coded to cause the list of primes to dump when P2 finishes. This dump works almost all the time. The tenth time out of ten the P3 process will run after the P2 process stops, causing the list of primes to include some multiples of 3. This is the half-promise of a soft real time: being deterministic, almost all the time.

The late-P3 error is easy to fix, we simply add a function to check if all the workers(sieves) are done in the base case of P2 and P3. If all the workers are done, then send linda the {dump,output} message. For educational purposes, this safety check function has been left as an exercise for the reader. To be extra safe, we might as well add the safety check to the base case of all the PN(sieve) processes.

All the processes of the form, PN, are in a peer management relationship; they can all stop one another. This is not usually recommended. Processes should be spawned in a tree relationship with strict sets of bosses and workers to avoid timing problems like the ones we see with the peer sieves. Other timing problems were avoided by giving each sieve process a delay at the beginning.

Prime sieve program (parallel)

   -module(primes).
   % This is a simple linda tuplespace. Here we use it to find primes numbers.
   % This tuple-space can not have duplicate tuples, but with a prime sieve it does
   %  not matter.
   -compile(export_all).
   start() -> start(100).  % default value for max is 100
   start(Max) -> 
       io:format("  Loading ~w numbers into matrix (+N) \n ", [ Max ] ),
       Lid = spawn_link( primes, linda, [Max, [], [] ]),
       Sqrt = round(math:sqrt(Max)+0.5),  
       io:format(" Sqrt(~w) + 1 = ~w \n " , [Max,Sqrt] ),  
       io:format(" Tuple space is started ~n ",[]),  
       io:format(" ~w sieves are spawning (+PN) ~n ", [Sqrt] ),
       io:format(" Non prime sieves are being halted (-PN) ~n ", [] ),
       % load numbers into tuplespace 
       % and spawn sieve process
       spawn( primes, put_it, [Max, Max, Lid] ).
   start_sieves(Lid) ->
       Lid ! {self(), get, all, pids},  
       receive 
           {lindagram, pids, Pids} -> done
       end,
       start_sieve_loop(Pids).
   start_sieve_loop([]) -> done;
   start_sieve_loop([Pid|Pids]) ->
       receive
       after 100 -> done
       end,
       Pid ! {start},
       start_sieve_loop(Pids).
   spawn_sieves( _Max, Sqrt, _Lid, Sqrt ) -> done;  
   spawn_sieves( Max, Inc, Lid, Sqrt ) ->
       T = 1000,
       Pid = spawn( primes, sieve, [ Max, Inc+Inc, Inc, Lid, T ]),
       Name = list_to_atom("P" ++ integer_to_list(Inc)),
       Lid ! {put, pid, Name},
       register( Name, Pid),
       io:format(" +~s ", [atom_to_list(Name)]),
       spawn_sieves( Max, Inc+1, Lid, Sqrt ).
   put_it(Max, N, Lid) when N =< 1 ->
       Sqrt = round(math:sqrt(Max)+0.5),
       spawn_sieves( Max, 2, Lid, Sqrt );  
   put_it(Max, N,Lid) when N > 1 ->
       receive
       after 0 ->
           Lid ! {put, N, N},
           if 
               N rem 1000 == 0 ->
                   io:format(" +~w ", [N]);
               true -> done
           end,
           put_it(Max, N-1,Lid)
       end.
   % the 2 sieve starts last and has the most to do so it finishes last
   sieve(Max, N, 2, Lid, _T) when N > Max -> 
       io:format("final sieve ~w done, ~n", [2] ),
       Lid ! {dump,output};
   sieve(Max, N, Inc, _Lid, _T) when N > Max ->    
       io:format("sieve ~w done ", [Inc] );
   sieve(Max, N, Inc, Lid, T) when N =< Max ->   
       receive 
       after 
           T -> NT = 0   
       end,
       receive 
           {lindagram,Number} when Number =/= undefined -> 
               clearing_the_queue;
           {exit} -> exit(normal)
       after
           1 -> done 
       end,
       % remove multiple of number from tuple-space
       Lid ! {self(), get, N},
       Some_time = (N rem 1000)==0,
       if Some_time -> io:format("."); true -> done end,
       % remove (multiple of Inc) from sieve-process space
       Name = list_to_atom("P" ++ integer_to_list(N)),
       Exists = lists:member( Name, registered()),
       if 
           Exists ->
               Name ! {exit},
               io:format(" -~s ", [atom_to_list(Name)] );
           true -> done
       end,
       sieve(Max, N+Inc, Inc, Lid, NT).        % next multiple
       
   %% linda is a simple tutple space 
   %%    if it receives no messages for 2 whole seconds it dumps its contents 
   %%    as output and halts
   linda(Max, Keys, Pids) ->
       receive
       {put, pid, Pid} ->
           linda(Max, Keys, Pids++[Pid]);
       {put, Name, Value} ->
           put( Name, Value),
           linda(Max, Keys++[Name], Pids);
       {From, get, Name} ->
           From ! {lindagram, get( Name)},
           erase( Name ),                          % get is a destructive read  
           linda(Max, Keys--[Name],Pids);
       {From, get, all, pids} ->
           From ! {lindagram, pids, Pids},
           linda(Max, Keys, Pids );
       {From, get, pid, Pid} ->
           L1 = length( Pids ),
           L2 = length( Pids -- [Pid]),
           if 
               L1 > L2 ->  % if it exists
                   From ! {lindagram, pid, Pid};
               true -> 
                   From ! {lindagram, pid, undefined}
           end,
           linda(Max, Keys, Pids );
       {dump,output} ->
           io:format(" ~w final primes remain: ~w ~n ", [length(Keys),  lists:sort(Keys) ])
       after (100*Max) -> % if there is not tuple action after some time then print the results
           io:format(" ~w primes remain: ~w ~n ", [length(Keys),  lists:sort(Keys) ])
       end.

Sample output for prime sieve

c(primes).
primes:start(1000).
 Loading 1000 numbers into matrix (+N)
 Sqrt(1000) + 1 = 32
 Tuple space is started
 32 sieves are spawning (+PN)
 Non prime sieves are being halted (-PN)
 +1000 <0.46.0>
+P2  +P3  +P4  +P5  +P6  +P7  +P8  +P9  +P10  
+P11  +P12  +P13  +P14  +P15  +P16   
+P17  +P18  +P19  +P20  +P21  +P22  +P23  +P24  
+P25  +P26  +P27  +P28  +P29  +P30  
+P31  -P8  -P6  -P4  -P9  -P12  -P10  -P15  
-P15  -P18  -P14  -P21  -P21  -P22  
-P26  -P20  -P24  -P25  -P27  -P28  -P30  -P30  -P16 
sieve 31 done sieve 29 done 
sieve 19 done sieve 23 done sieve 11 done 
sieve 13 done sieve 17 done sieve 7 done 
.sieve 5 done sieve 3 done .final sieve 2 done,
168 final primes remain: 
[2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,
71,73,79,83,89,97,
101,103,107,109,113,127,131,137,139,149,151,157,163,
167,173,179,181,191,193,197,199,
211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,293,
307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,
401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,
499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,
601,607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,
701,709,719,727,733,739,743,751,757,761,769,773,787,797, 
809,811,821,823,827,829,839,853,857,859,863,877,881,883,887,
907,911,919,929,937,941,947,953,967,971,977,983,991,997]