Ahmad Bilal احمد بلال

Parallel Virtual Machine Implementation of Averaging Algorithm

(Applied To An Image, Using Task Farming Scheme With Square Grains)

Implementation

The Mask Applied

A 3X3 mask is to be applied to each and every element of the image. For each element, the output element will be average of the intensities of this element and all neighboring elements (hence the term averaging algorithm).

Division of Grains

The grains used are square. When a grain is sent to a slave, its neighboring rows and columns are also sent. The reason for this being our need to apply mask to the elements on edges.

Distribution of Grains

The grains are distributed in two phases. Firstly, an initial static allocation to all processing elements is made, and then task farming dynamic allocation strategy is adopted.

Expected Result

The expected result of applying averaging algorithms on an image (which was later confirmed by execution of the program) is “smoothness”. The output image is smoother and slightly blurred when compared to the input image. So images that contain a bit of “noise” can be smoothed using this algorithm.

Performance

Although this implementation is relatively slower than its sequential counterpart for small images; it will surely be better if the input image is sufficiently large, and the grain size and number of processing elements is carefully selected.

Algorithms

Mater Program

Following is the sequence of operations performed by the master program:
• Initialize input matrix.
• Initialize input matrix by filling it with zeros
• Leaving the edges, fill the input matrix with the input image read from file.
• Spawn all the slaves.
• Broadcast grain size to all slaves.
• Statically allocate grains to all slaves initially.
• Send a grain to each slave.
• Save x and y coordinates of the top left corner of each grain in grain index table.
• Dynamically allocate rest of the grains.
• Whenever a slave sends results.
• Check the correct position of the matrix received inside output matrix from grain index table.
• Dump the result inside the output matrix.
• Send another grain to the slave, which sent this grain.
• Repeat until all grains are finished.
• Collect remaining results.
• Check the correct position of the matrix received inside output matrix from grain index table.
• Dump the result inside the output matrix.
• Write output matrix to output file.
• Kill all slaves.
• Exit master.

Slave Program

Following is the sequence of operations performed by the slave program:
• Get the grain size from master.
• Get grain from the master.
• Apply mask to the grain.
• Send results back to the master.

Appendix A: Master Program

#include "pvm3.h"

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include <string.h>

#include <sys/types.h>

#include <sys/stat.h>

#include <fcntl.h>



#define SLAVENAME "pvm3/bin/s_img" /*Name of the slave*/

#define DIM_IMG 256 /*Dimension of image*/

#define NUM_SLAVE 16 /*Number of slaves*/

#define DIM_GRAIN 32 /*Grain Size*/

#define NUM_GRAIN (DIM_IMG/DIM_GRAIN)*(DIM_IMG/DIM_GRAIN)

#define INFILE "lena256b"

#define OUTFILE "lena.smooth"



struct dim

{

   
int x;

   
int y;

};



/*ID and dimensions grain of slaves*/

int id_slave[NUM_SLAVE];

struct dim g_slave[NUM_SLAVE];

/* Input and Output Images*/

unsigned char in_img[DIM_IMG+2][DIM_IMG+2],out_img[DIM_IMG][DIM_IMG];

unsigned char out_img_s[DIM_IMG][DIM_IMG];

unsigned char grain[DIM_GRAIN][DIM_GRAIN],ch;



/*Utility Variables*/

int i,j,k,p,q,r,s,u,v,ret,type,tmp,s_buf_id,r_buf_id,s_id,my_id,fd1,fd2;



/*Time Calculations*/

double t_S,t_P;

struct timeval t;



void main()

{

   
my_id=pvm_mytid();

   
/*Fill the input matrix with zeros*/

   
for(i=0;i<DIM_IMG+2;i++)

   
{

       
for(j=0;j<DIM_IMG+2;j++)

       
{

           
in_img[i][j]=(char)0;

       
}

   
}



   
fd1=open(INFILE,O_RDONLY);

   
for(i=1;i<DIM_IMG+1;i++)

   
{

       
for(j=1;j<DIM_IMG+1;j++)

       
{

           
read(fd1,&in_img[i][j],1);

       
}

   
}

   
close(fd1);



   
/*Spawn the Slaves*/

   
for(i=0;i<NUM_SLAVE;i++)

   
{

       
ret=pvm_spawn(SLAVENAME,NULL,PvmTaskDefault,"",1,&id_slave[i]);

       
if(ret==1) /*Success*/

       
{

           
printf("Slave %d with id %d spawned...\n",i,id_slave[i]);

       
}

       
else /*Terminate*/

       
{

           
printf("Spawning Error!!!\n");

           
for(j=0;j<i;j++)

           
{

               
pvm_kill(id_slave[j]);

           
}

           
pvm_exit();

           
exit(1);

       
}

   
}



   
gettimeofday(&t,(struct timezone*)0);

   
t_P=t.tv_sec+t.tv_usec*1.0e-6;



   
/*Begin Parallel Algorithm for Applying mask to the image*/



   
/*Broadcast grain size to all PVM slaves*/

   
type=0;

   
tmp=DIM_GRAIN;

   
pvm_initsend(PvmDataDefault);

   
pvm_pkint(&tmp,1,1);

   
pvm_mcast(id_slave,NUM_SLAVE,type);



   
/*Phase 1: Send grains to all slaves, and start task farming*/

   
k=0; /*Slave Number*/

   
type=1;

   
for(i=0;i<DIM_IMG;i+=DIM_GRAIN)

   
{

       
for(j=0;j<DIM_IMG;j+=DIM_GRAIN)

       
{

           
if(k<NUM_SLAVE) /*Initial static assignment of grains*/

           
{

               
/*Send initial grains to all slaves*/

               
pvm_initsend(PvmDataDefault);

               
for(p=i;p<i+DIM_GRAIN+2;p++)

               
{

                   
pvm_pkbyte((in_img[p]+j),DIM_GRAIN+2,1);

               
}

               
pvm_send(id_slave[k],type);

               
/* Starting points of the grain saved */

               
g_slave[k].x=i;

               
g_slave[k].y=j;

               
k++;

           
}

           
else /*Dynamic Assignment of slaves*/

           
{

               
/*Collect results */

               
pvm_mkbuf(PvmDataDefault);

               
r_buf_id=pvm_recv(-1,2);

               
pvm_bufinfo(r_buf_id,NULL,NULL,&s_id);

               
pvm_upkbyte((char *)grain,DIM_GRAIN*DIM_GRAIN,1);



               
/*Get index of slave*/

               
p=0;

               
while(id_slave[p]!=s_id) p++;



               
/*Save results in the output matrix*/

               
for(q=0;q<DIM_GRAIN;q++)

               
for(r=0;r<DIM_GRAIN;r++)

               
{

                   
out_img[g_slave[p].x+q][g_slave[p].y+r]=grain[q][r];

               
}



               
/*Update the g_slave table*/

               
g_slave[p].x=i;

               
g_slave[p].y=j;



               
/*Send new grain to the finished slave*/

               
pvm_initsend(PvmDataDefault);

               
for(r=i;r<i+DIM_GRAIN+2;r++)

               
{

                   
pvm_pkbyte(in_img[r]+j,DIM_GRAIN+2,1);

               
}

               
pvm_send(s_id,type);

           
}

       
}

   
}



   
/*Phase 2: Collect results */



   
for(i=0;i<NUM_SLAVE;i++)

   
{

       
pvm_mkbuf(PvmDataDefault);

       
r_buf_id=pvm_recv(-1,2);

       
pvm_bufinfo(r_buf_id,&tmp,NULL,&s_id);

       
pvm_upkbyte((char*)grain,DIM_GRAIN*DIM_GRAIN,1);



       
/*Get index of the g_slave table*/

       
p=0;

       
while(id_slave[p]!=s_id) p++;



       
/*Save results in the output matrix*/

       
for(q=0;q<DIM_GRAIN;q++)

       
for(r=0;r<DIM_GRAIN;r++)

       
{

           
out_img[g_slave[p].x+q][g_slave[p].y+r]=grain[q][r];

       
}

   
}





   
/*End Parallel Algorithm for Applying mask to the image*/

   
gettimeofday(&t,(struct timezone*)0);

   
t_P=(t.tv_sec+t.tv_usec*1.0e-6)-t_P;



   
gettimeofday(&t,(struct timezone*)0);

   
t_S=t.tv_sec+t.tv_usec*1.0e-6;



   
/*Start Sequential Algorithm for Applying mask to the image*/



   
for(i=0;i<DIM_IMG;i++)

   
{

       
for(j=0;j<DIM_IMG;j++)

       
{

           
out_img_s[i][j]=

           
(in_img[i-1][j-1]+

           
in_img[i-1][j]+

           
in_img[i][j-1]+

           
in_img[i][j]+

           
in_img[i][j+1]+

           
in_img[i+1][j-1]+

           
in_img[i+1][j]+

           
in_img[i+1][j+1])/9;

       
}

   
}



   
/*End Sequential Algorithm for Applying mask to the image*/



   
gettimeofday(&t,(struct timezone*)0);

   
t_S=(t.tv_sec+t.tv_usec*1.0e-6)-t_S;



   
printf("The ratio parallel by sequential is: %lf\n",t_P/t_S);



   
fd2=open(OUTFILE,O_TRUNC|O_CREAT|O_WRONLY);

   
write(fd1,(char*)&out_img,DIM_IMG*DIM_IMG);

   
close(fd2);



   
/*Cleanup when the work is done*/

   
for(i=0;i<NUM_SLAVE;i++)

   
{

       
pvm_kill(id_slave[i]);

   
}

   
pvm_exit();

}

Appendix B: Slave Program

#include "pvm3.h"

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include <string.h>



unsigned char in_grain[500][500],out_grain[10000];

int i,j,k,ret,tmp,type,dim_grain,my_id,p_id,r_buf_id,s_buf_id;



void main()

{

   
/*Get grain size from master*/

   
p_id=pvm_parent();

   
my_id=pvm_mytid();



   
r_buf_id=pvm_mkbuf(PvmDataDefault);

   
type=0;

   
pvm_recv(p_id,type);

   
pvm_upkint(&dim_grain,1,1);



   
/*Process, send results and wait for more grains*/

   
while(1)

   
{

       
/*Receive a grain*/

       
type=1;

       
r_buf_id=pvm_mkbuf(PvmDataDefault);

       
pvm_recv(p_id,type);

       
for(i=0;i<dim_grain+2;i++)

       
{

           
pvm_upkbyte(in_grain[i],dim_grain+2,1);

       
}

       
/*process the grain*/

       
k=0;

       
for(i=1;i<dim_grain+1;i++)

       
{

           
for(j=1;j<dim_grain+1;j++)

           
{

               
out_grain[k]=(in_grain[i-1][j-1]+

               
in_grain[i-1][j]+

               
in_grain[i-1][j+1]+

               
in_grain[i][j-1]+

               
in_grain[i][j+1]+

               
in_grain[i+1][j-1]+

               
in_grain[i+1][j]+

               
in_grain[i+1][j+1]+

               
in_grain[i][j])/9;

               
k++;

           
}

       
}

       
/*Send back the results*/

       
type=2;

       
s_buf_id=pvm_initsend(PvmDataDefault);

       
pvm_pkbyte(out_grain,(dim_grain*dim_grain),1);

       
pvm_send(p_id,type);

   
}

}