[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[microblaze-uclinux] Microblaze Performance.



Hey Folks,

   This is my first post so try not to let my inexperience offend. My 
question relates to the hardware options one uses in setting up the 
microblaze. With EDK and BSB I quickly got a microblaze working. With some 
finagling and an XMD stub I got large, complicated programs running out of 
the external SDRAM (I'm using the Digilent S3 starter board, or an 
equivalent). But I see a major problem with the performance. Here are the 
numbers and comparisons I am using: The code runs a 64-point FFT on an 
array of data with real and imaginary parts. It is a lot of calculation, so 
it is easy to time. I run 1000 of these in a loop and it takes approximately 
4 minutes, an unfathomably long amount of time in the world of DSP. The 
comparison is vs a Motorola DSP board which is also doing emulated floating 
point arithmetic, which performs 1000 FFTs in under a second. My theory at 
this point is that I am missing a major design flaw which is inhibiting my 
performance. Technically, the microblaze has a core clock of 100 MHz as well 
as the SDRAM. They are interfaced using the Cache-Link FSL bus and a BRAM 
cache local to the microblaze. With these statistics, there is no real 
reason for it to behave so poorly. Even adding a hardware FPU doesn't change 
the result in the least.
   Here is what I would like, if anyone would be so kind: A basic rundown of 
the hardware configuration options necessary to optimize microblaze 
performance for running from external memory. There must be a way to make 
the microblaze run in an equivalent way to this outdated 1998 Motorola 
24-bit DSP chip. If not then Xilinx and IBM have a lot of explaining to do. 
For reference I will supply the relevant design files I have available.

Thanks So Much,
Andy

_________________________________________________________________
Don’t just search. Find. Check out the new MSN Search! 
http://search.msn.click-url.com/go/onm00200636ave/direct/01/
// YAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAR


#include "xtmrctr_l.h"
#include "xintc_l.h"
#include "xintc_i.h"
#include "xparameters.h"
#include "xgpio.h"
#include "xuartlite_l.h"

//from Transceiver code
//#include <stdio.h>
//#include <stdlib.h>
#include <math.h>
#include <c56.h>
#include <reg56307.h>
#include <irq563xx.h>
#include <iso646.h >
#include "tables.h"
#include "header.h"   //commented out.. it was complaining for no reason...
#include "declarations.h"
#include "CodecDef.h"


/* Number of bytes in the software FIFO buffer. */
#define FIFO_SIZE     200
#define DELAY		  2000000

/*
 * SPI register definitions.
 *
 * Each register macro is the peripheral base address plus a byte offset.
 * Every expansion is parenthesized so the macro behaves as a single value
 * inside a larger expression (a cast, a multiplication, a dereference, ...).
 * The base addresses come from XPAR_ADC_BASEADDR / XPAR_DAC_BASEADDR, which
 * are not defined in this file.
 */
#define ADC_BASEADDR                       (XPAR_ADC_BASEADDR)
#define ADC_intr_global_enable_register    (ADC_BASEADDR + 0x1C)
#define ADC_intr_register                  (ADC_BASEADDR + 0x20)
#define ADC_intr_enable_register           (ADC_BASEADDR + 0x28)
#define ADC_reset_module                   (ADC_BASEADDR + 0x40)
#define ADC_control_register               (ADC_BASEADDR + 0x060)
#define ADC_status_register				   (ADC_BASEADDR + 0x064)
#define ADC_data_transmit_register		   (ADC_BASEADDR + 0x068)
#define ADC_data_recieve_register		   (ADC_BASEADDR + 0x06C)
#define ADC_slave_select_register		   (ADC_BASEADDR + 0x070)
#define ADC_transmit_FIFO_occupancy        (ADC_BASEADDR + 0x074)
#define ADC_receive_FIFO_occupancy		   (ADC_BASEADDR + 0x078)

#define DAC_BASEADDR                       (XPAR_DAC_BASEADDR)
#define DAC_intr_global_enable_register    (DAC_BASEADDR + 0x1C)
#define DAC_intr_register                  (DAC_BASEADDR + 0x20)
#define DAC_intr_enable_register           (DAC_BASEADDR + 0x28)
#define DAC_reset_module                   (DAC_BASEADDR + 0x40)
#define DAC_control_register               (DAC_BASEADDR + 0x060)
#define DAC_status_register				   (DAC_BASEADDR + 0x064)
#define DAC_data_transmit_register		   (DAC_BASEADDR + 0x068)
#define DAC_data_recieve_register		   (DAC_BASEADDR + 0x06C)
#define DAC_slave_select_register		   (DAC_BASEADDR + 0x070)
#define DAC_transmit_FIFO_occupancy        (DAC_BASEADDR + 0x074)
#define DAC_receive_FIFO_occupancy		   (DAC_BASEADDR + 0x078)


#define ss 16777216  //2^24

/*
 * Power-of-two constants.
 *
 * In C the `^` operator is bitwise XOR, not exponentiation, so the previous
 * spelling `2^32` evaluated to 34.  The values are written as shifts instead.
 * n31 needs a 64-bit type because 2^32 does not fit in 32 bits; the others
 * fit in an unsigned long of at least 32 bits.
 * NOTE(review): the exponent of each macro is one higher than the number in
 * its name (n31 -> 2^32); that mapping is kept exactly as originally written
 * -- confirm it is intended.
 */
#define n31 (1ULL << 32)
#define n30 (1UL << 31)
#define n29 (1UL << 30)
#define n28 (1UL << 29)
#define n27 (1UL << 28)
#define n26 (1UL << 27)
#define n25 (1UL << 26)
#define n24 (1UL << 25)
#define n23 (1UL << 24)
#define n22 (1UL << 23)
#define n21 (1UL << 22)
#define n20 (1UL << 21)



int x=1;

/* Function declarations (no definitions for these appear in this file). */
Xint16 FIFO_getchar(void);
void wait(int x);


/* Global variables */

/* Software FIFO storage and its head/tail/occupancy bookkeeping. */
static Xuint8 FIFO[FIFO_SIZE], FIFO_head, FIFO_tail, FIFO_ByteCount;

/* Scales the timer load value computed in main(). */
Xuint32 timer_count = 1; /* default timer_count */

/*
 * Timer-0 interrupt tally.  It is incremented inside timer_int_handler() and
 * read from main(), so it is volatile to force every access to go to memory.
 * This is the definition of the object, so it carries the initializer and
 * no `extern` keyword.
 */
volatile Xuint32 count = 0;

XGpio gpio;


/*
 * Benchmark kernel: loads a fixed 64-point complex test vector into a local
 * buffer and transforms it in place (bit-reversal reordering followed by the
 * Danielson-Lanczos butterfly stages).
 *
 * Takes no arguments and returns nothing.  The transformed data lives only
 * in the local buffer and is discarded on return.
 *
 * Uses struct SCOMPLEX, SWAP() and PI, none of which are defined in this
 * file (presumably they come from the project headers).
 *
 * NOTE(review): the twiddle-factor recurrence and the butterfly products are
 * evaluated in double precision.  If the target only accelerates
 * single-precision floating point, this arithmetic is emulated in software
 * regardless of the FPU -- confirm against the processor configuration.
 */
void Fft(){

  /* Fixed test input, real parts.  The table is never modified, so it is
     static const rather than being rebuilt on the stack on every call. */
  static const int x[64] = { 1527, 439, 739, 94, 1871, 1720, 379, 546,
                             1366, 1801, 1757, 570, 1785, 478, 1290, 2043,
                             597, 1043, 324, 293, 1681, 1111, 864, 1472,
                             1323, 756, 1658, 178, 1185, 1060, 262, 315,
                             1396, 790, 1061, 53, 892, 1031, 1432, 120,
                             1466, 1522, 2122, 1675, 1222, 487, 673, 556,
                             1431, 1827, 551, 1248, 203, 1245, 928, 1019,
                             120, 1041, 2013, 231, 2113, 1005, 521, 779 };

  /* Fixed test input, imaginary parts. */
  static const int y[64] = { 1290, 2043, 597, 1043, 324, 293, 1681, 1111,
                             864, 1472, 1323, 756, 1658, 178, 1185, 1060,
                             262, 315, 1396, 790, 1061, 53, 892, 1031,
                             1432, 120, 1466, 1522, 2122, 1675, 1222, 487,
                             673, 556, 1431, 1827, 551, 1248, 203, 1245,
                             928, 1019, 120, 1041, 2013, 231, 2113, 1005,
                             521, 779, 1301, 1675, 425, 1644, 620, 789,
                             1391, 536, 2010, 1117, 684, 1291, 251, 775 };

  /* Working buffer; transformed in place. */
  struct SCOMPLEX xx[64];
  int i;

  for (i=0; i<64; i++) {
    xx[i].re = x[i];
    xx[i].im = y[i];
  }

  /* Sign selector for the twiddle angle; -1 additionally divides the
     result by q at the end. */
  int isgn = 1;

  /* Transform length; must equal the number of elements in xx[]. */
  int q = 64;

  int mmax, mm, jj, istep, ii;
  /* Trigonometric recurrence state */
  double wtemp;
  double wr, wi;
  double wpr, wpi;
  double theta;
  float tempr, tempi;

  /*
   * Bit-reversal reordering.
   * The loop is bounded by q, the same length that sizes xx[] and every
   * later loop.  It previously used `nn`, which this function never
   * declares; any other value there would index outside xx[].
   */
  jj=0;
  for(ii=0;ii<q;ii++)
  {
    if(jj>ii)
    {
      /* Exchange the two complex numbers. */
      SWAP(xx[jj].re,xx[ii].re);
      SWAP(xx[jj].im,xx[ii].im);
    }
    mm=q/2;
    while((mm>=2)&&(jj>=mm))
    {
      jj=jj-mm;
      mm=mm>>1;
    }
    jj=jj+mm;
  }

  /*
   * Danielson-Lanczos section.
   * In the first pass mmax is 1, so only mm == 0 runs with wr = 1, wi = 0;
   * the initial wpr/wpi values feed the recurrence once, but its result is
   * overwritten before the next pass uses it.
   */
  mmax=1;
  wtemp=1.0;
  wpr=-2.0;
  wpi=0.0;
  while(q>mmax)
  {
    /* One pass per doubling of mmax until it reaches q. */
    istep=mmax<<1;
    /* Initialize the trigonometric recurrence. */
    wr=1;
    wi=0;
    for(mm=0;mm<mmax;mm++)
    {
      /* Butterflies that share the current twiddle factor (wr, wi). */
      for(ii=mm;ii<q;ii+=istep)
      {
        /* Danielson-Lanczos formula */
        jj=ii+mmax;
        tempr=(float)(wr*xx[jj].re+wi*xx[jj].im);
        tempi=(float)(wr*xx[jj].im-wi*xx[jj].re);
        xx[jj].re=xx[ii].re-tempr;
        xx[jj].im=xx[ii].im-tempi;
        xx[ii].re+=tempr;
        xx[ii].im+=tempi;
      }
      /* Trigonometric recurrence: rotate (wr, wi) by one angular step. */
      wtemp=wr;
      wr*=(wpr+1.0);
      wr-=wi*wpi;
      wi*=(wpr+1.0);
      wi+=wtemp*wpi;
    }
    mmax=istep;
    /* Derive the next pass's step from the half angle:
       wpr = 2*(cos^2(theta) - 1), wpi = 2*sin(theta)*cos(theta). */
    theta=0.5*(double)(isgn)*PI/(double)(mmax);
    wtemp=sin(theta);
    wpi=cos(theta);
    wpr=2.0*(wpi*wpi-1.0);
    wpi*=2.0*wtemp; /*sin(theta);*/
  }
  if (isgn==-1)
  {
    /* Scale by 1/q when isgn selects the -1 direction. */
    for(ii=0;ii<q;ii++)
    {
      xx[ii].re/=q;
      xx[ii].im/=q;
    }
  }
} // END OF FUNCTION Fft


void timer_int_handler(void * baseaddr_p) {
   Xint32 baseaddr = (int)baseaddr_p;
   Xuint32 csr0;
   Xuint32 csr1;

   Xuint32 count0 = 0;
   Xuint32 count1 = 0;

  /* Read timer 0 CSR to see if it raised the interrupt */
  csr0 = XTmrCtr_mGetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 0);

  /* Read timer 1 CSR to see if it raised the interrupt */
  csr1 = XTmrCtr_mGetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 1);

  /* See if the timer0 went off */
  if (csr0 & XTC_CSR_INT_OCCURED_MASK) {


//  SOME DEBUG OUTPUT YAY
//    x=XTimerCtr_mReadReg(XPAR_OPB_TIMER_1_BASEADDR, 0, 0x08);
//	xil_printf("\r\n*** Timer 0 = ");
//    xil_printf("%x",x);
//    xil_printf(" *** ");

//save/iterate count value

//	xil_printf("%D",count);
    count = count +1;

//    xil_printf(" ***");

	/* Clear the timer interrupt */
	XTmrCtr_mSetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 0, csr0);
  }

  /* See if the timer1 went off */
  if (csr1 & XTC_CSR_INT_OCCURED_MASK) {


    /* Clear the timer interrupt */
  	XTmrCtr_mSetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 1, csr1);
  }
//  xil_printf("\r\n*** Leaving ISR ***\r\n");
}



/*
 * Benchmark entry point.
 *
 * Configures both counters of the timer at XPAR_OPB_TIMER_1_BASEADDR as
 * auto-reloading down-counters with interrupts enabled, enables the timer
 * interrupt in the interrupt controller and on the processor, runs Fft()
 * FFT_RUNS times, and then prints the global `count`.
 *
 * Returns 0.
 */
int main() {
  /* Number of FFT passes to time.  The loop used to test `a <= 1000`,
     which executed 1001 passes. */
  enum { FFT_RUNS = 1000 };
  int a;

  /* Start the interrupt controller */
  XIntc_mMasterEnable(XPAR_OPB_INTC_0_BASEADDR);

  /* set the number of cycles the timer counts before interrupting:
     (timer_count^2 + 1) * 50000000 for both counters */
  XTmrCtr_mSetLoadReg(XPAR_OPB_TIMER_1_BASEADDR, 0,
                      (timer_count*timer_count+1) * 50000000);
  XTmrCtr_mSetLoadReg(XPAR_OPB_TIMER_1_BASEADDR, 1,
                      (timer_count*timer_count+1) * 50000000);

  /* start both counters: enabled, interrupting, auto-reload, counting down */
  XTmrCtr_mSetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 0,
                               XTC_CSR_ENABLE_TMR_MASK | XTC_CSR_ENABLE_INT_MASK |
                               XTC_CSR_AUTO_RELOAD_MASK | XTC_CSR_DOWN_COUNT_MASK );
  XTmrCtr_mSetControlStatusReg(XPAR_OPB_TIMER_1_BASEADDR, 1,
                               XTC_CSR_ENABLE_TMR_MASK | XTC_CSR_ENABLE_INT_MASK |
                               XTC_CSR_AUTO_RELOAD_MASK | XTC_CSR_DOWN_COUNT_MASK );

  /* Enable the timer interrupt in the interrupt controller */
  XIntc_mEnableIntr(XPAR_OPB_INTC_0_BASEADDR,
                    XPAR_OPB_TIMER_1_INTERRUPT_MASK);

  /* Enable MB interrupts */
  microblaze_enable_interrupts();

  xil_printf("\r\n\r\n\r\nRunning Main\r\n");

  /* Run the workload while timer interrupts advance `count` */
  for (a = 0; a < FFT_RUNS; a++) {
    Fft();
  }

  xil_printf("DONEZORE!!!\r\n");
  xil_printf("%D\r\n",count);

  return 0;
}



Attachment: system.mhs
Description: Binary data

Attachment: system.mss
Description: Binary data