[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [microblaze-uclinux] Kernel BRAM usage



Hi again,

The attached revised patch adds a #include that was missing from the earlier version, which caused compile errors if SIGNALS_IN_BRAM was selected.

Cheers,

John

John Williams wrote:

Question: What do you do with all that BRAM after FS-boot is finished with it?

Answer:  Use it for time-critical kernel functionality, of course!

Attached patch should apply cleanly to a recent petalinux-v0.20 era kernel (not the MMU test kernel yet, sorry). Apply at patchlevel -p0 from the linux-2.6.x directory.

It creates a few new kernel config menu options under "processor type and features":

[*] Allow placing code/data in BRAM
[ ]   Place interrupt entry path in BRAM
[ ]   Place low level signal handling and delivery path in BRAM
[ ]   Place system call entry path and related routines in BRAM
[ ]   Place cache flush code in BRAM
[ ]   Place exception handling code in BRAM
[ ]   Place kernel FASTCALL symbols in BRAM

These options should be fairly self-explanatory. The "cache flush" option is a good one to try. Because of the MicroBlaze architecture we have to disable caches while flushing them. Flushing is done in a loop, so we run a big loop with the caches off, which is very slow. Moving the flush routines to BRAM speeds things up nicely, and gives a noticeably "snappier" feel to application loading (the main client of the cache invalidation API).

The exception handling code should also have a nice effect if you are using unaligned exceptions (and causing them!).

Basically, anything asynchronous that runs once - and is therefore usually cache-cold - should see a win. Tight loops benefit much less: the subsequent iterations will be cache-hot, so there's no benefit to using BRAM unless the loop footprint is huge.

Also, the last option is an interesting one - a number of functions in the kernel are marked as FASTCALL - this is currently used only by i386 to specify some sort of optimised register-based function call ABI. However, by overloading it so that all functions marked FASTCALL get loaded into BRAM, we again should see a bit of a speedup. This will use a minimum of 16 Kbytes of BRAM, so if you don't have enough room you'll get link errors.

I haven't benchmarked it thoroughly but earlier work on a related patchset for the 2.4 kernel did show improved interrupt latencies when the IRQ handling was moved to BRAM.

In addition to the standard choices I've created, you can also tag your own functions (driver IRQ handlers, for example) and data structures as __bram_code__ or __bram_data__ respectively and they too will receive the magic treatment.

I'll fold this into the next PetaLinux release but any experience reports before then will be greatly appreciated.

Cheers,

John



------------------------------------------------------------------------

Index: include/asm-microblaze/cacheflush.h
===================================================================
--- include/asm-microblaze/cacheflush.h	(revision 3740)
+++ include/asm-microblaze/cacheflush.h	(working copy)
@@ -19,6 +19,15 @@
 /* Somebody depends on this; sigh...  */
 #include <linux/mm.h>
+#include <linux/autoconf.h>
+#include <asm/sections.h>
+
+#ifdef CONFIG_CACHEFLUSH_IN_BRAM
+#define CACHEFLUSH_TEXT __bram_code__
+#else
+#define CACHEFLUSH_TEXT
+#endif
+
 #define flush_cache_all()			__flush_cache_all()
 #define flush_cache_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	__flush_cache_all()
@@ -41,22 +50,22 @@
 struct vm_area_struct;
/* see arch/microblaze/kernel/cache.c */
-extern void __flush_icache_all (void);
-extern void __flush_icache_range (unsigned long start, unsigned long end);
-extern void __flush_icache_page (struct vm_area_struct *vma, struct page *page);
-extern void __flush_icache_user_range (struct vm_area_struct *vma,
+extern void CACHEFLUSH_TEXT __flush_icache_all (void);
+extern void CACHEFLUSH_TEXT __flush_icache_range (unsigned long start, unsigned long end);
+extern void CACHEFLUSH_TEXT __flush_icache_page (struct vm_area_struct *vma, struct page *page);
+extern void CACHEFLUSH_TEXT __flush_icache_user_range (struct vm_area_struct *vma,
 				     struct page *page,
 				     unsigned long adr, int len);
-extern void __flush_cache_sigtramp (unsigned long addr);
+extern void CACHEFLUSH_TEXT __flush_cache_sigtramp (unsigned long addr);
-extern void __flush_dcache_all (void);
-extern void __flush_dcache_range (unsigned long start, unsigned long end);
-extern void __flush_dcache_page (struct vm_area_struct *vma, struct page *page);
-extern void __flush_dcache_user_range (struct vm_area_struct *vma,
+extern void CACHEFLUSH_TEXT __flush_dcache_all (void);
+extern void CACHEFLUSH_TEXT __flush_dcache_range (unsigned long start, unsigned long end);
+extern void CACHEFLUSH_TEXT __flush_dcache_page (struct vm_area_struct *vma, struct page *page);
+extern void CACHEFLUSH_TEXT __flush_dcache_user_range (struct vm_area_struct *vma,
 				     struct page *page,
 				     unsigned long adr, int len);
-extern inline void __flush_cache_all(void) {
+extern inline void CACHEFLUSH_TEXT __flush_cache_all(void) {
 	__flush_icache_all();
 	__flush_dcache_all();
 }
Index: include/asm-microblaze/linkage.h
===================================================================
--- include/asm-microblaze/linkage.h	(revision 3740)
+++ include/asm-microblaze/linkage.h	(working copy)
@@ -14,4 +14,12 @@
 #define __ALIGN		.align 4
 #define __ALIGN_STR	".align 4"
+#if defined(CONFIG_USE_BRAM) && defined (CONFIG_FASTCALLS_IN_BRAM)
+#define FASTCALL(x) x __attribute__((section(".bram.text")))
+#define fastcall
+#else
+#define FASTCALL(x) x
+#define fastcall
+#endif
+
 #endif /* _ASM_LINKAGE_H */
Index: include/asm-microblaze/sections.h
===================================================================
--- include/asm-microblaze/sections.h	(revision 3740)
+++ include/asm-microblaze/sections.h	(working copy)
@@ -16,4 +16,14 @@
 extern char _ssbss[], _esbss[];
 extern unsigned long __ivt_start[], __ivt_end[];
+/* Attribute definitions for placing code and data into BRAM */
+#ifdef CONFIG_USE_BRAM
+#define __bram_code__ __attribute__((section(".bram.text")))
+#define __bram_data__ __attribute__((section(".bram.data")))
+#else
+#define __bram_code__
+#define __bram_data__
+#endif
+
+
 #endif /* _ASM_SECTIONS_H */
Index: arch/microblaze/kernel/setup.c
===================================================================
--- arch/microblaze/kernel/setup.c	(revision 3740)
+++ arch/microblaze/kernel/setup.c	(working copy)
@@ -97,6 +97,36 @@
 }
 #endif 	/* CONFIG_MTD_UCLINUX_EBSS */
+/* Populate the BRAM memory space with any code or data earmarked for there.
+   Note this is a pingpong - we copy the original BRAM contents back into
+   main memory, so the operation is reversible.  This lets us do a clean
+   soft restart back to any bootloader that was originally resident in BRAM
+ */
+
+#ifdef CONFIG_USE_BRAM
+void pingpong_bram(void)
+{
+	unsigned int *src, *dst;
+	extern unsigned int _bram_load_start, _bram_load_end;
+	extern unsigned int __bram_lma;
+
+	dst = (unsigned int *)&_bram_load_start;
+
+	/* Source is the load address of the .bram image (__bram_lma).  The
+	   0x40 word offset == 0x100 bytes skips the interrupt, exception
+	   and debug vectors.  See vmlinux.lds.S .bram section */
+	src = (unsigned int *)&__bram_lma + 0x40;
+
+	while (dst < &_bram_load_end) {	/* swaps nothing if region is empty */
+		unsigned int tmp = *dst;
+		*dst = *src;
+		*src = tmp;
+		src++;
+		dst++;
+	}
+}
+#endif
+
 void machine_early_init(const char *cmdline)
 {
 	unsigned long *src, *dst = (unsigned long *)0x0;
@@ -144,6 +174,10 @@
 	for (src = __ivt_start; src < __ivt_end; src++, dst++)
 		*dst = *src;
+#ifdef CONFIG_USE_BRAM
+	pingpong_bram();
+#endif
+
 	/* Initialize global data */
 	per_cpu(KM,0)= 0x1;	/* We start in kernel mode */
 	per_cpu(CURRENT_SAVE,0) = (unsigned long)current;
Index: arch/microblaze/kernel/vmlinux.lds.S
===================================================================
--- arch/microblaze/kernel/vmlinux.lds.S	(revision 3740)
+++ arch/microblaze/kernel/vmlinux.lds.S	(working copy)
@@ -5,6 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * Copyright (C) 2006-2008 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  */
@@ -15,6 +16,15 @@
 #include <linux/autoconf.h>
 #include <asm-generic/vmlinux.lds.h>
+MEMORY {
+	ERAM	: ORIGIN = CONFIG_XILINX_ERAM_START,
+		  LENGTH = CONFIG_XILINX_ERAM_SIZE
+#if defined(CONFIG_XILINX_LMB_START) && defined(CONFIG_USE_BRAM)
+	LMB	: ORIGIN = CONFIG_XILINX_LMB_START,
+		  LENGTH = CONFIG_XILINX_LMB_SIZE
+#endif
+}
+
 jiffies = jiffies_64 + 4;
SECTIONS {
@@ -31,15 +41,151 @@
 		SCHED_TEXT
 		LOCK_TEXT
 		. = ALIGN (4) ;
+
 		_etext = . ;
-	}
+	} > ERAM
- . = ALIGN(16);
-	RODATA
+#if defined(CONFIG_XILINX_LMB_START) && defined(CONFIG_USE_BRAM)
+       /* Code and data loaded into LMB */
+       .bram : {
+		/* Skip past interrupt, exception and debug vectors.  If you
+		   change this, also change the 0x40 (word) offset in
+		   kernel/setup.c:pingpong_bram()  */
+               . = 0x100;
+               _bram_load_start = . ;
+               *(.bram*)               /* Everything */
+               *(.bram.text)
+               *(.bram.data)
+               _bram_load_end = . ;
+       } > LMB AT > ERAM
+ + __bram_lma = LOADADDR(.bram);
+#endif
+ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) {
+	. = ALIGN(4096);
+		VMLINUX_SYMBOL(__start_rodata) = .;
+		*(.rodata) *(.rodata.*)
+		*(__vermagic)		/* Kernel version magic */
+	} > ERAM
+
+	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {
+		*(.rodata1)
+	} > ERAM
+
+	/* PCI quirks */
+	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;
+		*(.pci_fixup_early)
+		VMLINUX_SYMBOL(__end_pci_fixups_early) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_header) = .;
+		*(.pci_fixup_header)
+		VMLINUX_SYMBOL(__end_pci_fixups_header) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_final) = .;
+		*(.pci_fixup_final)
+		VMLINUX_SYMBOL(__end_pci_fixups_final) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_enable) = .;
+		*(.pci_fixup_enable)
+		VMLINUX_SYMBOL(__end_pci_fixups_enable) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_resume) = .;
+		*(.pci_fixup_resume)
+		VMLINUX_SYMBOL(__end_pci_fixups_resume) = .;
+	} > ERAM
+
+	/* RapidIO route ops */
+	.rio_route        : AT(ADDR(.rio_route) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start_rio_route_ops) = .;
+		*(.rio_route_ops)
+		VMLINUX_SYMBOL(__end_rio_route_ops) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal symbols */
+	__ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab) = .;
+		*(__ksymtab)
+		VMLINUX_SYMBOL(__stop___ksymtab) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only symbols */
+	__ksymtab_gpl     : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_gpl) = .;
+		*(__ksymtab_gpl)
+		VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal unused symbols */
+	__ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_unused) = .;
+		*(__ksymtab_unused)
+		VMLINUX_SYMBOL(__stop___ksymtab_unused) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only unused symbols */
+	__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .;
+		*(__ksymtab_unused_gpl)
+		VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-future-only symbols */
+	__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .;
+		*(__ksymtab_gpl_future)
+		VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal symbols */
+	__kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab) = .;
+		*(__kcrctab)
+		VMLINUX_SYMBOL(__stop___kcrctab) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only symbols */
+	__kcrctab_gpl     : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_gpl) = .;
+		*(__kcrctab_gpl)
+		VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal unused symbols */
+	__kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_unused) = .;
+		*(__kcrctab_unused)
+		VMLINUX_SYMBOL(__stop___kcrctab_unused) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only unused symbols */
+	__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .;
+		*(__kcrctab_unused_gpl)
+		VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-future-only symbols */
+	__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .;
+		*(__kcrctab_gpl_future)
+		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;
+	} > ERAM
+
+	/* Kernel symbol table: strings */
+        __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {
+		*(__ksymtab_strings)
+	} > ERAM
+
+	/* Built-in module parameters. */
+	__param : AT(ADDR(__param) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___param) = .;
+		*(__param)
+		VMLINUX_SYMBOL(__stop___param) = .;
+		VMLINUX_SYMBOL(__end_rodata) = .;
+	} > ERAM
+
+
 	/* sdata2 section can go anywhere, but must be word aligned
 	   and SDA2_BASE must point to the middle of it */
-	.sdata2 : {
+	.sdata2 ALIGN(4096) : {
 		_ssrw = .;
 		. = ALIGN(0x8);
 		*(.sdata2)
@@ -47,27 +193,25 @@
 	_essrw = .;
 	_ssrw_size = _essrw - _ssrw;
 	_KERNEL_SDA2_BASE_ = _ssrw + (_ssrw_size / 2);
-	}
+	} > ERAM
_sdata = . ;
 	.data ALIGN (0x4) : {
         	*(.data)
-	}
-	. = ALIGN(32);
-	.data.cacheline_aligned : { *(.data.cacheline_aligned) }
+	} > ERAM
+
+	.data.cacheline_aligned ALIGN(32) : { *(.data.cacheline_aligned) } > ERAM
 	_edata = . ;
/* The initial task */
-	. = ALIGN(8192);
-	.data.init_task : { *(.data.init_task) }
+	.data.init_task ALIGN(8192): { *(.data.init_task) } > ERAM
/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
-	. = ALIGN(8);
-	.sdata : {
+	.sdata ALIGN(8) : {
 		_ssro = .;
 		*(.sdata)
-	}
+	} > ERAM
.sbss : {
 		_ssbss = .;
@@ -76,50 +220,49 @@
 		_essro = .;
 		_ssro_size = _essro - _ssro ;
 		_KERNEL_SDA_BASE_ = _ssro + (_ssro_size / 2) ;
-	}
+	} > ERAM
- . = ALIGN(16);
-	__start___ex_table = .;
-	__ex_table : { *(__ex_table) }
-	__stop___ex_table = .;
+	__ex_table ALIGN(16): {
+		__start___ex_table = .;
+ *(__ex_table) + __stop___ex_table = .;
+	} > ERAM
- __init_begin = .; - . = ALIGN(4096);
-	.init.text : {
+	.init.text ALIGN(4096) : {
+		__init_begin = .;
 		_sinittext = . ;
 		*(.init.text)
 		*(.exit.text)
 		*(.exit.data)
 		_einittext = .;
-	}
+	} > ERAM
- .init.data : { *(.init.data) }
+	.init.data : { *(.init.data) } > ERAM
- . = ALIGN(4);
-	.init.ivt : {
+	.init.ivt ALIGN(4) : {
 		__ivt_start = .;
 		*(.init.ivt)
 		__ivt_end = .;
-	}
+	} > ERAM
.init.setup : {
 		__setup_start = .;
 		*(.init.setup)
 		__setup_end = .;
-	}
+	} > ERAM
.initcall.init : {
 		__initcall_start = .;
 		INITCALLS
 		__initcall_end = .;
-	}
+	} > ERAM
.con_initcall.init : {
 		__con_initcall_start = .;
 		*(.con_initcall.init)
 		__con_initcall_end = .;
-	}
+	} > ERAM
__init_end_before_initramfs = .; @@ -132,7 +275,7 @@
 		. = ALIGN(4096);/* Pad init.ramfs up to page boundary, so
 				 that __init_end == __bss_start.  This will
 				 make image.elf consistent with the image.bin */
-	}
+	} > ERAM
__init_end = .; @@ -143,7 +286,7 @@
 		. = ALIGN (4) ;
 		__bss_stop = . ;
 		_ebss = . ;
- 	}
+ 	} > ERAM
 	. = ALIGN(4096);
 	_end = .;
 }
Index: arch/microblaze/kernel/cpu/cache.c
===================================================================
--- arch/microblaze/kernel/cpu/cache.c	(revision 3740)
+++ arch/microblaze/kernel/cpu/cache.c	(working copy)
@@ -16,8 +16,7 @@
 #include <asm/cpuinfo.h>
/* Exported functions. */
-
-void __flush_icache_all(void)
+void CACHEFLUSH_TEXT __flush_icache_all(void)
 {
 	unsigned int i;
 	unsigned flags;
@@ -37,7 +36,7 @@
 	}
 }
-void __flush_icache_range(unsigned long start, unsigned long end)
+void CACHEFLUSH_TEXT __flush_icache_range(unsigned long start, unsigned long end)
 {
 	unsigned int i;
 	unsigned flags;
@@ -65,24 +64,24 @@
 	}
 }
-void __flush_icache_page(struct vm_area_struct *vma, struct page *page)
+void CACHEFLUSH_TEXT __flush_icache_page(struct vm_area_struct *vma, struct page *page)
 {
 	__flush_icache_all();
 }
-void __flush_icache_user_range(struct vm_area_struct *vma,
+void CACHEFLUSH_TEXT __flush_icache_user_range(struct vm_area_struct *vma,
 			       struct page *page, unsigned long adr,
 			       int len)
 {
 	__flush_icache_all();
 }
-void __flush_cache_sigtramp(unsigned long addr)
+void CACHEFLUSH_TEXT __flush_cache_sigtramp(unsigned long addr)
 {
 	__flush_icache_range(addr, addr + 8);
 }
-void __flush_dcache_all(void)
+void CACHEFLUSH_TEXT __flush_dcache_all(void)
 {
 	unsigned int i;
 	unsigned flags;
@@ -104,7 +103,7 @@
 	}
 }
-void __flush_dcache_range(unsigned long start, unsigned long end)
+void CACHEFLUSH_TEXT __flush_dcache_range(unsigned long start, unsigned long end)
 {
 	unsigned int i;
 	unsigned flags;
@@ -131,12 +130,12 @@
 	}
 }
-void __flush_dcache_page(struct vm_area_struct *vma, struct page *page)
+void CACHEFLUSH_TEXT __flush_dcache_page(struct vm_area_struct *vma, struct page *page)
 {
 	__flush_dcache_all();
 }
-void __flush_dcache_user_range(struct vm_area_struct *vma,
+void CACHEFLUSH_TEXT __flush_dcache_user_range(struct vm_area_struct *vma,
 			       struct page *page, unsigned long adr,
 			       int len)
 {
Index: arch/microblaze/kernel/hw_exception_handler.S
===================================================================
--- arch/microblaze/kernel/hw_exception_handler.S	(revision 3740)
+++ arch/microblaze/kernel/hw_exception_handler.S	(working copy)
@@ -43,6 +43,18 @@
 #include <asm/exceptions.h>
 #include <asm/unistd.h>
+/* Are we using fast BRAM for the exception handling? */
+#ifdef CONFIG_EXCEPTIONS_IN_BRAM
+#define EXCEPTION_TEXT .bram.text
+#else
+#define EXCEPTION_TEXT .text
+#endif
+#ifdef CONFIG_EXCEPTIONS_IN_BRAM
+#define EXCEPTION_DATA .bram.data
+#else
+#define EXCEPTION_DATA .data
+#endif
+
 #if MICROBLAZE_EXCEPTIONS_ENABLED
/* Helpful Macros */
@@ -142,7 +154,7 @@
.global _hw_exception_handler -.section .text +.section EXCEPTION_TEXT
 .align 4
 .ent _hw_exception_handler
_hw_exception_handler: @@ -304,7 +316,7 @@
  * - Each table is of size:   (8 * 32) = 256 bytes
  */
-.section .text
+.section EXCEPTION_TEXT
 .align  4
 lw_table:
lw_r0: R3_TO_LWREG (0); @@ -375,7 +387,7 @@
 sw_r31: SWREG_TO_R3   (31);
/* Temporary data structures used in the handler */
-.section .data
+.section EXCEPTION_DATA
 .align 4
ex_tmp_data_loc_0: .byte 0
@@ -392,7 +404,7 @@
 #else                                                   /* Dummy exception handler, in case exceptions are not present in the processor */
.global _hw_exception_handler -.section .text +.section EXCEPTION_TEXT .align 4
 .ent _hw_exception_handler
 _hw_exception_handler:
Index: arch/microblaze/kernel/signal.c
===================================================================
--- arch/microblaze/kernel/signal.c	(revision 3740)
+++ arch/microblaze/kernel/signal.c	(working copy)
@@ -37,12 +37,18 @@
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset, int in_sycall);
+#ifdef CONFIG_SIGNALS_IN_BRAM
+#define SIGNAL_TEXT __bram_code__
+#else
+#define SIGNAL_TEXT
+#endif
+asmlinkage int SIGNAL_TEXT do_signal(struct pt_regs *regs, sigset_t *oldset, int in_sycall);
+
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
-asmlinkage int
+asmlinkage int SIGNAL_TEXT sys_sigsuspend(old_sigset_t mask, struct pt_regs *regs)
 {
 	sigset_t saveset;
@@ -63,7 +69,7 @@
 	}
 }
-asmlinkage int
+asmlinkage int SIGNAL_TEXT
 sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize,
 		  struct pt_regs *regs)
 {
@@ -91,7 +97,7 @@
 	}
 }
-asmlinkage int +asmlinkage int SIGNAL_TEXT
 sys_sigaction(int sig, const struct old_sigaction *act,
 	      struct old_sigaction *oact)
 {
@@ -123,7 +129,7 @@
 	return ret;
 }
-asmlinkage int +asmlinkage int SIGNAL_TEXT sys_sigaltstack(const stack_t *uss, stack_t *uoss,
 		struct pt_regs *regs)
 {
@@ -149,7 +155,7 @@
 	unsigned long tramp[2];	/* signal trampoline */
 };
-static int
+static int SIGNAL_TEXT
 restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
 {
 	unsigned int err = 0;
@@ -172,7 +178,7 @@
 	return err;
 }
-asmlinkage int sys_sigreturn(struct pt_regs *regs)
+asmlinkage int SIGNAL_TEXT sys_sigreturn(struct pt_regs *regs)
 {
 	struct sigframe *frame = (struct sigframe *)regs->r1;
 	sigset_t set;
@@ -203,7 +209,7 @@
 	return 0;
 }
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+asmlinkage int SIGNAL_TEXT sys_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe *frame = (struct rt_sigframe *)regs->r1;
 	sigset_t set;
@@ -242,7 +248,7 @@
  * Set up a signal frame.
  */
-static int
+static int SIGNAL_TEXT
 setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 		 unsigned long mask)
 {
@@ -269,7 +275,7 @@
 /*
  * Determine which stack to use..
  */
-static inline void *
+static inline void SIGNAL_TEXT *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 {
 	/* Default to using normal stack */
@@ -281,7 +287,7 @@
 	return (void *)((sp - frame_size) & -8UL);
 }
-static void setup_frame(int sig, struct k_sigaction *ka,
+static void SIGNAL_TEXT setup_frame(int sig, struct k_sigaction *ka,
 			sigset_t *set, struct pt_regs *regs)
 {
 	struct sigframe *frame;
@@ -356,7 +362,7 @@
 	force_sig(SIGSEGV, current);
 }
-static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+static void SIGNAL_TEXT setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			   sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe *frame;
@@ -437,7 +443,7 @@
 }
/* Handle restarting system calls */
-static inline void
+static inline void SIGNAL_TEXT handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
 {
 	switch (regs->r3) {
@@ -465,7 +471,7 @@
  * OK, we're invoking a handler
  */	
-static void
+static void SIGNAL_TEXT
 handle_signal(unsigned long sig, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
 {
@@ -496,7 +502,7 @@
  * the kernel can handle, and then we build all the user-level signal handling
  * stack-frames in one go after that.
  */
-int do_signal(struct pt_regs *regs, sigset_t *oldset, int in_syscall)
+int SIGNAL_TEXT do_signal(struct pt_regs *regs, sigset_t *oldset, int in_syscall)
 {
 	siginfo_t info;
 	int signr;
Index: arch/microblaze/kernel/opb_timer.c
===================================================================
--- arch/microblaze/kernel/opb_timer.c	(revision 3740)
+++ arch/microblaze/kernel/opb_timer.c	(working copy)
@@ -15,6 +15,7 @@
 #include <linux/profile.h>
 #include <linux/irq.h>
 #include <asm/io.h>
+#include <asm/sections.h>
#define BASE_ADDR CONFIG_XILINX_TIMER_0_BASEADDR @@ -37,14 +38,22 @@
 #define TCSR_PWMA  (1<<9)
 #define TCSR_ENALL (1<<10)
+#ifdef CONFIG_IRQ_IN_BRAM
+#define TIMER_TEXT __bram_code__
+#define TIMER_DATA __bram_data__
+#else
+#define TIMER_TEXT
+#define TIMER_DATA
+#endif
+
 extern void heartbeat(void);
-static void timer_ack(void)
+static void TIMER_TEXT timer_ack(void)
 {
 	iowrite32(ioread32(BASE_ADDR + TCSR0), BASE_ADDR + TCSR0);
 }
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+irqreturn_t TIMER_TEXT timer_interrupt(int irq, void *dev_id)
 {
 	heartbeat();
@@ -61,11 +70,11 @@
 	return IRQ_HANDLED;
 }
-struct irqaction timer_irqaction = {
+struct irqaction TIMER_DATA timer_irqaction = {
 	.handler = timer_interrupt,
 	.flags   = SA_INTERRUPT,
 	.name    = "timer",
-};
+} ;
void system_timer_init(void)
 {
Index: arch/microblaze/kernel/entry.S
===================================================================
--- arch/microblaze/kernel/entry.S	(revision 3740)
+++ arch/microblaze/kernel/entry.S	(working copy)
@@ -19,6 +19,21 @@
#define PER_CPU(var) per_cpu__##var +/* Setup for placement of interrupt and entry handling in fast BRAM */
+#ifdef CONFIG_IRQ_IN_BRAM
+#define IRQ_TEXT .bram.text
+#else
+#define IRQ_TEXT .text
+#endif
+
+#ifdef CONFIG_ENTRY_IN_BRAM
+#define ENTRY_TEXT .bram.text
+#define ENTRY_DATA .bram.data
+#else
+#define ENTRY_TEXT .text
+#define ENTRY_DATA .rodata
+#endif
+
 #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
 	.macro	disable_irq
 	msrclr r0, MSR_IE
@@ -51,6 +66,7 @@
 	.endm
 #endif
+.section IRQ_TEXT
 ENTRY(_interrupt)
 	swi	r1, r0, PER_CPU(ENTRY_SP)	/* save the current sp */
 	swi	r11, r0, PER_CPU(R11_SAVE) 	/* temporarily save r11 */
@@ -193,10 +209,13 @@
 	lwi	r1, r1, PT_R1
 	rtid	r14, 0
 	nop
+.previous
ENTRY(_reset)
 	brai	0x70;				/* Jump back to FS-boot */
+.section ENTRY_TEXT
+
 ENTRY(_user_exception)
 	swi	r1, r0, PER_CPU(ENTRY_SP)	/* save the current sp */
 	swi	r11, r0, PER_CPU(R11_SAVE) 	/* temporarily save r11 */
@@ -287,7 +306,9 @@
 	brid	ret_to_user			/* jump to syscall epilogue */
 	addi	r3, r0, -ENOSYS			/* set errno in delay slot  */
+.previous + /* Debug traps are like a system call, but entered via brki r14, 0x60 All we need to do is send the SIGTRAP signal to current, ptrace and do_signal
    will handle the rest */
@@ -369,9 +390,11 @@
 	lwi	r4, r1, PT_R4
 	bri	ret_to_user
+
 ENTRY(_break)
 	bri	0
+.section ENTRY_TEXT /* struct task_struct *_switch_to(struct thread_info *prev, struct thread_info *next); */
 ENTRY(_switch_to)
@@ -458,7 +481,6 @@
 	rtsd	r15, 8
 	nop
-
 ENTRY(ret_from_fork)
 	addk	r5, r0, r3
 	addk	r6, r0, r1
@@ -576,6 +598,8 @@
 	brid	sys_rt_sigsuspend
 	addk	r7, r1, r0
+.previous
+
 	/* Interrupt vector table */
 	.section	.init.ivt, "ax"
 	.org 0x0
@@ -588,9 +612,7 @@
 	brai	_debug_exception
-
-
-.section .rodata,"a"
+.section ENTRY_DATA,"a"
 #include "syscall_table.S"
syscall_table_size=(.-sys_call_table)
Index: arch/microblaze/kernel/irq.c
===================================================================
--- arch/microblaze/kernel/irq.c	(revision 3740)
+++ arch/microblaze/kernel/irq.c	(working copy)
@@ -15,7 +15,14 @@
 #include <linux/irqflags.h>
 #include <linux/seq_file.h>
 #include <linux/kernel_stat.h>
+#include <asm/sections.h>
+#ifdef CONFIG_IRQ_IN_BRAM
+#define IRQ_TEXT __bram_code__
+#else
+#define IRQ_TEXT
+#endif
+
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -27,7 +34,7 @@
extern void ledoff(void); -void do_IRQ(struct pt_regs *regs)
+void IRQ_TEXT do_IRQ(struct pt_regs *regs)
 {
 	unsigned int irq;
Index: arch/microblaze/kernel/opb_intc.c
===================================================================
--- arch/microblaze/kernel/opb_intc.c	(revision 3740)
+++ arch/microblaze/kernel/opb_intc.c	(working copy)
@@ -12,6 +12,7 @@
 #include <linux/autoconf.h>
 #include <asm/page.h>
 #include <asm/io.h>
+#include <asm/sections.h>
/* No one else should require these constants, so define them locally here. */
 #define ISR 0x00			/* Interrupt Status Register */
@@ -28,21 +29,29 @@
#define BASE_ADDR CONFIG_XILINX_INTC_0_BASEADDR -static void opb_intc_enable(unsigned int irq)
+#ifdef CONFIG_IRQ_IN_BRAM
+#define INTC_TEXT __bram_code__
+#define INTC_DATA __bram_data__
+#else
+#define INTC_TEXT
+#define INTC_DATA
+#endif
+
+static INTC_TEXT void opb_intc_enable(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("enable: %d\n", irq);
 	iowrite32(mask, BASE_ADDR + SIE);
 }
-static void opb_intc_disable(unsigned int irq)
+static INTC_TEXT void opb_intc_disable(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("disable: %d\n", irq);
 	iowrite32(mask, BASE_ADDR + CIE);
 }
-static void opb_intc_disable_and_ack(unsigned int irq)
+static INTC_TEXT void opb_intc_disable_and_ack(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("disable_and_ack: %d\n", irq);
@@ -51,7 +60,7 @@
 		iowrite32(mask, BASE_ADDR + IAR);	/* ack edge triggered intr */
 }
-static void opb_intc_end(unsigned int irq)
+static INTC_TEXT void opb_intc_end(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
@@ -64,7 +73,7 @@
 	}
 }
-static struct irq_chip obp_intc = {
+static INTC_DATA struct irq_chip obp_intc = {
 	.name = "OPB-INTC",
 	.enable	  = opb_intc_enable,
 	.disable  = opb_intc_disable,
Index: arch/microblaze/Kconfig
===================================================================
--- arch/microblaze/Kconfig	(revision 3740)
+++ arch/microblaze/Kconfig	(working copy)
@@ -103,6 +103,98 @@
             a lot of RAM, and you need to able to allocate very large
             contiguous chunks. If unsure, say N.
+config USE_BRAM
+	bool 'Allow placing code/data in BRAM'
+	default n
+	help
+	    Allow the compile-time placement of critical code or data into
+	    on-chip BRAM resources.  To use, add the '__bram_code__' attribute
+	    on functions, or '__bram_data__' attribute on static data
+	    structures.  Cannot be applied to user data/code, or kernel data
+	    structures declared on the stack.
+
+	    Performance improvements can be significant, but it's easy to run
+	    out of BRAM.  If you do, you'll get cryptic kernel link errors.
+
+	    Remember that tight loops (like memcpy) will soon get cache-hot, so
+	    improvements from moving them to BRAM may be less than you expect.
+	    Asynchronous things like interrupt and exception handlers, context
+	    switches and the like, are more likely to benefit.
+
+	    Device driver interrupt handlers are another potential candidate.
+
+	    See suboptions below for some sensible features that are already
+	    migrated to BRAM.
+
+	    If you are not sure, choose 'N'
+
+config IRQ_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place interrupt entry path in BRAM'
+	help
+	   Choose this option to place the low level interrupt entry path into
+	   BRAM.  This can reduce interrupt latency by removing cache misses
+	   from the interrupt entry path.
+
+	   If you are not sure, choose 'Y'
+
+config SIGNALS_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place low level signal handling and delivery path in BRAM'
+	help
+	   Choose this option to place the low level signal handling and
+	   delivery code into BRAM.  This may result in improved performance
+	   and reduced latency in signal-intensive environments.
+
+	   If you are not sure, choose 'Y'
+
+config ENTRY_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place system call entry path and related routines in BRAM'
+	help
+	   Choose this option to place the low level system call entry path
+	   and thread switching code into BRAM.  This can improve system
+	   performance.
+
+	   If you are not sure, choose 'Y'
+
+config CACHEFLUSH_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place cache flush code in BRAM'
+	help
+	   Choose this option to place the cache flushing routines into
+	   BRAM.  This can improve performance because the cache flushing and
+	   invalidation loops must be executed with caches disabled, causing
+	   them to be quite slow.  Placing this code into BRAM avoids that
+	   performance penalty.  If you place nothing else into BRAM, place
+	   this there.
+
+	   If you are not sure, choose 'Y'
+
+config EXCEPTIONS_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place exception handling code in BRAM'
+	help
+	   Choose this option to place the exception handling routines into
+	   BRAM.
+
+	   If you are not sure, choose 'Y'
+
+config FASTCALLS_IN_BRAM
+	depends USE_BRAM
+	default n
+	bool 'Place kernel FASTCALL symbols in BRAM'
+	help
+	   Choose this option to place all code tagged as 'FASTCALL' into BRAM.
+	   You probably don't have enough BRAM for this, but give it a try.
+
+	   If you are not sure, choose 'N'
+
 comment "Boot options"
config CMDLINE

Index: include/asm-microblaze/cacheflush.h
===================================================================
--- include/asm-microblaze/cacheflush.h	(revision 3751)
+++ include/asm-microblaze/cacheflush.h	(revision 3761)
@@ -19,6 +19,15 @@
 /* Somebody depends on this; sigh...  */
 #include <linux/mm.h>
 
+#include <linux/autoconf.h>
+#include <asm/sections.h>
+
+#ifdef CONFIG_CACHEFLUSH_IN_BRAM
+#define CACHEFLUSH_TEXT __bram_code__
+#else
+#define CACHEFLUSH_TEXT
+#endif
+
 #define flush_cache_all()			__flush_cache_all()
 #define flush_cache_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	__flush_cache_all()
@@ -41,22 +50,22 @@
 struct vm_area_struct;
 
 /* see arch/microblaze/kernel/cache.c */
-extern void __flush_icache_all (void);
-extern void __flush_icache_range (unsigned long start, unsigned long end);
-extern void __flush_icache_page (struct vm_area_struct *vma, struct page *page);
-extern void __flush_icache_user_range (struct vm_area_struct *vma,
+extern void CACHEFLUSH_TEXT __flush_icache_all (void);
+extern void CACHEFLUSH_TEXT __flush_icache_range (unsigned long start, unsigned long end);
+extern void CACHEFLUSH_TEXT __flush_icache_page (struct vm_area_struct *vma, struct page *page);
+extern void CACHEFLUSH_TEXT __flush_icache_user_range (struct vm_area_struct *vma,
 				     struct page *page,
 				     unsigned long adr, int len);
-extern void __flush_cache_sigtramp (unsigned long addr);
+extern void CACHEFLUSH_TEXT __flush_cache_sigtramp (unsigned long addr);
 
-extern void __flush_dcache_all (void);
-extern void __flush_dcache_range (unsigned long start, unsigned long end);
-extern void __flush_dcache_page (struct vm_area_struct *vma, struct page *page);
-extern void __flush_dcache_user_range (struct vm_area_struct *vma,
+extern void CACHEFLUSH_TEXT __flush_dcache_all (void);
+extern void CACHEFLUSH_TEXT __flush_dcache_range (unsigned long start, unsigned long end);
+extern void CACHEFLUSH_TEXT __flush_dcache_page (struct vm_area_struct *vma, struct page *page);
+extern void CACHEFLUSH_TEXT __flush_dcache_user_range (struct vm_area_struct *vma,
 				     struct page *page,
 				     unsigned long adr, int len);
 
-extern inline void __flush_cache_all(void) {
+extern inline void CACHEFLUSH_TEXT __flush_cache_all(void) {
 	__flush_icache_all();
 	__flush_dcache_all();
 }
Index: include/asm-microblaze/linkage.h
===================================================================
--- include/asm-microblaze/linkage.h	(revision 3751)
+++ include/asm-microblaze/linkage.h	(revision 3761)
@@ -14,4 +14,12 @@
 #define __ALIGN		.align 4
 #define __ALIGN_STR	".align 4"
 
+#if defined(CONFIG_USE_BRAM) && defined (CONFIG_FASTCALLS_IN_BRAM)
+#define FASTCALL(x) x __attribute__((section(".bram.text"))) 
+#define fastcall 
+#else
+#define FASTCALL(x) x
+#define fastcall
+#endif
+
 #endif /* _ASM_LINKAGE_H */
Index: include/asm-microblaze/sections.h
===================================================================
--- include/asm-microblaze/sections.h	(revision 3751)
+++ include/asm-microblaze/sections.h	(revision 3761)
@@ -16,4 +16,14 @@
 extern char _ssbss[], _esbss[];
 extern unsigned long __ivt_start[], __ivt_end[];
 
+/* Attribute definitions for placing code and data into BRAM */
+#ifdef CONFIG_USE_BRAM
+#define __bram_code__ __attribute__((section(".bram.text"))) 
+#define __bram_data__ __attribute__((section(".bram.data"))) 
+#else
+#define __bram_code__
+#define __bram_data__
+#endif
+
+
 #endif /* _ASM_SECTIONS_H */
Index: arch/microblaze/Kconfig
===================================================================
--- arch/microblaze/Kconfig	(revision 3751)
+++ arch/microblaze/Kconfig	(revision 3761)
@@ -103,6 +103,98 @@
             a lot of RAM, and you need to able to allocate very large
             contiguous chunks. If unsure, say N.
 
+config USE_BRAM
+	bool 'Allow placing code/data in BRAM'
+	default n
+	help
+	    Allow the compile-time placement of critical code or data into
+	    on-chip BRAM resources.  To use, add the '__bram_code__' attribute
+	    on functions, or '__bram_data__' attribute on static data
+	    structures.  Cannot be applied to user data/code, or kernel data
+	    structures declared on the stack.
+
+	    Performance improvements can be significant, but it's easy to run
+	    out of BRAM.  If you do, you'll get cryptic kernel link errors.
+
+	    Remember that tight loops (like memcpy) will soon get cache-hot, so
+	    improvements from moving them to BRAM may be less than you expect.
+	    Asynchronous things like interrupt and exception handlers, context
+	    switches and the like, are more likely to benefit.
+
+	    Device driver interrupt handlers are another potential candidate.
+
+	    See suboptions below for some sensible features that are already
+	    migrated to BRAM.
+
+	    If you are not sure, choose 'N'
+
+config IRQ_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place interrupt entry path in BRAM'
+	help
+	   Choose this option to place the low level interrupt entry path into 
+	   BRAM.  This can reduce interrupt latency by removing cache misses 
+	   from the interrupt entry path.
+
+	   If you are not sure, choose 'Y'
+
+config SIGNALS_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place low level signal handling and delivery path in BRAM'
+	help
+	   Choose this option to place the low level signal handling and 
+	   delivery code into BRAM.  This may result in improved performance
+	   and reduced latency in signal-intensive environments.
+
+	   If you are not sure, choose 'Y'
+
+config ENTRY_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place system call entry path and related routines in BRAM'
+	help
+	   Choose this option to place the low level system call entry path 
+	   and thread switching code into BRAM.  This can improve system
+	   performance.
+
+	   If you are not sure, choose 'Y'
+
+config CACHEFLUSH_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place cache flush code in BRAM'
+	help
+	   Choose this option to place the cache flushing routines into
+	   BRAM.  This can improve performance because the cache flushing and
+	   invalidation loops must be executed with caches disabled, causing
+	   them to be quite slow.  Placing this code into BRAM avoids that 
+	   performance penalty.  If you place nothing else into BRAM, place 
+	   this there.
+
+	   If you are not sure, choose 'Y'
+
+config EXCEPTIONS_IN_BRAM
+	depends USE_BRAM
+	default y
+	bool 'Place exception handling code in BRAM'
+	help
+	   Choose this option to place the exception handling routines into
+	   BRAM.  
+
+	   If you are not sure, choose 'Y'
+
+config FASTCALLS_IN_BRAM
+	depends USE_BRAM
+	default n
+	bool 'Place kernel FASTCALL symbols in BRAM'
+	help
+	   Choose this option to place all code tagged as 'FASTCALL' into BRAM.
+	   You probably don't have enough BRAM for this, but give it a try.
+
+	   If you are not sure, choose 'N'
+
 comment "Boot options"
 
 config CMDLINE
Index: arch/microblaze/kernel/setup.c
===================================================================
--- arch/microblaze/kernel/setup.c	(revision 3751)
+++ arch/microblaze/kernel/setup.c	(revision 3761)
@@ -97,6 +97,36 @@
 }
 #endif 	/* CONFIG_MTD_UCLINUX_EBSS */
 
+/* Populate the BRAM memory space with any code or data earmarked for there.
+   Note this is a pingpong - we copy the original BRAM contents back into
+   main memory, so the operation is reversible.  This lets us do a clean
+   soft restart back to any bootloader that was originally resident in BRAM
+ */
+
+#ifdef CONFIG_USE_BRAM
+void pingpong_bram(void)
+{
+	extern unsigned int _bram_load_start, _bram_load_end;
+	extern unsigned int __bram_lma;
+	unsigned int *dst = &_bram_load_start;	/* run-time home in BRAM */
+
+	/* Load image in main memory - note 0x40 word offset == 0x100 bytes
+	   to skip past interrupt, exception and debug vectors.  See
+	   vmlinux.lds.S .bram section */
+	unsigned int *src = &__bram_lma + 0x40;
+
+	/* Swap word by word so that a second call undoes the first.  Test
+	   before swapping: when nothing is placed in BRAM the range is empty
+	   and the word following the load image must not be touched. */
+	while (dst < &_bram_load_end) {
+		unsigned int tmp = *dst;
+
+		*dst++ = *src;
+		*src++ = tmp;
+	}
+}
+#endif
+
 void machine_early_init(const char *cmdline)
 {
 	unsigned long *src, *dst = (unsigned long *)0x0;
@@ -144,6 +174,10 @@
 	for (src = __ivt_start; src < __ivt_end; src++, dst++)
 		*dst = *src;
 
+#ifdef CONFIG_USE_BRAM
+	pingpong_bram();
+#endif
+
 	/* Initialize global data */
 	per_cpu(KM,0)= 0x1;	/* We start in kernel mode */
 	per_cpu(CURRENT_SAVE,0) = (unsigned long)current;
Index: arch/microblaze/kernel/vmlinux.lds.S
===================================================================
--- arch/microblaze/kernel/vmlinux.lds.S	(revision 3751)
+++ arch/microblaze/kernel/vmlinux.lds.S	(revision 3761)
@@ -5,6 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * Copyright (C) 2006-2008 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  */
 
@@ -15,6 +16,15 @@
 #include <linux/autoconf.h>
 #include <asm-generic/vmlinux.lds.h>
 
+MEMORY {
+	ERAM	: ORIGIN = CONFIG_XILINX_ERAM_START,
+		  LENGTH = CONFIG_XILINX_ERAM_SIZE
+#if defined(CONFIG_XILINX_LMB_START) && defined(CONFIG_USE_BRAM)
+	LMB	: ORIGIN = CONFIG_XILINX_LMB_START,
+		  LENGTH = CONFIG_XILINX_LMB_SIZE
+#endif
+}
+
 jiffies = jiffies_64 + 4;
 
 SECTIONS {
@@ -31,15 +41,151 @@
 		SCHED_TEXT
 		LOCK_TEXT
 		. = ALIGN (4) ;
+
 		_etext = . ;
-	}
+	} > ERAM
 
-	. = ALIGN(16);
-	RODATA
+#if defined(CONFIG_XILINX_LMB_START) && defined(CONFIG_USE_BRAM)
+       /* Code and data loaded into LMB */
+       .bram : {
+		/* Skip past interrupt, exception and debug vectors.  If you
+		   change this, also change the 0x40 (word) offset in
+		   kernel/setup.c:pingpong_bram()  */
+               . = 0x100;
+               _bram_load_start = . ;
+               *(.bram*)               /* Everything */
+               *(.bram.text)
+               *(.bram.data)
+               _bram_load_end = . ;
+       } > LMB AT > ERAM
+ 
+       __bram_lma = LOADADDR(.bram);
+#endif
 
+	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {
+	. = ALIGN(4096);
+		VMLINUX_SYMBOL(__start_rodata) = .;
+		*(.rodata) *(.rodata.*)
+		*(__vermagic)		/* Kernel version magic */
+	} > ERAM
+
+	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {
+		*(.rodata1)
+	} > ERAM
+
+	/* PCI quirks */
+	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;
+		*(.pci_fixup_early)
+		VMLINUX_SYMBOL(__end_pci_fixups_early) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_header) = .;
+		*(.pci_fixup_header)
+		VMLINUX_SYMBOL(__end_pci_fixups_header) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_final) = .;
+		*(.pci_fixup_final)
+		VMLINUX_SYMBOL(__end_pci_fixups_final) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_enable) = .;
+		*(.pci_fixup_enable)
+		VMLINUX_SYMBOL(__end_pci_fixups_enable) = .;
+		VMLINUX_SYMBOL(__start_pci_fixups_resume) = .;
+		*(.pci_fixup_resume)
+		VMLINUX_SYMBOL(__end_pci_fixups_resume) = .;
+	} > ERAM
+
+	/* RapidIO route ops */
+	.rio_route        : AT(ADDR(.rio_route) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start_rio_route_ops) = .;
+		*(.rio_route_ops)
+		VMLINUX_SYMBOL(__end_rio_route_ops) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal symbols */
+	__ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab) = .;
+		*(__ksymtab)
+		VMLINUX_SYMBOL(__stop___ksymtab) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only symbols */
+	__ksymtab_gpl     : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_gpl) = .;
+		*(__ksymtab_gpl)
+		VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal unused symbols */
+	__ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_unused) = .;
+		*(__ksymtab_unused)
+		VMLINUX_SYMBOL(__stop___ksymtab_unused) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only unused symbols */
+	__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .;
+		*(__ksymtab_unused_gpl)
+		VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-future-only symbols */
+	__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .;
+		*(__ksymtab_gpl_future)
+		VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal symbols */
+	__kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab) = .;
+		*(__kcrctab)
+		VMLINUX_SYMBOL(__stop___kcrctab) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only symbols */
+	__kcrctab_gpl     : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_gpl) = .;
+		*(__kcrctab_gpl)
+		VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: Normal unused symbols */
+	__kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_unused) = .;
+		*(__kcrctab_unused)
+		VMLINUX_SYMBOL(__stop___kcrctab_unused) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-only unused symbols */
+	__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .;
+		*(__kcrctab_unused_gpl)
+		VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .;
+	} > ERAM
+
+	/* Kernel symbol table: GPL-future-only symbols */
+	__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .;
+		*(__kcrctab_gpl_future)
+		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;
+	} > ERAM
+
+	/* Kernel symbol table: strings */
+        __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {
+		*(__ksymtab_strings)
+	} > ERAM
+
+	/* Built-in module parameters. */
+	__param : AT(ADDR(__param) - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(__start___param) = .;
+		*(__param)
+		VMLINUX_SYMBOL(__stop___param) = .;
+		VMLINUX_SYMBOL(__end_rodata) = .;
+	} > ERAM
+
+
 	/* sdata2 section can go anywhere, but must be word aligned
 	   and SDA2_BASE must point to the middle of it */
-	.sdata2 : {
+	.sdata2 ALIGN(4096) : {
 		_ssrw = .;
 		. = ALIGN(0x8);
 		*(.sdata2)
@@ -47,27 +193,25 @@
 	_essrw = .;
 	_ssrw_size = _essrw - _ssrw;
 	_KERNEL_SDA2_BASE_ = _ssrw + (_ssrw_size / 2);
-	}
+	} > ERAM
 
 
 	_sdata = . ;
 	.data ALIGN (0x4) : {
         	*(.data)
-	}
-	. = ALIGN(32);
-	.data.cacheline_aligned : { *(.data.cacheline_aligned) }
+	} > ERAM
+
+	.data.cacheline_aligned ALIGN(32) : { *(.data.cacheline_aligned) } > ERAM
 	_edata = . ;
 
 	/* The initial task */
-	. = ALIGN(8192);
-	.data.init_task : { *(.data.init_task) }
+	.data.init_task ALIGN(8192): { *(.data.init_task) } > ERAM
 
 	/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
-	. = ALIGN(8);
-	.sdata : {
+	.sdata ALIGN(8) : {
 		_ssro = .;
 		*(.sdata)
-	}
+	} > ERAM
 
 	.sbss :	{
 		_ssbss = .;
@@ -76,50 +220,49 @@
 		_essro = .;
 		_ssro_size = _essro - _ssro ;
 		_KERNEL_SDA_BASE_ = _ssro + (_ssro_size / 2) ;
-	}
+	} > ERAM
 
-	. = ALIGN(16);
-	__start___ex_table = .;
-	__ex_table : { *(__ex_table) }
-	__stop___ex_table = .;
+	__ex_table ALIGN(16): {
+		__start___ex_table = .;
+		 *(__ex_table) 
+		__stop___ex_table = .;
+	} > ERAM
 
-	__init_begin = .;
 
-	. = ALIGN(4096);
-	.init.text : {
+	.init.text ALIGN(4096) : {
+		__init_begin = .;
 		_sinittext = . ;
 		*(.init.text)
 		*(.exit.text)
 		*(.exit.data)
 		_einittext = .;
-	}
+	} > ERAM
 
-	.init.data : { *(.init.data) }
+	.init.data : { *(.init.data) } > ERAM
 
-	. = ALIGN(4);
-	.init.ivt : {
+	.init.ivt ALIGN(4) : {
 		__ivt_start = .;
 		*(.init.ivt)
 		__ivt_end = .;
-	}
+	} > ERAM
 
 	.init.setup : {
 		__setup_start = .;
 		*(.init.setup)
 		__setup_end = .;
-	}
+	} > ERAM
 
 	.initcall.init : {
 		__initcall_start = .;
 		INITCALLS
 		__initcall_end = .;
-	}
+	} > ERAM
 
 	.con_initcall.init : {
 		__con_initcall_start = .;
 		*(.con_initcall.init)
 		__con_initcall_end = .;
-	}
+	} > ERAM
 
 	__init_end_before_initramfs = .;
 
@@ -132,7 +275,7 @@
 		. = ALIGN(4096);/* Pad init.ramfs up to page boundary, so
 				 that __init_end == __bss_start.  This will
 				 make image.elf consistent with the image.bin */
-	}
+	} > ERAM
 
 	__init_end = .;
 
@@ -143,7 +286,7 @@
 		. = ALIGN (4) ;
 		__bss_stop = . ;
 		_ebss = . ;
- 	}
+ 	} > ERAM
 	. = ALIGN(4096);
 	_end = .;
 }
Index: arch/microblaze/kernel/cpu/cache.c
===================================================================
--- arch/microblaze/kernel/cpu/cache.c	(revision 3751)
+++ arch/microblaze/kernel/cpu/cache.c	(revision 3761)
@@ -16,8 +16,7 @@
 #include <asm/cpuinfo.h>
 
 /* Exported functions.  */
-
-void __flush_icache_all(void)
+void CACHEFLUSH_TEXT __flush_icache_all(void)
 {
 	unsigned int i;
 	unsigned flags;
@@ -37,7 +36,7 @@
 	}
 }
 
-void __flush_icache_range(unsigned long start, unsigned long end)
+void CACHEFLUSH_TEXT __flush_icache_range(unsigned long start, unsigned long end)
 {
 	unsigned int i;
 	unsigned flags;
@@ -65,24 +64,24 @@
 	}
 }
 
-void __flush_icache_page(struct vm_area_struct *vma, struct page *page)
+void CACHEFLUSH_TEXT __flush_icache_page(struct vm_area_struct *vma, struct page *page)
 {
 	__flush_icache_all();
 }
 
-void __flush_icache_user_range(struct vm_area_struct *vma,
+void CACHEFLUSH_TEXT __flush_icache_user_range(struct vm_area_struct *vma,
 			       struct page *page, unsigned long adr,
 			       int len)
 {
 	__flush_icache_all();
 }
 
-void __flush_cache_sigtramp(unsigned long addr)
+void CACHEFLUSH_TEXT __flush_cache_sigtramp(unsigned long addr)
 {
 	__flush_icache_range(addr, addr + 8);
 }
 
-void __flush_dcache_all(void)
+void CACHEFLUSH_TEXT __flush_dcache_all(void)
 {
 	unsigned int i;
 	unsigned flags;
@@ -104,7 +103,7 @@
 	}
 }
 
-void __flush_dcache_range(unsigned long start, unsigned long end)
+void CACHEFLUSH_TEXT __flush_dcache_range(unsigned long start, unsigned long end)
 {
 	unsigned int i;
 	unsigned flags;
@@ -131,12 +130,12 @@
 	}
 }
 
-void __flush_dcache_page(struct vm_area_struct *vma, struct page *page)
+void CACHEFLUSH_TEXT __flush_dcache_page(struct vm_area_struct *vma, struct page *page)
 {
 	__flush_dcache_all();
 }
 
-void __flush_dcache_user_range(struct vm_area_struct *vma,
+void CACHEFLUSH_TEXT __flush_dcache_user_range(struct vm_area_struct *vma,
 			       struct page *page, unsigned long adr,
 			       int len)
 {
Index: arch/microblaze/kernel/hw_exception_handler.S
===================================================================
--- arch/microblaze/kernel/hw_exception_handler.S	(revision 3751)
+++ arch/microblaze/kernel/hw_exception_handler.S	(revision 3761)
@@ -43,6 +43,18 @@
 #include <asm/exceptions.h>
 #include <asm/unistd.h>
 
+/* Section names used below.  With CONFIG_EXCEPTIONS_IN_BRAM the handler
+   code and its scratch data are placed in fast BRAM; otherwise they stay
+   in the ordinary .text and .data sections.  Both names must be defined
+   in either case: the .section directives use them either way. */
+#ifdef CONFIG_EXCEPTIONS_IN_BRAM
+#define EXCEPTION_TEXT .bram.text
+#define EXCEPTION_DATA .bram.data
+#else
+#define EXCEPTION_TEXT .text
+#define EXCEPTION_DATA .data
+#endif
+
 #if MICROBLAZE_EXCEPTIONS_ENABLED
 
 /* Helpful Macros */
@@ -142,7 +154,7 @@
 
 
 .global _hw_exception_handler                               
-.section .text                                          
+.section EXCEPTION_TEXT
 .align 4
 .ent _hw_exception_handler
 _hw_exception_handler:                      
@@ -304,7 +316,7 @@
  * - Each table is of size:   (8 * 32) = 256 bytes
  */
                 
-.section .text
+.section EXCEPTION_TEXT
 .align  4
 lw_table:
 lw_r0:  R3_TO_LWREG   (0); 
@@ -375,7 +387,7 @@
 sw_r31: SWREG_TO_R3   (31);
 
 /* Temporary data structures used in the handler */
-.section .data
+.section EXCEPTION_DATA
 .align 4
 ex_tmp_data_loc_0:      
         .byte 0
@@ -392,7 +404,7 @@
 #else                                                   /* Dummy exception handler, in case exceptions are not present in the processor */
 
 .global _hw_exception_handler                               
-.section .text                                          
+.section EXCEPTION_TEXT                                          
 .align 4
 .ent _hw_exception_handler
 _hw_exception_handler:
Index: arch/microblaze/kernel/signal.c
===================================================================
--- arch/microblaze/kernel/signal.c	(revision 3751)
+++ arch/microblaze/kernel/signal.c	(revision 3761)
@@ -34,15 +34,22 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/sections.h>
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset, int in_sycall);
+#ifdef CONFIG_SIGNALS_IN_BRAM
+#define SIGNAL_TEXT __bram_code__
+#else
+#define SIGNAL_TEXT
+#endif
 
+asmlinkage int SIGNAL_TEXT do_signal(struct pt_regs *regs, sigset_t *oldset, int in_sycall);
+
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
-asmlinkage int
+asmlinkage int SIGNAL_TEXT 
 sys_sigsuspend(old_sigset_t mask, struct pt_regs *regs)
 {
 	sigset_t saveset;
@@ -63,7 +70,7 @@
 	}
 }
 
-asmlinkage int
+asmlinkage int SIGNAL_TEXT
 sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize,
 		  struct pt_regs *regs)
 {
@@ -91,7 +98,7 @@
 	}
 }
 
-asmlinkage int 
+asmlinkage int SIGNAL_TEXT
 sys_sigaction(int sig, const struct old_sigaction *act,
 	      struct old_sigaction *oact)
 {
@@ -123,7 +130,7 @@
 	return ret;
 }
 
-asmlinkage int
+asmlinkage int SIGNAL_TEXT 
 sys_sigaltstack(const stack_t *uss, stack_t *uoss,
 		struct pt_regs *regs)
 {
@@ -149,7 +156,7 @@
 	unsigned long tramp[2];	/* signal trampoline */
 };
 
-static int
+static int SIGNAL_TEXT
 restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
 {
 	unsigned int err = 0;
@@ -172,7 +179,7 @@
 	return err;
 }
 
-asmlinkage int sys_sigreturn(struct pt_regs *regs)
+asmlinkage int SIGNAL_TEXT sys_sigreturn(struct pt_regs *regs)
 {
 	struct sigframe *frame = (struct sigframe *)regs->r1;
 	sigset_t set;
@@ -203,7 +210,7 @@
 	return 0;
 }
 
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+asmlinkage int SIGNAL_TEXT sys_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe *frame = (struct rt_sigframe *)regs->r1;
 	sigset_t set;
@@ -242,7 +249,7 @@
  * Set up a signal frame.
  */
 
-static int
+static int SIGNAL_TEXT
 setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 		 unsigned long mask)
 {
@@ -269,7 +276,7 @@
 /*
  * Determine which stack to use..
  */
-static inline void *
+static inline void SIGNAL_TEXT *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 {
 	/* Default to using normal stack */
@@ -281,7 +288,7 @@
 	return (void *)((sp - frame_size) & -8UL);
 }
 
-static void setup_frame(int sig, struct k_sigaction *ka,
+static void SIGNAL_TEXT setup_frame(int sig, struct k_sigaction *ka,
 			sigset_t *set, struct pt_regs *regs)
 {
 	struct sigframe *frame;
@@ -356,7 +363,7 @@
 	force_sig(SIGSEGV, current);
 }
 
-static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+static void SIGNAL_TEXT setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			   sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe *frame;
@@ -437,7 +444,7 @@
 }
 
 /* Handle restarting system calls */
-static inline void
+static inline void SIGNAL_TEXT 
 handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
 {
 	switch (regs->r3) {
@@ -465,7 +472,7 @@
  * OK, we're invoking a handler
  */	
 
-static void
+static void SIGNAL_TEXT
 handle_signal(unsigned long sig, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
 {
@@ -496,7 +503,7 @@
  * the kernel can handle, and then we build all the user-level signal handling
  * stack-frames in one go after that.
  */
-int do_signal(struct pt_regs *regs, sigset_t *oldset, int in_syscall)
+int SIGNAL_TEXT do_signal(struct pt_regs *regs, sigset_t *oldset, int in_syscall)
 {
 	siginfo_t info;
 	int signr;
Index: arch/microblaze/kernel/opb_timer.c
===================================================================
--- arch/microblaze/kernel/opb_timer.c	(revision 3751)
+++ arch/microblaze/kernel/opb_timer.c	(revision 3761)
@@ -15,6 +15,7 @@
 #include <linux/profile.h>
 #include <linux/irq.h>
 #include <asm/io.h>
+#include <asm/sections.h>
 
 #define BASE_ADDR CONFIG_XILINX_TIMER_0_BASEADDR
 
@@ -37,14 +38,22 @@
 #define TCSR_PWMA  (1<<9)
 #define TCSR_ENALL (1<<10)
 
+#ifdef CONFIG_IRQ_IN_BRAM
+#define TIMER_TEXT __bram_code__
+#define TIMER_DATA __bram_data__
+#else
+#define TIMER_TEXT
+#define TIMER_DATA
+#endif
+
 extern void heartbeat(void);
 
-static void timer_ack(void)
+static void TIMER_TEXT timer_ack(void)
 {
 	iowrite32(ioread32(BASE_ADDR + TCSR0), BASE_ADDR + TCSR0);
 }
 
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+irqreturn_t TIMER_TEXT timer_interrupt(int irq, void *dev_id)
 {
 	heartbeat();
 
@@ -61,11 +70,11 @@
 	return IRQ_HANDLED;
 }
 
-struct irqaction timer_irqaction = {
+struct irqaction TIMER_DATA timer_irqaction = {
 	.handler = timer_interrupt,
 	.flags   = SA_INTERRUPT,
 	.name    = "timer",
-};
+} ;
 
 void system_timer_init(void)
 {
Index: arch/microblaze/kernel/entry.S
===================================================================
--- arch/microblaze/kernel/entry.S	(revision 3751)
+++ arch/microblaze/kernel/entry.S	(revision 3761)
@@ -19,6 +19,21 @@
 
 #define PER_CPU(var) per_cpu__##var
 
+/* Setup for placement of interrupt and entry handling in fast BRAM */
+#ifdef CONFIG_IRQ_IN_BRAM
+#define IRQ_TEXT .bram.text
+#else
+#define IRQ_TEXT .text
+#endif
+
+#ifdef CONFIG_ENTRY_IN_BRAM
+#define ENTRY_TEXT .bram.text
+#define ENTRY_DATA .bram.data
+#else
+#define ENTRY_TEXT .text
+#define ENTRY_DATA .rodata
+#endif
+
 #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
 	.macro	disable_irq
 	msrclr r0, MSR_IE
@@ -51,6 +66,7 @@
 	.endm
 #endif
 
+.section IRQ_TEXT
 ENTRY(_interrupt)
 	swi	r1, r0, PER_CPU(ENTRY_SP)	/* save the current sp */
 	swi	r11, r0, PER_CPU(R11_SAVE) 	/* temporarily save r11 */
@@ -193,10 +209,13 @@
 	lwi	r1, r1, PT_R1
 	rtid	r14, 0
 	nop
+.previous
 
 ENTRY(_reset)
 	brai	0x70;				/* Jump back to FS-boot */
 
+.section ENTRY_TEXT
+
 ENTRY(_user_exception)
 	swi	r1, r0, PER_CPU(ENTRY_SP)	/* save the current sp */
 	swi	r11, r0, PER_CPU(R11_SAVE) 	/* temporarily save r11 */
@@ -287,7 +306,9 @@
 	brid	ret_to_user			/* jump to syscall epilogue */
 	addi	r3, r0, -ENOSYS			/* set errno in delay slot  */
 
+.previous
 
+
 /* Debug traps are like a system call, but entered via brki r14, 0x60 
    All we need to do is send the SIGTRAP signal to current, ptrace and do_signal
    will handle the rest */
@@ -369,9 +390,11 @@
 	lwi	r4, r1, PT_R4
 	bri	ret_to_user
 
+
 ENTRY(_break)
 	bri	0
 
+.section ENTRY_TEXT
 
 /* struct task_struct *_switch_to(struct thread_info *prev, struct thread_info *next); */
 ENTRY(_switch_to)
@@ -458,7 +481,6 @@
 	rtsd	r15, 8
 	nop
 
-
 ENTRY(ret_from_fork)
 	addk	r5, r0, r3
 	addk	r6, r0, r1
@@ -576,6 +598,8 @@
 	brid	sys_rt_sigsuspend
 	addk	r7, r1, r0
 
+.previous
+
 	/* Interrupt vector table */
 	.section	.init.ivt, "ax"
 	.org 0x0
@@ -588,9 +612,7 @@
 	brai	_debug_exception
 
 
-
-
-.section .rodata,"a"
+.section ENTRY_DATA,"a"
 #include "syscall_table.S"
 
 syscall_table_size=(.-sys_call_table)
Index: arch/microblaze/kernel/irq.c
===================================================================
--- arch/microblaze/kernel/irq.c	(revision 3751)
+++ arch/microblaze/kernel/irq.c	(revision 3761)
@@ -15,7 +15,14 @@
 #include <linux/irqflags.h>
 #include <linux/seq_file.h>
 #include <linux/kernel_stat.h>
+#include <asm/sections.h>
 
+#ifdef CONFIG_IRQ_IN_BRAM
+#define IRQ_TEXT __bram_code__
+#else
+#define IRQ_TEXT 
+#endif
+
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -27,7 +34,7 @@
 
 extern void ledoff(void);
 
-void do_IRQ(struct pt_regs *regs)
+void IRQ_TEXT do_IRQ(struct pt_regs *regs)
 {
 	unsigned int irq;
 
Index: arch/microblaze/kernel/opb_intc.c
===================================================================
--- arch/microblaze/kernel/opb_intc.c	(revision 3751)
+++ arch/microblaze/kernel/opb_intc.c	(revision 3761)
@@ -12,6 +12,7 @@
 #include <linux/autoconf.h>
 #include <asm/page.h>
 #include <asm/io.h>
+#include <asm/sections.h>
 
 /* No one else should require these constants, so define them locally here. */
 #define ISR 0x00			/* Interrupt Status Register */
@@ -28,21 +29,29 @@
 
 #define BASE_ADDR CONFIG_XILINX_INTC_0_BASEADDR
 
-static void opb_intc_enable(unsigned int irq)
+#ifdef CONFIG_IRQ_IN_BRAM
+#define INTC_TEXT __bram_code__
+#define INTC_DATA __bram_data__
+#else
+#define INTC_TEXT
+#define INTC_DATA
+#endif
+
+static INTC_TEXT void opb_intc_enable(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("enable: %d\n", irq);
 	iowrite32(mask, BASE_ADDR + SIE);
 }
 
-static void opb_intc_disable(unsigned int irq)
+static INTC_TEXT void opb_intc_disable(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("disable: %d\n", irq);
 	iowrite32(mask, BASE_ADDR + CIE);
 }
 
-static void opb_intc_disable_and_ack(unsigned int irq)
+static INTC_TEXT void opb_intc_disable_and_ack(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 	pr_debug("disable_and_ack: %d\n", irq);
@@ -51,7 +60,7 @@
 		iowrite32(mask, BASE_ADDR + IAR);	/* ack edge triggered intr */
 }
 
-static void opb_intc_end(unsigned int irq)
+static INTC_TEXT void opb_intc_end(unsigned int irq)
 {
 	unsigned long mask = (0x00000001 << (irq & 31));
 
@@ -64,7 +73,7 @@
 	}
 }
 
-static struct irq_chip obp_intc = {
+static INTC_DATA struct irq_chip obp_intc = {
 	.name = "OPB-INTC",
 	.enable	  = opb_intc_enable,
 	.disable  = opb_intc_disable,