PPC440 DDR setup: Set SDRAM0_CFG0[PMU]=0 for best performance
AMCC suggested to set the PMU bit to 0 for best performace on the PPC440 DDR controller. Please see doc/README.440-DDR-performance for details. Patch by Stefan Roese, 28 Jul 2006
This commit is contained in:
parent
fc6c4a67ae
commit
a2c95a7224
@ -2,6 +2,12 @@
|
||||
Changes since U-Boot 1.1.4:
|
||||
======================================================================
|
||||
|
||||
* PPC440 DDR setup: Set SDRAM0_CFG0[PMU]=0 for best performance
|
||||
AMCC suggested to set the PMU bit to 0 for best performace on
|
||||
the PPC440 DDR controller.
|
||||
Please see doc/README.440-DDR-performance for details.
|
||||
Patch by Stefan Roese, 28 Jul 2006
|
||||
|
||||
* AMCC bamboo (440EP) U-Boot image reduced to 384kbyte
|
||||
Please see doc/README.bamboo for details.
|
||||
Patch by Stefan Roese, 27 Jul 2006
|
||||
|
@ -313,13 +313,13 @@ void sdram_init(void)
|
||||
mtsdram(mem_tr0, 0x410a4012); /* ?? */
|
||||
mtsdram(mem_rtr, 0x04080000); /* ?? */
|
||||
mtsdram(mem_cfg1, 0x00000000); /* Self-refresh exit, disable PM */
|
||||
mtsdram(mem_cfg0, 0x34000000); /* Disable EEC */
|
||||
mtsdram(mem_cfg0, 0x30000000); /* Disable EEC */
|
||||
udelay(400); /* Delay 200 usecs (min) */
|
||||
|
||||
/*--------------------------------------------------------------------
|
||||
* Enable the controller, then wait for DCEN to complete
|
||||
*------------------------------------------------------------------*/
|
||||
mtsdram(mem_cfg0, 0x84000000); /* Enable */
|
||||
mtsdram(mem_cfg0, 0x80000000); /* Enable */
|
||||
|
||||
for (;;) {
|
||||
mfsdram(mem_mcsts, reg);
|
||||
|
@ -309,13 +309,13 @@ void sdram_init(void)
|
||||
mtsdram(mem_tr0, 0x410a4012); /* ?? */
|
||||
mtsdram(mem_rtr, 0x04080000); /* ?? */
|
||||
mtsdram(mem_cfg1, 0x00000000); /* Self-refresh exit, disable PM */
|
||||
mtsdram(mem_cfg0, 0x34000000); /* Disable EEC */
|
||||
mtsdram(mem_cfg0, 0x30000000); /* Disable EEC */
|
||||
udelay(400); /* Delay 200 usecs (min) */
|
||||
|
||||
/*--------------------------------------------------------------------
|
||||
* Enable the controller, then wait for DCEN to complete
|
||||
*------------------------------------------------------------------*/
|
||||
mtsdram(mem_cfg0, 0x84000000); /* Enable */
|
||||
mtsdram(mem_cfg0, 0x80000000); /* Enable */
|
||||
|
||||
for (;;) {
|
||||
mfsdram(mem_mcsts, reg);
|
||||
|
@ -379,7 +379,7 @@ long int initdram(int board_type)
|
||||
/*
|
||||
* Enable the controller, then wait for DCEN to complete
|
||||
*/
|
||||
mtsdram(mem_cfg0, 0x86000000); /* DCEN=1, PMUD=1, 64-bit */
|
||||
mtsdram(mem_cfg0, 0x82000000); /* DCEN=1, PMUD=0, 64-bit */
|
||||
udelay(10000);
|
||||
|
||||
if (get_ram_size(0, mb0cf[i].size) == mb0cf[i].size) {
|
||||
|
@ -1007,9 +1007,9 @@ void program_cfg0(unsigned long* dimm_populated,
|
||||
}
|
||||
|
||||
/*
|
||||
* program Page Management Unit
|
||||
* program Page Management Unit (0 == enabled)
|
||||
*/
|
||||
cfg0 |= SDRAM_CFG0_PMUD;
|
||||
cfg0 &= ~SDRAM_CFG0_PMUD;
|
||||
|
||||
/*
|
||||
* program Memory Controller Options 0
|
||||
|
90
doc/README.440-DDR-performance
Normal file
90
doc/README.440-DDR-performance
Normal file
@ -0,0 +1,90 @@
|
||||
AMCC suggested to set the PMU bit to 0 for best performace on the
|
||||
PPC440 DDR controller. The 440er common DDR setup files (sdram.c &
|
||||
spd_sdram.c) are changed accordingly. So all 440er boards using
|
||||
these setup routines will automatically receive this performance
|
||||
increase.
|
||||
|
||||
Please see below some benchmarks done by AMCC to demonstrate this
|
||||
performance changes:
|
||||
|
||||
|
||||
----------------------------------------
|
||||
SDRAM0_CFG0[PMU] = 1 (U-boot default for Bamboo, Yosemite and Yellowstone)
|
||||
----------------------------------------
|
||||
Stream benchmark results
|
||||
-------------------------------------------------------------
|
||||
This system uses 8 bytes per DOUBLE PRECISION word.
|
||||
-------------------------------------------------------------
|
||||
Array size = 2000000, Offset = 0
|
||||
Total memory required = 45.8 MB.
|
||||
Each test is run 10 times, but only
|
||||
the *best* time for each is used.
|
||||
-------------------------------------------------------------
|
||||
Your clock granularity/precision appears to be 1 microseconds.
|
||||
Each test below will take on the order of 112345 microseconds.
|
||||
(= 112345 clock ticks)
|
||||
Increase the size of the arrays if this shows that you are not getting
|
||||
at least 20 clock ticks per test.
|
||||
-------------------------------------------------------------
|
||||
WARNING -- The above is only a rough guideline.
|
||||
For best results, please be sure you know the precision of your system
|
||||
timer.
|
||||
-------------------------------------------------------------
|
||||
Function Rate (MB/s) RMS time Min time Max time
|
||||
Copy: 256.7683 0.1248 0.1246 0.1250
|
||||
Scale: 246.0157 0.1302 0.1301 0.1302
|
||||
Add: 255.0316 0.1883 0.1882 0.1885
|
||||
Triad: 253.1245 0.1897 0.1896 0.1899
|
||||
|
||||
|
||||
TTCP Benchmark Results
|
||||
ttcp-t: socket
|
||||
ttcp-t: connect
|
||||
ttcp-t: buflen=8192, nbuf=2048, align=16384/0, port=5000 tcp ->
|
||||
localhost
|
||||
ttcp-t: 16777216 bytes in 0.28 real seconds = 454.29 Mbit/sec +++
|
||||
ttcp-t: 2048 I/O calls, msec/call = 0.14, calls/sec = 7268.57
|
||||
ttcp-t: 0.0user 0.1sys 0:00real 60% 0i+0d 0maxrss 0+2pf 3+1506csw
|
||||
|
||||
----------------------------------------
|
||||
SDRAM0_CFG0[PMU] = 0 (Suggested modification)
|
||||
Setting PMU = 0 provides a noticeable performance improvement *2% to
|
||||
5% improvement in memory performance.
|
||||
*Improves the Mbit/sec for TTCP benchmark by almost 76%.
|
||||
----------------------------------------
|
||||
Stream benchmark results
|
||||
-------------------------------------------------------------
|
||||
This system uses 8 bytes per DOUBLE PRECISION word.
|
||||
-------------------------------------------------------------
|
||||
Array size = 2000000, Offset = 0
|
||||
Total memory required = 45.8 MB.
|
||||
Each test is run 10 times, but only
|
||||
the *best* time for each is used.
|
||||
-------------------------------------------------------------
|
||||
Your clock granularity/precision appears to be 1 microseconds.
|
||||
Each test below will take on the order of 120066 microseconds.
|
||||
(= 120066 clock ticks)
|
||||
Increase the size of the arrays if this shows that you are not getting
|
||||
at least 20 clock ticks per test.
|
||||
-------------------------------------------------------------
|
||||
WARNING -- The above is only a rough guideline.
|
||||
For best results, please be sure you know the precision of your system
|
||||
timer.
|
||||
-------------------------------------------------------------
|
||||
Function Rate (MB/s) RMS time Min time Max time
|
||||
Copy: 262.5167 0.1221 0.1219 0.1223
|
||||
Scale: 258.4856 0.1238 0.1238 0.1240
|
||||
Add: 262.5404 0.1829 0.1828 0.1831
|
||||
Triad: 266.8594 0.1800 0.1799 0.1802
|
||||
|
||||
TTCP Benchmark Results
|
||||
ttcp-t: socket
|
||||
ttcp-t: connect
|
||||
ttcp-t: buflen=8192, nbuf=2048, align=16384/0, port=5000 tcp ->
|
||||
localhost
|
||||
ttcp-t: 16777216 bytes in 0.16 real seconds = 804.06 Mbit/sec +++
|
||||
ttcp-t: 2048 I/O calls, msec/call = 0.08, calls/sec = 12864.89
|
||||
ttcp-t: 0.0user 0.0sys 0:00real 46% 0i+0d 0maxrss 0+2pf 120+1csw
|
||||
|
||||
|
||||
2006-07-28, Stefan Roese <sr@denx.de>
|
Loading…
Reference in New Issue
Block a user