-
Notifications
You must be signed in to change notification settings - Fork 35
Description
DonQuichotteComputers@gmail.com, 2016/05/12
Issue with the -msmall16 option, e-gcc 4.8.2
1/ Reformatted my SD card with the 2015.1 Zynq 7020 headless image (from late March 2016)
2/ Copied build.sh + run.sh + src/e_dbg3.c on the Parallella
3/ Compared the e-objdump outputs => e_emem_config and the dest offset from e_read() are broken with option -msmall16
Could not test with the 2016.3 image - the network config changed or I dunno what, my SSH/putty/Suzanne Matthews method is KO too.
Since I use e_read() / e_write() only once in the DEVICE code, I'll probably patch the input parameters and that's all -- -msmall16 avoids the superfluous movt rX, 0x0 :D
parallella@parallella:~$ cat build.sh
#!/bin/bash
# Build the HOST (ARM) and DEVICE (Epiphany) programs for one example.
#
# Usage: ./build.sh <name> [extra e-gcc flags...]
#   <name>   builds src/<name>.c (host) and src/e_<name>.c (device) into Debug/
#   extra    flags are forwarded to e-gcc only (e.g. -msmall16)
set -e

ESDK=${EPIPHANY_HOME}
# ELIBS/EINCS each hold "<flag> <path>" and are expanded unquoted on purpose
# so that they split into two words.
ELIBS="-L ${ESDK}/tools/host/lib"
EINCS="-I ${ESDK}/tools/host/include"
ELDF=${ESDK}/bsps/current/internal.ldf

# Always build from the directory that contains this script.
SCRIPT=$(readlink -f "$0")
EXEPATH=$(dirname "$SCRIPT")
cd "$EXEPATH"

CROSS_PREFIX=
case $(uname -p) in
    arm*)
        # Use native arm compiler (no cross prefix)
        CROSS_PREFIX=
        ;;
    *)
        # Use cross compiler
        CROSS_PREFIX="arm-linux-gnueabihf-"
        ;;
esac

# Build HOST side application
${CROSS_PREFIX}gcc -Ofast "src/$1.c" -o "Debug/$1.elf" ${EINCS} ${ELIBS} -le-hal -le-loader -lpthread

# Build DEVICE side program (every argument after <name> is passed through)
e-gcc -T "${ELDF}" -Ofast "${@:2}" "src/e_$1.c" -o "Debug/e_$1.elf" -le-lib

# Convert ebinary to SREC file
e-objcopy --srec-forceS3 --output-target srec "Debug/e_$1.elf" "Debug/e_$1.srec"
parallella@parallella:~$ cat run.sh
#!/bin/bash
# Run a host program previously built into Debug/.
#
# Usage: ./run.sh <name> [args...]
#   Executes Debug/<name>.elf from inside Debug/, forwarding every argument
#   after <name> to it.
set -e
cd Debug
"./$1.elf" "${@:2}"
parallella@parallella:~$ ./build.sh dbg3
parallella@parallella:~$ ./run.sh dbg3
Eternity II running under Parallella :)
(... works fine, 16 eCore shake hands as for Paralle2 1st version)
parallella@parallella:~$ e-objdump -d Debug/e_dbg3.elf > ok
parallella@parallella:~$ ./build.sh dbg3 -msmall16
parallella@parallella:~$ ./run.sh dbg3
Eternity II running under Parallella :)
(... KO, 0 eCore ending)
parallella@parallella:~$ e-objdump -d Debug/e_dbg3.elf > ko
parallella@parallella:~$ diff ok ko
244,254c244,254
< 758: 10d6 lsl r0,r4,0x6
< 75a: 3116 lsl r1,r4,0x8
< 75c: 243a sub r1,r1,r0
< 75e: 080b 0002 mov r0,0x40
< 762: 000b 1012 movt r0,0x100
< 766: 241a add r1,r1,r0
< 768: 355c 0400 str r1,[sp,+0x2]
< 76c: 0a0b 0002 mov r0,0x50
< 770: 3803 mov r1,0xc0
< 772: 8f0b 2092 mov r12,0x978
< 776: 35dc 0400 str r1,[sp,+0x3]
---
> 758: 3116 lsl r1,r4,0x8
> 75a: 10d6 lsl r0,r4,0x6
> 75c: 043a sub r0,r1,r0
> 75e: 280b 0002 mov r1,0x40
> 762: 200b 1012 movt r1,0x100
> 766: 009a add r0,r0,r1
> 768: 155c 0400 str r0,[sp,+0x2]
> 76c: 8f0b 2092 mov r12,0x978
> 770: 1803 mov r0,0xc0
> 772: 15dc 0400 str r0,[sp,+0x3]
> 776: 200b 0602 mov r1,0x6000
256,275c256,273
< 77c: 200b 0602 mov r1,0x6000
< 780: 6003 mov r3,0x0
< 782: 800b 3002 movt r12,0x0
< 786: 000b 1002 movt r0,0x0
< 78a: 115f 0402 jalr r12
< 78e: 01a2 nop
< 790: 000b 0002 mov r0,0x0
< 794: 080b 1002 movt r0,0x40
< 798: 101a add r0,r4,r0
< 79a: 200b 0002 mov r1,0x0
< 79e: 0056 lsl r0,r0,0x2
< 7a0: 200b 1402 movt r1,0x4000
< 7a4: 2054 str r1,[r0]
< 7a6: 77cc 2400 ldr fp,[sp,+0x7]
< 7aa: 10e2 mov r0,r4
< 7ac: 974c 0400 ldr r4,[sp,+0x6]
< 7b0: d6cc 2400 ldr lr,[sp,+0x5]
< 7b4: b41b 2403 add sp,sp,24
< 7b8: 194f 0402 rts
< 7bc: 0000 beq 7bc <_main+0x8c>
---
> 77c: 6003 mov r3,0x0
> 77e: 800b 3002 movt r12,0x0
> 782: 004b 0002 mov r0,0x2
> 786: 115f 0402 jalr r12
> 78a: 01a2 nop
> 78c: 000b 0002 mov r0,0x0
> 790: 080b 1002 movt r0,0x40
> 794: 101a add r0,r4,r0
> 796: 200b 0002 mov r1,0x0
> 79a: 0056 lsl r0,r0,0x2
> 79c: 200b 1402 movt r1,0x4000
> 7a0: 2054 str r1,[r0]
> 7a2: 77cc 2400 ldr fp,[sp,+0x7]
> 7a6: 10e2 mov r0,r4
> 7a8: 974c 0400 ldr r4,[sp,+0x6]
> 7ac: d6cc 2400 ldr lr,[sp,+0x5]
> 7b0: b41b 2403 add sp,sp,24
> 7b4: 194f 0402 rts
parallella@parallella:~$
parallella@parallella:~$ cat src/e_dbg3.c
include <e-lib.h> //mandatory even for a minimalist design -- e_get_coreid(), e_read(), e_write()
define uint8_t unsigned char //avoid stdint.h
define uint16_t unsigned short
define uint32_t unsigned int
define uint64_t unsigned long long
define int64_t long long
define CORE_N 16
pragma pack(4)
//to DEVICE
typedef struct S_input {
int64_t tuile2do;
int bordertuile2do;
signed char tdam[180];
}Sinput;
//from DEVICE
typedef struct S_output {
int64_t globaltsolN[80];
int globalres;
int align8;
}Soutput;
//shared MEMORY
typedef struct S_io {
int tcmd[CORE_N];
Sinput tin [CORE_N];
Soutput tout[CORE_N];
}Sio;
define CMD_LEN (CORE_N * sizeof(uint)) //ARM handling Epiphany tasks: start, end
define CMD_INIT 0x80000000 //host init
define CMD_DONE 0x40000000 //eCore did the job properly (probably ; some bug might crush this word but it's highly improbable)
//ARM <-> Epiphany
define SHARED_RAM 0x01000000
define SHARED_IN 0x6000
define SHARED_OUT (SHARED_IN + sizeof(Sinput))
//volatile Sinput in;
//volatile Soutput out;
volatile Sinput in SECTION(".data_bank3");//0x6000
volatile Soutput out SECTION(".data_bank3");//0x6000 + sizeof(Sinput)
//#######################################
// Device-side entry point: derive this core's slot index from its core ID,
// copy that slot's Sinput record from the shared memory to SHARED_IN with
// e_read(), write CMD_DONE into the slot's command word, and return the
// slot index.
int main(void) {
e_coreid_t coreid;
int row, col, cmdI;
int cmd;// address of this core's command word in shared memory
int src;// address of this core's Sinput record in shared memory
coreid=e_get_coreid();//query the coreID from hardware
// Turn the core ID into zero-based row/col by subtracting fixed offsets (32 and 8).
row=(coreid>>6) - 32;//dirty but OK for MY 16-core Epiphany
col=(coreid&15) - 8;
// Slot index: four slots per row.
cmdI=(row<<2) + col;
// The Sinput records start right after the CMD_LEN bytes of command words.
src=SHARED_RAM + CMD_LEN + (cmdI * sizeof(Sinput));
// Previous form of the call, using &in as destination instead of SHARED_IN:
//e_read((unsigned int *)&e_emem_config, (void *)&in, 0, 0, (void *)src, sizeof(Sinput));
e_read((unsigned int *)&e_emem_config, (void *)SHARED_IN, 0, 0, (void *)src, sizeof(Sinput));
// One 4-byte command word per slot, starting at SHARED_RAM.
cmd=SHARED_RAM + (cmdI *4);
asm("nop");
// Report completion in this slot's command word.
*(int *)cmd=CMD_DONE;
return cmdI;
}
my gcc-explorer output of e_dbg3.c: not handling the SECTION directive
-T /home/ylav/dev/parallella/epiphany-libs-2015.1/bsps/current/internal.ldf -Ofast -mfp-mode=int -msmall16 -le-lib
; Compiler output for main() of e_dbg3.c (built with -msmall16, see the option line above).
; r4 holds cmdI from its computation until it is returned in r0.
str r4,[sp],#-6                 ; save r4 at [sp], then lower sp (undone by "add sp,sp,#24" below)
mov r0, %low(_e_get_coreid)     ; r0 = address of e_get_coreid, low half
str lr,[sp,#5]                  ; save lr (reloaded before rts)
str fp,[sp,#7]                  ; save fp (reloaded before rts)
movt r0, %high(_e_get_coreid)   ; r0 = address of e_get_coreid, high half
jalr r0                         ; coreid = e_get_coreid(); result is read from r0
mov r1,#15                      ; mask for coreid&15
lsr r4,r0,#6                    ; r4 = coreid>>6
add r4,r4,#-32                  ; r4 = row = (coreid>>6) - 32
and r0,r0,r1                    ; r0 = coreid&15
add r0,r0,#-8                   ; r0 = col = (coreid&15) - 8
lsl r4,r4,#2                    ; r4 = row<<2
add r4,r4,r0                    ; r4 = cmdI = (row<<2) + col
lsl r1,r4,#8                    ; r1 = cmdI*256
lsl r0,r4,#6                    ; r0 = cmdI*64
sub r0,r1,r0                    ; r0 = cmdI*192 = cmdI*sizeof(Sinput)
mov r1, %low(#16777280)         ; r1 = 0x01000040 = SHARED_RAM + CMD_LEN, low half
movt r1, %high(#16777280)       ; ... high half
add r0,r0,r1                    ; r0 = src
str r0,[sp,#2]                  ; src (5th e_read argument) goes to the stack
mov ip, %low(_e_read)           ; ip = address of e_read, low half
mov r0,#192                     ; r0 = sizeof(Sinput)
str r0,[sp,#3]                  ; byte count (6th e_read argument) goes to the stack
mov r1,#24576                   ; r1 = 0x6000 = SHARED_IN (2nd e_read argument)
mov r2,#0                       ; 3rd e_read argument = 0
mov r3,#0                       ; 4th e_read argument = 0
movt ip, %high(_e_read)         ; ip = address of e_read, high half
mov r0,_e_emem_config           ; r0 = &e_emem_config (1st e_read argument), loaded with a single mov and no movt
                                ; NOTE(review): presumably the -msmall16 effect; e_emem_config is the operand the report calls broken -- confirm
jalr ip                         ; call e_read
; 62 "/tmp/gcc-explorer-compiler116412-10341-wxbyvm/example.c" 1
nop
; 0 "" 2
mov r0, %low(#4194304)          ; r0 = 0x00400000 (SHARED_RAM / 4), low half
movt r0, %high(#4194304)        ; ... high half
add r0,r4,r0                    ; r0 = cmdI + 0x00400000
mov r1, %low(#1073741824)       ; r1 = 0x40000000 = CMD_DONE, low half
lsl r0,r0,#2                    ; r0 = cmd = SHARED_RAM + cmdI*4
movt r1, %high(#1073741824)     ; ... high half
str r1,[r0]                     ; *(int *)cmd = CMD_DONE
ldr fp,[sp,#7]                  ; restore fp
mov r0,r4                       ; return value = cmdI
ldr r4,[sp,#6]                  ; restore r4
ldr lr,[sp,#5]                  ; restore lr
add sp,sp,#24                   ; release the 24-byte frame
rts