code: 9ferno

ref: 6bb619c8db2867ddd9cd19c0aec05065f5ee0cae
dir: /libkern/memmove-power.s

#define	BDNZ	BC	16,0,
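/* BDNZ: BC 16,0 decrements CTR and branches while CTR is non-zero */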
	TEXT	memcpy(SB), $0
	BR	move

	TEXT	memmove(SB), $0
move:

/*
 * performance:
 * (tba)
 */
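
/*
 * s1 (the destination) arrives in R3 and is spilled to its
 * frame slot so it can be reloaded as the return value;
 * s2 and n are picked up from the frame
 */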

	MOVW	R3, s1+0(FP)
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R3, R10			/* R10 is to-pointer */
	CMP	R9, $0
	BEQ	ret
	BLT	trap
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */

/*
 * if no more than 16 bytes, just use one lsw/stsw pair
 */
	CMP	R9, $16
	BLE	fout

	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * easiest test: copy backwards if the
 * destination has the higher memory address
 */
	CMPU	R10, R11
	BGT	back
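
/*
 * rationale, roughly in C:
 *	if(to <= from)
 *		copy forwards
 *	else
 *		copy backwards
 * when the buffers overlap, a forward copy with to > from
 * would overwrite source bytes before they are read
 */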

/*
 * test whether both pointers
 * have the same word alignment
 */
	XOR	R10,R11, R7
	ANDCC	$3,R7
	BNE	fbad

/*
 * move a few bytes to align pointers
 */
	ANDCC	$3,R10,R7
	BEQ	f2
	SUBC	R7, $4, R7
	SUB	R7, R9
	MOVW	R7, XER
	LSW	(R11), R16
	ADD	R7, R11
	STSW	R16, (R10)
	ADD	R7, R10
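
/*
 * above: R7 = 4 - (to & 3) bytes go through one lsw/stsw
 * with the byte count in XER, leaving both pointers word
 * aligned (the XOR test guarantees they share the same
 * low two bits)
 */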

/*
 * turn R14 into a count of 16-byte blocks;
 * copy 16 bytes at a time while there's room.
 */
f2:
	SRAWCC	$4, R9, R14
	BLE	fout
	MOVW	R14, CTR
	SUB	$4, R11
	SUB	$4, R10
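/*
 * MOVWU pre-indexes: 4(Rn) bumps Rn by 4 before the
 * access, hence the -4 bias applied to both pointers;
 * each pass moves four words, 16 bytes
 */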
f3:
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	BDNZ	f3
	RLWNMCC	$0, R9, $15, R9	/* residue: R9 = n & 15 */
	BEQ	ret
	ADD	$4, R11		/* undo the pre-index bias */
	ADD	$4, R10

/*
 * move up to 16 bytes through R16 .. R19, aligned or not;
 * XER carries the byte count for lsw/stsw
 */
fout:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * loop for unaligned copy using the immediate-count
 * string form, then copy up to 15 remaining bytes
 */
fbad:
	SRAWCC	$4, R9, R14
	BLE	f6
	MOVW	R14, CTR
f5:
	LSW	(R11), $16, R16
	ADD	$16, R11
	STSW	R16, $16, (R10)
	ADD	$16, R10
	BDNZ	f5
	RLWNMCC	$0, R9, $15, R9	/* residue: R9 = n & 15 */
	BEQ	ret
f6:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * whole thing repeated for backwards
 */
back:
	CMP	R9, $4
	BLT	bout

	XOR	R12,R13, R7
	ANDCC	$3,R7
	BNE	bout
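/*
 * align the end pointers to a word boundary a byte at
 * a time; the XOR test above means R12 and R13 become
 * aligned together
 */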
b1:
	ANDCC	$3,R13, R7
	BEQ	b2
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	SUB	$1, R9
	BR	b1
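
/*
 * copy 16 bytes at a time backwards while there's room
 */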
b2:
	SRAWCC	$4, R9, R14
	BLE	b4
	MOVW	R14, CTR
b3:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	BDNZ	b3
	RLWNMCC	$0, R9, $15, R9	/* residue: R9 = n & 15 */
	BEQ	ret
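
/*
 * copy the remaining whole words backwards
 */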
b4:
	SRAWCC	$2, R9, R14
	BLE	bout
	MOVW	R14, CTR
b5:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	BDNZ	b5
	RLWNMCC	$0, R9, $3, R9	/* residue: R9 = n & 3 */
	BEQ	ret

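/*
 * copy the remaining bytes backwards, one at a
 * time, until R13 falls back to R11
 */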
bout:
	CMPU	R13, R11
	BLE	ret
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	BR	bout

trap:
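/*
 * negative count: force a fault by storing through address 0
 */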
/*	MOVW	$0, R0	*/
	MOVW	R0, 0(R0)

ret:
	MOVW	s1+0(FP), R3
	RETURN