Skip to content

Commit

Permalink
metag: Optimised library functions
Browse files Browse the repository at this point in the history
Add optimised library functions for metag.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
  • Loading branch information
James Hogan committed Mar 2, 2013
1 parent f507758 commit 086e9dc
Show file tree
Hide file tree
Showing 20 changed files with 1,474 additions and 0 deletions.
92 changes: 92 additions & 0 deletions arch/metag/include/asm/checksum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#ifndef _METAG_CHECKSUM_H
#define _METAG_CHECKSUM_H

/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
*
* returns a 32-bit number suitable for feeding into itself
* or csum_tcpudp_magic
*
* this function must be called with even lengths, except
* for the last fragment, which may be odd
*
* it's best to have buff aligned on a 32-bit boundary
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);

/*
* the same as csum_partial, but copies from src while it
* checksums
*
* here it is even more important to align src and dst on a 32-bit (or
* even better 64-bit) boundary
*/
extern __wsum csum_partial_copy(const void *src, void *dst, int len,
__wsum sum);

/*
* the same as csum_partial_copy, but copies from user space.
*
* here it is even more important to align src and dst on a 32-bit (or
* even better 64-bit) boundary
*/
extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum sum, int *csum_err);

/*
* csum_partial_copy_nocheck() is a plain alias: it forwards all four
* arguments, unchanged, to csum_partial_copy().
*/
#define csum_partial_copy_nocheck(src, dst, len, sum) \
csum_partial_copy((src), (dst), (len), (sum))

/*
 * Fold a 32-bit partial checksum down to 16 bits and complement it.
 *
 * The high halfword is added into the low halfword twice: the first
 * addition can produce a 17-bit value, the second absorbs that carry.
 * The complemented result is returned as a __sum16.
 */
static inline __sum16 csum_fold(__wsum csum)
{
	u32 folded = (__force u32)csum;

	folded = (folded >> 16) + (folded & 0xffff);
	folded = (folded >> 16) + (folded & 0xffff);

	return (__force __sum16)~folded;
}

/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
*/
extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);

/*
 * computes the checksum of the TCP/UDP pseudo-header
 *
 * returns a 32-bit partial checksum that is neither folded nor
 * complemented; pass the result to csum_fold() to obtain the final
 * 16-bit checksum (csum_tcpudp_magic() does exactly that).
 *
 * saddr, daddr: source and destination addresses (big-endian)
 * len, proto:   length and protocol fields of the pseudo-header
 * sum:          partial checksum to accumulate into
 */
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
					unsigned short len,
					unsigned short proto,
					__wsum sum)
{
	/*
	 * NOTE(review): shifting by 8 looks like the usual little-endian
	 * trick for adding len and proto as if they were in network byte
	 * order - confirm the byte order this is built for.
	 */
	unsigned long len_proto = (proto + len) << 8;

	/*
	 * Ones'-complement accumulation: every ADDS is followed by an
	 * ADDCS #1 so that a carry out of the addition is wrapped back
	 * into the sum.  This must include the very first addition
	 * (sum + daddr), otherwise its carry is silently lost.
	 */
	asm ("ADDS %0, %0, %1\n"
	     "ADDCS %0, %0, #1\n"
	     "ADDS %0, %0, %2\n"
	     "ADDCS %0, %0, #1\n"
	     "ADDS %0, %0, %3\n"
	     "ADDCS %0, %0, #1\n"
	     : "=d" (sum)
	     : "d" (daddr), "d" (saddr), "d" (len_proto),
	       "0" (sum)
	     : "cc");
	return sum;
}

/*
 * Checksum of the TCP/UDP pseudo-header, folded to 16 bits and
 * complemented: csum_tcpudp_nofold() followed by csum_fold().
 */
static inline __sum16
csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
		  unsigned short proto, __wsum sum)
{
	__wsum pseudo = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);

	return csum_fold(pseudo);
}

/*
* this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c
*/
extern __sum16 ip_compute_csum(const void *buff, int len);

#endif /* _METAG_CHECKSUM_H */
12 changes: 12 additions & 0 deletions arch/metag/include/asm/div64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef __ASM_DIV64_H__
#define __ASM_DIV64_H__

#include <asm-generic/div64.h>

/*
* Architecture-provided 64-bit division helpers.  Only the prototypes
* live here; the definitions are not part of this header.
* Both take a 64-bit dividend and a 64-bit divisor and return a 64-bit
* value (unsigned for div_u64, signed for div_s64).
*/
extern u64 div_u64(u64 dividend, u64 divisor);
extern s64 div_s64(s64 dividend, s64 divisor);

/*
* Self-referential defines: they leave calls unchanged but make the
* names testable with #ifdef/#ifndef.
* NOTE(review): presumably this lets generic code skip its own fallback
* definitions of these helpers - confirm against the generic header.
*/
#define div_u64 div_u64
#define div_s64 div_s64

#endif
13 changes: 13 additions & 0 deletions arch/metag/include/asm/string.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef _METAG_STRING_H_
#define _METAG_STRING_H_

/*
* Prototypes for the string routines this architecture declares itself.
* Each one is paired with a __HAVE_ARCH_* define.
* NOTE(review): presumably the defines tell the generic string code not
* to supply its own version of the routine - confirm against the
* generic string header/implementation.
*/

/* Fill __count bytes at __s with the byte value __c. */
#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, size_t __count);

/* Copy __n bytes from __from to __to. */
#define __HAVE_ARCH_MEMCPY
void *memcpy(void *__to, __const__ void *__from, size_t __n);

/* Copy __n bytes from __src to __dest. */
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *__dest, __const__ void *__src, size_t __n);

#endif /* _METAG_STRING_H_ */
33 changes: 33 additions & 0 deletions arch/metag/lib/ashldi3.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
! Copyright (C) 2012 by Imagination Technologies Ltd.
!
! 64-bit arithmetic shift left routine.
!

.text
.global ___ashldi3
.type ___ashldi3,function

!! ___ashldi3: shift a 64-bit value left by COUNT bits.
!!
!! In:      D1Ar1 = HI word, D0Ar2 = LO word, D1Ar3 = COUNT
!! Out:     D1Re0 = HI word, D0Re0 = LO word
!! Scratch: D0Ar4, D0Ar6 (D1Ar3 is also overwritten on the < 32 path)
!!
!! Three cases are handled: COUNT == 0 (value returned unchanged),
!! COUNT < 32 (bits move from LO into HI) and COUNT >= 32 (LO becomes
!! zero and HI is built from LO alone).
___ashldi3:
MOV D0Re0,D0Ar2
MOV D1Re0,D1Ar1
CMP D1Ar3,#0 ! COUNT == 0
MOVEQ PC,D1RtP ! Yes, return

SUBS D0Ar4,D1Ar3,#32 ! N = COUNT - 32
BGE $L10

!! Shift < 32
NEG D0Ar4,D0Ar4 ! N = - N
LSL D1Re0,D1Re0,D1Ar3 ! HI = HI << COUNT
LSR D0Ar6,D0Re0,D0Ar4 ! TMP= LO >> -(COUNT - 32)
OR D1Re0,D1Re0,D0Ar6 ! HI = HI | TMP
!! After the swap D0Ar4 holds COUNT (and D1Ar3 holds N, no longer used).
!! NOTE(review): presumably done so the shift amount sits in the same
!! data unit (D0) as LO - confirm against the Meta ISA rules.
SWAP D0Ar4,D1Ar3
LSL D0Re0,D0Re0,D0Ar4 ! LO = LO << COUNT
MOV PC,D1RtP

$L10:
!! Shift >= 32
LSL D1Re0,D0Re0,D0Ar4 ! HI = LO << N
MOV D0Re0,#0 ! LO = 0
MOV PC,D1RtP
.size ___ashldi3,.-___ashldi3
33 changes: 33 additions & 0 deletions arch/metag/lib/ashrdi3.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
! Copyright (C) 2012 by Imagination Technologies Ltd.
!
! 64-bit arithmetic shift right routine.
!

.text
.global ___ashrdi3
.type ___ashrdi3,function

!! ___ashrdi3: arithmetic (sign-propagating) shift of a 64-bit value
!! right by COUNT bits.
!!
!! In:      D1Ar1 = HI word, D0Ar2 = LO word, D1Ar3 = COUNT
!! Out:     D1Re0 = HI word, D0Re0 = LO word
!! Scratch: D0Ar4, D0Ar6 (D1Ar3 is also overwritten)
!!
!! Three cases are handled: COUNT == 0 (value returned unchanged),
!! COUNT < 32 (bits move from HI into LO) and COUNT >= 32 (LO is built
!! from HI alone and HI is filled with copies of the sign bit).
___ashrdi3:
MOV D0Re0,D0Ar2
MOV D1Re0,D1Ar1
CMP D1Ar3,#0 ! COUNT == 0
MOVEQ PC,D1RtP ! Yes, return

MOV D0Ar4,D1Ar3 ! keep a copy of COUNT in D0Ar4
SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32
BGE $L20

!! Shift < 32
NEG D1Ar3,D1Ar3 ! N = - N
LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT
LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32)
OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP
!! After the swap D1Ar3 holds COUNT (and D0Ar4 holds N, no longer used).
!! NOTE(review): presumably done so the shift amount sits in the same
!! data unit (D1) as HI - confirm against the Meta ISA rules.
SWAP D1Ar3,D0Ar4
ASR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT
MOV PC,D1RtP
$L20:
!! Shift >= 32
ASR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N
ASR D1Re0,D1Re0,#31 ! HI = HI >> 31
MOV PC,D1RtP
.size ___ashrdi3,.-___ashrdi3
168 changes: 168 additions & 0 deletions arch/metag/lib/checksum.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IP/TCP/UDP checksumming routines
*
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Tom May, <ftom@netcom.com>
* Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek:
* Fixed some nasty bugs, causing some horrible crashes.
* A: At some points, the sum (%0) was used as
* length-counter instead of the length counter
* (%1). Thanks to Roman Hodek for pointing this out.
* B: GCC seems to mess up if one uses too many
* data-registers to hold input values and one tries to
* specify d0 and d1 as scratch registers. Letting gcc
* choose these registers itself solves the problem.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/

/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
kills, so most of the assembly has to go. */

#include <linux/module.h>
#include <net/checksum.h>

#include <asm/byteorder.h>

/*
 * Reduce a 32-bit accumulator to 16 bits by adding its high halfword
 * into its low halfword, then folding the resulting carry back in.
 */
static inline unsigned short from32to16(unsigned int x)
{
	unsigned int low = x & 0xffff;
	unsigned int high = x >> 16;
	unsigned int folded = low + high;	/* may be 17 bits wide */

	/* wrap the possible carry bit back into the low 16 bits */
	folded = (folded >> 16) + (folded & 0xffff);
	return folded;
}

/*
 * Sum the len bytes at buff as 16-bit quantities with end-around carry
 * and return the 16-bit result (not complemented).  Returns 0 when
 * len <= 0.
 *
 * The buffer is consumed in stages so that every multi-byte load is
 * naturally aligned: an optional leading byte, an optional halfword,
 * a run of 32-bit words, then an optional trailing halfword and byte.
 * If the buffer started on an odd address the two bytes of the final
 * sum are swapped before returning.
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	unsigned int acc = 0;
	int started_odd;

	if (len <= 0)
		return acc;

	/* leading byte: brings buff to a 16-bit boundary */
	started_odd = (unsigned long) buff & 1;
	if (started_odd) {
#ifdef __LITTLE_ENDIAN
		acc += (*buff << 8);
#else
		acc = *buff;
#endif
		len--;
		buff++;
	}

	if (len >= 2) {
		/* leading halfword: brings buff to a 32-bit boundary */
		if ((unsigned long) buff & 2) {
			acc += *(unsigned short *) buff;
			len -= 2;
			buff += 2;
		}

		if (len >= 4) {
			const unsigned char *stop = buff + ((unsigned)len & ~3);
			unsigned int carry = 0;

			/*
			 * The carry of each addition is added in on the
			 * following iteration (and once more after the loop).
			 */
			do {
				unsigned int word = *(unsigned int *) buff;

				buff += 4;
				acc += carry;
				acc += word;
				carry = (word > acc);
			} while (buff < stop);

			acc += carry;
			/* make room for the halfword/byte additions below */
			acc = (acc & 0xffff) + (acc >> 16);
		}

		/* len itself was not reduced by the word loop; test its bits */
		if (len & 2) {
			acc += *(unsigned short *) buff;
			buff += 2;
		}
	}

	if (len & 1) {
#ifdef __LITTLE_ENDIAN
		acc += *buff;
#else
		acc += (*buff << 8);
#endif
	}

	acc = from32to16(acc);
	if (started_odd)
		acc = ((acc >> 8) & 0xff) | ((acc & 0xff) << 8);

	return acc;
}
EXPORT_SYMBOL(ip_fast_csum);

/*
 * Checksum the len bytes at buff and accumulate the result into the
 * 32-bit partial checksum "wsum".
 *
 * The return value is a 32-bit partial checksum that can be passed
 * back in as "wsum" for the next fragment, or to csum_tcpudp_magic.
 *
 * Every fragment except the last must have an even length; only the
 * last one may be odd.  Performance is best when buff is aligned on a
 * 32-bit boundary.
 */
__wsum csum_partial(const void *buff, int len, __wsum wsum)
{
	unsigned int previous = (__force unsigned int)wsum;
	unsigned int total = do_csum(buff, len) + previous;

	/* the addition wrapped: feed the carry back in */
	if (total < previous)
		total++;

	return (__force __wsum)total;
}
EXPORT_SYMBOL(csum_partial);

/*
 * Checksum helper for miscellaneous IP-like checksums (mainly used by
 * icmp.c): returns the complement of do_csum() over the len bytes at
 * buff, truncated to a __sum16.
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
	unsigned int folded = do_csum(buff, len);

	return (__force __sum16)~folded;
}
EXPORT_SYMBOL(ip_compute_csum);

/*
 * Copy len bytes from the user-space buffer src to dst, then checksum
 * dst as csum_partial() would.
 *
 * The value returned by __copy_from_user() is treated as the number of
 * trailing bytes that could not be copied.  When it is non-zero that
 * tail of dst is zero-filled and *csum_err is set to -EFAULT;
 * otherwise *csum_err is set to 0.  The checksum over all len bytes of
 * dst is returned in either case.
 */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst, int len,
			    __wsum sum, int *csum_err)
{
	int uncopied = __copy_from_user(dst, src, len);

	if (!uncopied) {
		*csum_err = 0;
	} else {
		/* zero the part of dst that was not written */
		memset(dst + len - uncopied, 0, uncopied);
		*csum_err = -EFAULT;
	}

	return csum_partial(dst, len, sum);
}
EXPORT_SYMBOL(csum_partial_copy_from_user);

/*
 * Copy len bytes from src to dst with memcpy(), then return the
 * checksum of the copied data accumulated into "sum", exactly as
 * csum_partial() would compute it over dst.
 */
__wsum
csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
{
	__wsum result;

	memcpy(dst, src, len);
	result = csum_partial(dst, len, sum);

	return result;
}
EXPORT_SYMBOL(csum_partial_copy);
17 changes: 17 additions & 0 deletions arch/metag/lib/clear_page.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
! Copyright 2007,2008,2009 Imagination Technologies Ltd.

#include <asm/page.h>

.text
.global _clear_page
.type _clear_page,function
!! _clear_page: fill one page with zeroes.
!! D1Ar1 - page
!! D1Ar1 is advanced by the post-increment stores; D0Re0 and D1Re0 are
!! both overwritten with zero.
_clear_page:
!! Repeat counter = (number of 8-byte chunks in a page) - 1.
MOV TXRPT,#((PAGE_SIZE / 8) - 1)
MOV D0Re0,#0
MOV D1Re0,#0
$Lclear_page_loop:
!! Store the zeroed D0Re0/D1Re0 pair at [D1Ar1] and post-increment.
!! NOTE(review): presumably a single 64-bit store that advances D1Ar1 by
!! 8, matching the PAGE_SIZE / 8 count above - confirm.
SETL [D1Ar1++],D0Re0,D1Re0
!! NOTE(review): BR presumably branches back while TXRPT is non-zero and
!! decrements it (hardware repeat loop) - confirm against the Meta ISA.
BR $Lclear_page_loop
MOV PC,D1RtP
.size _clear_page,.-_clear_page
Loading

0 comments on commit 086e9dc

Please sign in to comment.