diff --git a/[refs] b/[refs]
index 7ae54205f76d..1de5906e2a0b 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 86e1d57e4f24ca27ce813bdc2afaac4adafcbaf4
+refs/heads/master: a421018e8c10e5593a1fee076af72a66c3fe8ca3
diff --git a/trunk/.gitignore b/trunk/.gitignore
index b93fb7eff942..946c7ec5c922 100644
--- a/trunk/.gitignore
+++ b/trunk/.gitignore
@@ -25,6 +25,7 @@
*.elf
*.bin
*.gz
+*.bz2
*.lzma
*.patch
*.gcno
diff --git a/trunk/Documentation/DocBook/tracepoint.tmpl b/trunk/Documentation/DocBook/tracepoint.tmpl
index b0756d0fd579..8bca1d5cec09 100644
--- a/trunk/Documentation/DocBook/tracepoint.tmpl
+++ b/trunk/Documentation/DocBook/tracepoint.tmpl
@@ -86,4 +86,9 @@
!Iinclude/trace/events/irq.h
+
+ SIGNAL
+!Iinclude/trace/events/signal.h
+
+
diff --git a/trunk/Documentation/RCU/trace.txt b/trunk/Documentation/RCU/trace.txt
index 187bbf10c923..8608fd85e921 100644
--- a/trunk/Documentation/RCU/trace.txt
+++ b/trunk/Documentation/RCU/trace.txt
@@ -1,185 +1,10 @@
CONFIG_RCU_TRACE debugfs Files and Formats
-The rcupreempt and rcutree implementations of RCU provide debugfs trace
-output that summarizes counters and state. This information is useful for
-debugging RCU itself, and can sometimes also help to debug abuses of RCU.
-Note that the rcuclassic implementation of RCU does not provide debugfs
-trace output.
-
-The following sections describe the debugfs files and formats for
-preemptable RCU (rcupreempt) and hierarchical RCU (rcutree).
-
-
-Preemptable RCU debugfs Files and Formats
-
-This implementation of RCU provides three debugfs files under the
-top-level directory RCU: rcu/rcuctrs (which displays the per-CPU
-counters used by preemptable RCU) rcu/rcugp (which displays grace-period
-counters), and rcu/rcustats (which internal counters for debugging RCU).
-
-The output of "cat rcu/rcuctrs" looks as follows:
-
-CPU last cur F M
- 0 5 -5 0 0
- 1 -1 0 0 0
- 2 0 1 0 0
- 3 0 1 0 0
- 4 0 1 0 0
- 5 0 1 0 0
- 6 0 2 0 0
- 7 0 -1 0 0
- 8 0 1 0 0
-ggp = 26226, state = waitzero
-
-The per-CPU fields are as follows:
-
-o "CPU" gives the CPU number. Offline CPUs are not displayed.
-
-o "last" gives the value of the counter that is being decremented
- for the current grace period phase. In the example above,
- the counters sum to 4, indicating that there are still four
- RCU read-side critical sections still running that started
- before the last counter flip.
-
-o "cur" gives the value of the counter that is currently being
- both incremented (by rcu_read_lock()) and decremented (by
- rcu_read_unlock()). In the example above, the counters sum to
- 1, indicating that there is only one RCU read-side critical section
- still running that started after the last counter flip.
-
-o "F" indicates whether RCU is waiting for this CPU to acknowledge
- a counter flip. In the above example, RCU is not waiting on any,
- which is consistent with the state being "waitzero" rather than
- "waitack".
-
-o "M" indicates whether RCU is waiting for this CPU to execute a
- memory barrier. In the above example, RCU is not waiting on any,
- which is consistent with the state being "waitzero" rather than
- "waitmb".
-
-o "ggp" is the global grace-period counter.
-
-o "state" is the RCU state, which can be one of the following:
-
- o "idle": there is no grace period in progress.
-
- o "waitack": RCU just incremented the global grace-period
- counter, which has the effect of reversing the roles of
- the "last" and "cur" counters above, and is waiting for
- all the CPUs to acknowledge the flip. Once the flip has
- been acknowledged, CPUs will no longer be incrementing
- what are now the "last" counters, so that their sum will
- decrease monotonically down to zero.
-
- o "waitzero": RCU is waiting for the sum of the "last" counters
- to decrease to zero.
-
- o "waitmb": RCU is waiting for each CPU to execute a memory
- barrier, which ensures that instructions from a given CPU's
- last RCU read-side critical section cannot be reordered
- with instructions following the memory-barrier instruction.
-
-The output of "cat rcu/rcugp" looks as follows:
-
-oldggp=48870 newggp=48873
-
-Note that reading from this file provokes a synchronize_rcu(). The
-"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before
-executing the synchronize_rcu(), and the "newggp" value is also the
-"ggp" value, but taken after the synchronize_rcu() command returns.
-
-
-The output of "cat rcu/rcugp" looks as follows:
-
-na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871
-1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640
-z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639
-
-These are counters tracking internal preemptable-RCU events, however,
-some of them may be useful for debugging algorithms using RCU. In
-particular, the "nl", "wl", and "dl" values track the number of RCU
-callbacks in various states. The fields are as follows:
-
-o "na" is the total number of RCU callbacks that have been enqueued
- since boot.
-
-o "nl" is the number of RCU callbacks waiting for the previous
- grace period to end so that they can start waiting on the next
- grace period.
-
-o "wa" is the total number of RCU callbacks that have started waiting
- for a grace period since boot. "na" should be roughly equal to
- "nl" plus "wa".
-
-o "wl" is the number of RCU callbacks currently waiting for their
- grace period to end.
-
-o "da" is the total number of RCU callbacks whose grace periods
- have completed since boot. "wa" should be roughly equal to
- "wl" plus "da".
-
-o "dr" is the total number of RCU callbacks that have been removed
- from the list of callbacks ready to invoke. "dr" should be roughly
- equal to "da".
-
-o "di" is the total number of RCU callbacks that have been invoked
- since boot. "di" should be roughly equal to "da", though some
- early versions of preemptable RCU had a bug so that only the
- last CPU's count of invocations was displayed, rather than the
- sum of all CPU's counts.
-
-o "1" is the number of calls to rcu_try_flip(). This should be
- roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1"
- described below. In other words, the number of times that
- the state machine is visited should be equal to the sum of the
- number of times that each state is visited plus the number of
- times that the state-machine lock acquisition failed.
-
-o "e1" is the number of times that rcu_try_flip() was unable to
- acquire the fliplock.
-
-o "i1" is the number of calls to rcu_try_flip_idle().
-
-o "ie1" is the number of times rcu_try_flip_idle() exited early
- due to the calling CPU having no work for RCU.
-
-o "g1" is the number of times that rcu_try_flip_idle() decided
- to start a new grace period. "i1" should be roughly equal to
- "ie1" plus "g1".
-
-o "a1" is the number of calls to rcu_try_flip_waitack().
-
-o "ae1" is the number of times that rcu_try_flip_waitack() found
- that at least one CPU had not yet acknowledge the new grace period
- (AKA "counter flip").
-
-o "a2" is the number of time rcu_try_flip_waitack() found that
- all CPUs had acknowledged. "a1" should be roughly equal to
- "ae1" plus "a2". (This particular output was collected on
- a 128-CPU machine, hence the smaller-than-usual fraction of
- calls to rcu_try_flip_waitack() finding all CPUs having already
- acknowledged.)
-
-o "z1" is the number of calls to rcu_try_flip_waitzero().
-
-o "ze1" is the number of times that rcu_try_flip_waitzero() found
- that not all of the old RCU read-side critical sections had
- completed.
-
-o "z2" is the number of times that rcu_try_flip_waitzero() finds
- the sum of the counters equal to zero, in other words, that
- all of the old RCU read-side critical sections had completed.
- The value of "z1" should be roughly equal to "ze1" plus
- "z2".
-
-o "m1" is the number of calls to rcu_try_flip_waitmb().
-
-o "me1" is the number of times that rcu_try_flip_waitmb() finds
- that at least one CPU has not yet executed a memory barrier.
-
-o "m2" is the number of times that rcu_try_flip_waitmb() finds that
- all CPUs have executed a memory barrier.
+The rcutree implementation of RCU provides debugfs trace output that
+summarizes counters and state. This information is useful for debugging
+RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats.
Hierarchical RCU debugfs Files and Formats
@@ -210,9 +35,10 @@ rcu_bh:
6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-The first section lists the rcu_data structures for rcu, the second for
-rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system.
-The fields are as follows:
+The first section lists the rcu_data structures for rcu_sched, the second
+for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
+additional section for rcu_preempt. Each section has one line per CPU,
+or eight for this 8-CPU system. The fields are as follows:
o The number at the beginning of each line is the CPU number.
CPUs numbers followed by an exclamation mark are offline,
@@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number.
o "c" is the count of grace periods that this CPU believes have
completed. CPUs in dynticks idle mode may lag quite a ways
- behind, for example, CPU 4 under "rcu" above, which has slept
- through the past 25 RCU grace periods. It is not unusual to
- see CPUs lagging by thousands of grace periods.
+ behind, for example, CPU 4 under "rcu_sched" above, which has
+ slept through the past 25 RCU grace periods. It is not unusual
+ to see CPUs lagging by thousands of grace periods.
o "g" is the count of grace periods that this CPU believes have
started. Again, CPUs in dynticks idle mode may lag behind.
@@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows:
rcu_sched: completed=33062 gpnum=33063
rcu_bh: completed=464 gpnum=464
-Again, this output is for both "rcu" and "rcu_bh". The fields are
-taken from the rcu_state structure, and are as follows:
+Again, this output is for both "rcu_sched" and "rcu_bh". Note that
+kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional
+"rcu_preempt" line. The fields are taken from the rcu_state structure,
+and are as follows:
o "completed" is the number of grace periods that have completed.
It is comparable to the "c" field from rcu/rcudata in that a
@@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is
If these two fields are equal (as they are for "rcu_bh" above),
then there is no grace period in progress, in other words, RCU
is idle. On the other hand, if the two fields differ (as they
- do for "rcu" above), then an RCU grace period is in progress.
+ do for "rcu_sched" above), then an RCU grace period is in progress.
The output of "cat rcu/rcuhier" looks as follows, with very long lines:
-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
-1/1 0:127 ^0
-3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3
-3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+1/1 .>. 0:127 ^0
+3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
+3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
-0/1 0:127 ^0
-0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3
-0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+0/1 .>. 0:127 ^0
+0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
+0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
-This is once again split into "rcu" and "rcu_bh" portions. The fields are
-as follows:
+This is once again split into "rcu_sched" and "rcu_bh" portions,
+and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
+"rcu_preempt" section. The fields are as follows:
o "c" is exactly the same as "completed" under rcu/rcugp.
@@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that
exited immediately (without even being counted in nfqs above)
due to contention on ->fqslock.
+o "oqlen" is the number of callbacks on the "orphan" callback
+ list. RCU callbacks are placed on this list by CPUs going
+ offline, and are "adopted" either by the CPU helping the outgoing
+ CPU or by the next rcu_barrier*() call, whichever comes first.
+
o Each element of the form "1/1 0:127 ^0" represents one struct
rcu_node. Each line represents one level of the hierarchy, from
root to leaves. It is best to think of the rcu_data structures
@@ -379,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
might be either one, two, or three levels of rcu_node structures,
depending on the relationship between CONFIG_RCU_FANOUT and
CONFIG_NR_CPUS.
-
+
o The numbers separated by the "/" are the qsmask followed
by the qsmaskinit. The qsmask will have one bit
set for each entity in the next lower level that
@@ -389,10 +223,19 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
The value of qsmaskinit is assigned to that of qsmask
at the beginning of each grace period.
- For example, for "rcu", the qsmask of the first entry
- of the lowest level is 0x14, meaning that we are still
- waiting for CPUs 2 and 4 to check in for the current
- grace period.
+ For example, for "rcu_sched", the qsmask of the first
+ entry of the lowest level is 0x14, meaning that we
+ are still waiting for CPUs 2 and 4 to check in for the
+ current grace period.
+
+ o The characters separated by the ">" indicate the state
+ of the blocked-tasks lists. A "T" preceding the ">"
+ indicates that at least one task blocked in an RCU
+ read-side critical section blocks the current grace
+ period, while a "." preceding the ">" indicates otherwise.
+ The character following the ">" indicates similarly for
+ the next grace period. A "T" should appear in this
+ field only for rcu-preempt.
o The numbers separated by the ":" are the range of CPUs
served by this struct rcu_node. This can be helpful
@@ -431,8 +274,9 @@ rcu_bh:
6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
-As always, this is once again split into "rcu" and "rcu_bh" portions.
-The fields are as follows:
+As always, this is once again split into "rcu_sched" and "rcu_bh"
+portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional
+"rcu_preempt" section. The fields are as follows:
o "np" is the number of times that __rcu_pending() has been invoked
for the corresponding flavor of RCU.
diff --git a/trunk/Documentation/RCU/whatisRCU.txt b/trunk/Documentation/RCU/whatisRCU.txt
index e41a7fecf0d3..d542ca243b80 100644
--- a/trunk/Documentation/RCU/whatisRCU.txt
+++ b/trunk/Documentation/RCU/whatisRCU.txt
@@ -830,7 +830,7 @@ sched: Critical sections Grace period Barrier
SRCU: Critical sections Grace period Barrier
srcu_read_lock synchronize_srcu N/A
- srcu_read_unlock
+ srcu_read_unlock synchronize_srcu_expedited
SRCU: Initialization/cleanup
init_srcu_struct
diff --git a/trunk/Documentation/dontdiff b/trunk/Documentation/dontdiff
index e1efc400bed6..e151b2a36267 100644
--- a/trunk/Documentation/dontdiff
+++ b/trunk/Documentation/dontdiff
@@ -65,6 +65,7 @@ aicdb.h*
asm-offsets.h
asm_offsets.h
autoconf.h*
+av_permissions.h
bbootsect
bin2c
binkernel.spec
@@ -95,12 +96,14 @@ docproc
elf2ecoff
elfconfig.h*
fixdep
+flask.h
fore200e_mkfirm
fore200e_pca_fw.c*
gconf
gen-devlist
gen_crc32table
gen_init_cpio
+genheaders
genksyms
*_gray256.c
ihex2fw
diff --git a/trunk/Documentation/fb/framebuffer.txt b/trunk/Documentation/fb/framebuffer.txt
index b3e3a0356839..fe79e3c8847d 100644
--- a/trunk/Documentation/fb/framebuffer.txt
+++ b/trunk/Documentation/fb/framebuffer.txt
@@ -312,10 +312,8 @@ and to the following documentation:
8. Mailing list
---------------
-There are several frame buffer device related mailing lists at SourceForge:
- - linux-fbdev-announce@lists.sourceforge.net, for announcements,
- - linux-fbdev-user@lists.sourceforge.net, for generic user support,
- - linux-fbdev-devel@lists.sourceforge.net, for project developers.
+There is a frame buffer device related mailing list at kernel.org:
+linux-fbdev@vger.kernel.org.
Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for
subscription information and archive browsing.
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index bc693fffabe0..f613df8ec7bf 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -6,6 +6,21 @@ be removed from this file.
---------------------------
+What: USER_SCHED
+When: 2.6.34
+
+Why: USER_SCHED was implemented as a proof of concept for group scheduling.
+ The effect of USER_SCHED can already be achieved from userspace with
+ the help of libcgroup. The removal of USER_SCHED will also simplify
+ the scheduler code with the removal of one major ifdef. There are also
+ issues USER_SCHED has with USER_NS. A decision was taken not to fix
+ those and instead remove USER_SCHED. Also new group scheduling
+ features will not be implemented for USER_SCHED.
+
+Who: Dhaval Giani
+
+---------------------------
+
What: PRISM54
When: 2.6.34
diff --git a/trunk/Documentation/filesystems/caching/fscache.txt b/trunk/Documentation/filesystems/caching/fscache.txt
index 9e94b9491d89..a91e2e2095b0 100644
--- a/trunk/Documentation/filesystems/caching/fscache.txt
+++ b/trunk/Documentation/filesystems/caching/fscache.txt
@@ -235,6 +235,7 @@ proc files.
neg=N Number of negative lookups made
pos=N Number of positive lookups made
crt=N Number of objects created by lookup
+ tmo=N Number of lookups timed out and requeued
Updates n=N Number of update cookie requests seen
nul=N Number of upd reqs given a NULL parent
run=N Number of upd reqs granted CPU time
@@ -250,8 +251,10 @@ proc files.
ok=N Number of successful alloc reqs
wt=N Number of alloc reqs that waited on lookup completion
nbf=N Number of alloc reqs rejected -ENOBUFS
+ int=N Number of alloc reqs aborted -ERESTARTSYS
ops=N Number of alloc reqs submitted
owt=N Number of alloc reqs waited for CPU time
+ abt=N Number of alloc reqs aborted due to object death
Retrvls n=N Number of retrieval (read) requests seen
ok=N Number of successful retr reqs
wt=N Number of retr reqs that waited on lookup completion
@@ -261,6 +264,7 @@ proc files.
oom=N Number of retr reqs failed -ENOMEM
ops=N Number of retr reqs submitted
owt=N Number of retr reqs waited for CPU time
+ abt=N Number of retr reqs aborted due to object death
Stores n=N Number of storage (write) requests seen
ok=N Number of successful store reqs
agn=N Number of store reqs on a page already pending storage
@@ -268,12 +272,37 @@ proc files.
oom=N Number of store reqs failed -ENOMEM
ops=N Number of store reqs submitted
run=N Number of store reqs granted CPU time
+ pgs=N Number of pages given store req processing time
+ rxd=N Number of store reqs deleted from tracking tree
+ olm=N Number of store reqs over store limit
+ VmScan nos=N Number of release reqs against pages with no pending store
+ gon=N Number of release reqs against pages stored by time lock granted
+ bsy=N Number of release reqs ignored due to in-progress store
+ can=N Number of page stores cancelled due to release req
Ops pend=N Number of times async ops added to pending queues
run=N Number of times async ops given CPU time
enq=N Number of times async ops queued for processing
+ can=N Number of async ops cancelled
+ rej=N Number of async ops rejected due to object lookup/create failure
dfr=N Number of async ops queued for deferred release
rel=N Number of async ops released
gc=N Number of deferred-release async ops garbage collected
+ CacheOp alo=N Number of in-progress alloc_object() cache ops
+ luo=N Number of in-progress lookup_object() cache ops
+ luc=N Number of in-progress lookup_complete() cache ops
+ gro=N Number of in-progress grab_object() cache ops
+ upo=N Number of in-progress update_object() cache ops
+ dro=N Number of in-progress drop_object() cache ops
+ pto=N Number of in-progress put_object() cache ops
+ syn=N Number of in-progress sync_cache() cache ops
+ atc=N Number of in-progress attr_changed() cache ops
+ rap=N Number of in-progress read_or_alloc_page() cache ops
+ ras=N Number of in-progress read_or_alloc_pages() cache ops
+ alp=N Number of in-progress allocate_page() cache ops
+ als=N Number of in-progress allocate_pages() cache ops
+ wrp=N Number of in-progress write_page() cache ops
+ ucp=N Number of in-progress uncache_page() cache ops
+ dsp=N Number of in-progress dissociate_pages() cache ops
(*) /proc/fs/fscache/histogram
@@ -299,6 +328,87 @@ proc files.
jiffy range covered, and the SECS field the equivalent number of seconds.
+===========
+OBJECT LIST
+===========
+
+If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
+list of all the objects currently allocated and allow them to be viewed
+through:
+
+ /proc/fs/fscache/objects
+
+This will look something like:
+
+ [root@andromeda ~]# head /proc/fs/fscache/objects
+ OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA
+ ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
+ 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+ 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+
+where the first set of columns before the '|' describe the object:
+
+ COLUMN DESCRIPTION
+ ======= ===============================================================
+ OBJECT Object debugging ID (appears as OBJ%x in some debug messages)
+ PARENT Debugging ID of parent object
+ STAT Object state
+ CHLDN Number of child objects of this object
+ OPS Number of outstanding operations on this object
+ OOP Number of outstanding child object management operations
+ IPR
+ EX Number of outstanding exclusive operations
+ READS Number of outstanding read operations
+ EM Object's event mask
+ EV Events raised on this object
+ F Object flags
+ S Object slow-work work item flags
+
+and the second set of columns describe the object's cookie, if present:
+
+ COLUMN DESCRIPTION
+ =============== =======================================================
+ NETFS_COOKIE_DEF Name of netfs cookie definition
+ TY Cookie type (IX - index, DT - data, hex - special)
+ FL Cookie flags
+ NETFS_DATA Netfs private data stored in the cookie
+ OBJECT_KEY Object key } 1 column, with separating comma
+ AUX_DATA Object aux data } presence may be configured
+
+The data shown may be filtered by attaching the a key to an appropriate keyring
+before viewing the file. Something like:
+
+ keyctl add user fscache:objlist @s
+
+where are a selection of the following letters:
+
+ K Show hexdump of object key (don't show if not given)
+ A Show hexdump of object aux data (don't show if not given)
+
+and the following paired letters:
+
+ C Show objects that have a cookie
+ c Show objects that don't have a cookie
+ B Show objects that are busy
+ b Show objects that aren't busy
+ W Show objects that have pending writes
+ w Show objects that don't have pending writes
+ R Show objects that have outstanding reads
+ r Show objects that don't have outstanding reads
+ S Show objects that have slow work queued
+ s Show objects that don't have slow work queued
+
+If neither side of a letter pair is given, then both are implied. For example:
+
+ keyctl add user fscache:objlist KB @s
+
+shows objects that are busy, and lists their object keys, but does not dump
+their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is
+not implied.
+
+By default all objects and all fields will be shown.
+
+
=========
DEBUGGING
=========
diff --git a/trunk/Documentation/filesystems/caching/netfs-api.txt b/trunk/Documentation/filesystems/caching/netfs-api.txt
index 2666b1ed5e9e..1902c57b72ef 100644
--- a/trunk/Documentation/filesystems/caching/netfs-api.txt
+++ b/trunk/Documentation/filesystems/caching/netfs-api.txt
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below).
Furthermore, note that this does not cancel the asynchronous read or write
operation started by the read/alloc and write functions, so the page
-invalidation and release functions must use:
+invalidation functions must use:
bool fscache_check_page_write(struct fscache_cookie *cookie,
struct page *page);
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and:
to wait for it to finish if it is.
+When releasepage() is being implemented, a special FS-Cache function exists to
+manage the heuristics of coping with vmscan trying to eject pages, which may
+conflict with the cache trying to write pages to the cache (which may itself
+need to allocate memory):
+
+ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp);
+
+This takes the netfs cookie, and the page and gfp arguments as supplied to
+releasepage(). It will return false if the page cannot be released yet for
+some reason and if it returns true, the page has been uncached and can now be
+released.
+
+To make a page available for release, this function may wait for an outstanding
+storage request to complete, or it may attempt to cancel the storage request -
+in which case the page will not be stored in the cache this time.
+
+
==========================
INDEX AND DATA FILE UPDATE
==========================
diff --git a/trunk/Documentation/filesystems/ocfs2.txt b/trunk/Documentation/filesystems/ocfs2.txt
index c2a0871280a0..c58b9f5ba002 100644
--- a/trunk/Documentation/filesystems/ocfs2.txt
+++ b/trunk/Documentation/filesystems/ocfs2.txt
@@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects.
Authors in alphabetical order:
Joel Becker
Zach Brown
-Mark Fasheh
+Mark Fasheh
Kurt Hackel
+Tao Ma
Sunil Mushran
Manish Singh
+Tiger Yang
Caveats
=======
Features which OCFS2 does not support yet:
- - quotas
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
@@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata
performance.
localalloc=8(*) Allows custom localalloc size in MB. If the value is too
large, the fs will silently revert it to the default.
- Localalloc is not enabled for local mounts.
localflocks This disables cluster aware flock.
inode64 Indicates that Ocfs2 is allowed to create inodes at
any location in the filesystem, including those which
diff --git a/trunk/Documentation/filesystems/proc.txt b/trunk/Documentation/filesystems/proc.txt
index 2c48f945546b..4af0018533f2 100644
--- a/trunk/Documentation/filesystems/proc.txt
+++ b/trunk/Documentation/filesystems/proc.txt
@@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right:
- irq: servicing interrupts
- softirq: servicing softirqs
- steal: involuntary wait
-- guest: running a guest
+- guest: running a normal guest
+- guest_nice: running a niced guest
The "intr" line gives counts of interrupts serviced since boot time, for each
of the possible system interrupts. The first column is the total of all
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index 9107b387e91f..fce5b5e516cc 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -85,7 +85,6 @@ parameter is applicable:
PPT Parallel port support is enabled.
PS2 Appropriate PS/2 support is enabled.
RAM RAM disk support is enabled.
- ROOTPLUG The example Root Plug LSM is enabled.
S390 S390 architecture is enabled.
SCSI Appropriate SCSI support is enabled.
A lot of drivers has their options described inside of
@@ -345,6 +344,15 @@ and is between 256 and 4096 characters. It is defined in the file
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
+ show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller
+ Limit apic dumping. The parameter defines the maximal
+ number of local apics being dumped. Also it is possible
+ to set it to "all" by meaning -- no limit here.
+ Format: { 1 (default) | 2 | ... | all }.
+ The parameter valid if only apic=debug or
+ apic=verbose is specified.
+ Example: apic=debug show_lapic=all
+
apm= [APM] Advanced Power Management
See header of arch/x86/kernel/apm_32.c.
@@ -779,6 +787,13 @@ and is between 256 and 4096 characters. It is defined in the file
by the set_ftrace_notrace file in the debugfs
tracing directory.
+ ftrace_graph_filter=[function-list]
+ [FTRACE] Limit the top level callers functions traced
+ by the function graph tracer at boot up.
+ function-list is a comma separated list of functions
+ that can be changed at run time by the
+ set_graph_function file in the debugfs tracing directory.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
@@ -2032,8 +2047,15 @@ and is between 256 and 4096 characters. It is defined in the file
print-fatal-signals=
[KNL] debug: print fatal signals
- print-fatal-signals=1: print segfault info to
- the kernel console.
+
+ If enabled, warn about various signal handling
+ related application anomalies: too many signals,
+ too many POSIX.1 timers, fatal signals causing a
+ coredump - etc.
+
+ If you hit the warning due to signal overflow,
+ you might want to try "ulimit -i unlimited".
+
default: off.
printk.time= Show timing data prefixed to each printk message line
@@ -2164,15 +2186,6 @@ and is between 256 and 4096 characters. It is defined in the file
Useful for devices that are detected asynchronously
(e.g. USB and MMC devices).
- root_plug.vendor_id=
- [ROOTPLUG] Override the default vendor ID
-
- root_plug.product_id=
- [ROOTPLUG] Override the default product ID
-
- root_plug.debug=
- [ROOTPLUG] Enable debugging output
-
rw [KNL] Mount root device read-write on boot
S [KNL] Run init in single mode
@@ -2182,6 +2195,8 @@ and is between 256 and 4096 characters. It is defined in the file
sbni= [NET] Granch SBNI12 leased line adapter
+ sched_debug [KNL] Enables verbose scheduler debug messages.
+
sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver
Format: [,[,]]
diff --git a/trunk/Documentation/pcmcia/driver-changes.txt b/trunk/Documentation/pcmcia/driver-changes.txt
index 059934363caf..446f43b309df 100644
--- a/trunk/Documentation/pcmcia/driver-changes.txt
+++ b/trunk/Documentation/pcmcia/driver-changes.txt
@@ -1,5 +1,17 @@
This file details changes in 2.6 which affect PCMCIA card driver authors:
+* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
+ Instead of the cs_error() callback or the CS_CHECK() macro, please use
+ Linux-style checking of return values, and -- if necessary -- debug
+ messages using "dev_dbg()" or "pr_debug()".
+
+* New CIS tuple access (as of 2.6.33)
+ Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
+ pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
+ only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is
+ interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE,
+ a new helper "pcmcia_get_mac_from_cis()" was added.
+
* New configuration loop helper (as of 2.6.28)
By calling pcmcia_loop_config(), a driver can iterate over all available
configuration options. During a driver's probe() phase, one doesn't need
diff --git a/trunk/Documentation/slow-work.txt b/trunk/Documentation/slow-work.txt
index ebc50f808ea4..9dbf4470c7e1 100644
--- a/trunk/Documentation/slow-work.txt
+++ b/trunk/Documentation/slow-work.txt
@@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long.
Operations of both types may sleep during execution, thus tying up the thread
loaned to it.
+A further class of work item is available, based on the slow work item class:
+
+ (*) Delayed slow work items.
+
+These are slow work items that have a timer to defer queueing of the item for
+a while.
+
THREAD-TO-CLASS ALLOCATION
--------------------------
@@ -64,9 +71,11 @@ USING SLOW WORK ITEMS
Firstly, a module or subsystem wanting to make use of slow work items must
register its interest:
- int ret = slow_work_register_user();
+ int ret = slow_work_register_user(struct module *module);
-This will return 0 if successful, or a -ve error upon failure.
+This will return 0 if successful, or a -ve error upon failure. The module
+pointer should be the module interested in using this facility (almost
+certainly THIS_MODULE).
Slow work items may then be set up by:
@@ -91,6 +100,10 @@ Slow work items may then be set up by:
slow_work_init(&myitem, &myitem_ops);
+ or:
+
+ delayed_slow_work_init(&myitem, &myitem_ops);
+
or:
vslow_work_init(&myitem, &myitem_ops);
@@ -102,15 +115,92 @@ A suitably set up work item can then be enqueued for processing:
int ret = slow_work_enqueue(&myitem);
This will return a -ve error if the thread pool is unable to gain a reference
-on the item, 0 otherwise.
+on the item, 0 otherwise, or (for delayed work):
+
+ int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay);
The items are reference counted, so there ought to be no need for a flush
-operation. When all a module's slow work items have been processed, and the
+operation. But as the reference counting is optional, means to cancel
+existing work items are also included:
+
+ cancel_slow_work(&myitem);
+ cancel_delayed_slow_work(&myitem);
+
+can be used to cancel pending work. The above cancel function waits for
+existing work to have been executed (or prevent execution of them, depending
+on timing).
+
+
+When all a module's slow work items have been processed, and the
module has no further interest in the facility, it should unregister its
interest:
- slow_work_unregister_user();
+ slow_work_unregister_user(struct module *module);
+
+The module pointer is used to wait for all outstanding work items for that
+module before completing the unregistration. This prevents the put_ref() code
+from being taken away before it completes. module should almost certainly be
+THIS_MODULE.
+
+
+================
+HELPER FUNCTIONS
+================
+
+The slow-work facility provides a function by which it can be determined
+whether or not an item is queued for later execution:
+
+ bool queued = slow_work_is_queued(struct slow_work *work);
+
+If it returns false, then the item is not on the queue (it may be executing
+with a requeue pending). This can be used to work out whether an item on which
+another depends is on the queue, thus allowing a dependent item to be queued
+after it.
+
+If the above shows an item on which another depends not to be queued, then the
+owner of the dependent item might need to wait. However, to avoid locking up
+the threads unnecessarily be sleeping in them, it can make sense under some
+circumstances to return the work item to the queue, thus deferring it until
+some other items have had a chance to make use of the yielded thread.
+
+To yield a thread and defer an item, the work function should simply enqueue
+the work item again and return. However, this doesn't work if there's nothing
+actually on the queue, as the thread just vacated will jump straight back into
+the item's work function, thus busy waiting on a CPU.
+
+Instead, the item should use the thread to wait for the dependency to go away,
+but rather than using schedule() or schedule_timeout() to sleep, it should use
+the following function:
+
+ bool requeue = slow_work_sleep_till_thread_needed(
+ struct slow_work *work,
+ signed long *_timeout);
+
+This will add a second wait and then sleep, such that it will be woken up if
+either something appears on the queue that could usefully make use of the
+thread - and behind which this item can be queued, or if the event the caller
+set up to wait for happens. True will be returned if something else appeared
+on the queue and this work function should perhaps return, of false if
+something else woke it up. The timeout is as for schedule_timeout().
+
+For example:
+
+ wq = bit_waitqueue(&my_flags, MY_BIT);
+ init_wait(&wait);
+ requeue = false;
+ do {
+ prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(MY_BIT, &my_flags))
+ break;
+ requeue = slow_work_sleep_till_thread_needed(&my_work,
+ &timeout);
+ } while (timeout > 0 && !requeue);
+ finish_wait(wq, &wait);
+ if (!test_bit(MY_BIT, &my_flags)
+ goto do_my_thing;
+ if (requeue)
+ return; // to slow_work
===============
@@ -118,7 +208,8 @@ ITEM OPERATIONS
===============
Each work item requires a table of operations of type struct slow_work_ops.
-All members are required:
+Only ->execute() is required; the getting and putting of a reference and the
+describing of an item are all optional.
(*) Get a reference on an item:
@@ -148,6 +239,16 @@ All members are required:
This should perform the work required of the item. It may sleep, it may
perform disk I/O and it may wait for locks.
+ (*) View an item through /proc:
+
+ void (*desc)(struct slow_work *work, struct seq_file *m);
+
+ If supplied, this should print to 'm' a small string describing the work
+ the item is to do. This should be no more than about 40 characters, and
+ shouldn't include a newline character.
+
+ See the 'Viewing executing and queued items' section below.
+
==================
POOL CONFIGURATION
@@ -172,3 +273,50 @@ The slow-work thread pool has a number of configurables:
is bounded to between 1 and one fewer than the number of active threads.
This ensures there is always at least one thread that can process very
slow work items, and always at least one thread that won't.
+
+
+==================================
+VIEWING EXECUTING AND QUEUED ITEMS
+==================================
+
+If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available:
+
+ /sys/kernel/debug/slow_work/runqueue
+
+through which the list of work items being executed and the queues of items to
+be executed may be viewed. The owner of a work item is given the chance to
+add some information of its own.
+
+The contents look something like the following:
+
+ THR PID ITEM ADDR FL MARK DESC
+ === ===== ================ == ===== ==========
+ 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK
+ 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2
+ 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK
+ 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN
+ 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2
+ 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2
+ 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2
+ 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN
+ vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2
+ vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2
+ vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2
+ vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2
+ vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2
+ vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2
+ vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK
+ vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK
+ vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK
+ vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK
+ vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK
+ vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK
+ vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK
+ vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK
+
+In the 'THR' column, executing items show the thread they're occupying and
+queued threads indicate which queue they're on. 'PID' shows the process ID of
+a slow-work thread that's executing something. 'FL' shows the work item flags.
+'MARK' indicates how long since an item was queued or began executing. Lastly,
+the 'DESC' column permits the owner of an item to give some information.
+
diff --git a/trunk/Documentation/sysctl/ctl_unnumbered.txt b/trunk/Documentation/sysctl/ctl_unnumbered.txt
deleted file mode 100644
index 23003a8ea3e7..000000000000
--- a/trunk/Documentation/sysctl/ctl_unnumbered.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-
-Except for a few extremely rare exceptions user space applications do not use
-the binary sysctl interface. Instead everyone uses /proc/sys/... with
-readable ascii names.
-
-Recently the kernel has started supporting setting the binary sysctl value to
-CTL_UNNUMBERED so we no longer need to assign a binary sysctl path to allow
-sysctls to show up in /proc/sys.
-
-Assigning binary sysctl numbers is an endless source of conflicts in sysctl.h,
-breaking of the user space ABI (because of those conflicts), and maintenance
-problems. A complete pass through all of the sysctl users revealed multiple
-instances where the sysctl binary interface was broken and had gone undetected
-for years.
-
-So please do not add new binary sysctl numbers. They are unneeded and
-problematic.
-
-If you really need a new binary sysctl number please first merge your sysctl
-into the kernel and then as a separate patch allocate a binary sysctl number.
-
-(ebiederm@xmission.com, June 2007)
diff --git a/trunk/Documentation/trace/ftrace-design.txt b/trunk/Documentation/trace/ftrace-design.txt
index 7003e10f10f5..641a1ef2a7ff 100644
--- a/trunk/Documentation/trace/ftrace-design.txt
+++ b/trunk/Documentation/trace/ftrace-design.txt
@@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option.
-HAVE_FTRACE_SYSCALLS
+HAVE_SYSCALL_TRACEPOINTS
---------------------
-
+You need very few things to get the syscalls tracing in an arch.
+
+- Have a NR_syscalls variable in that provides the number
+ of syscalls supported by the arch.
+- Implement arch_syscall_addr() that resolves a syscall address from a
+ syscall number.
+- Support the TIF_SYSCALL_TRACEPOINT thread flags
+- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
+ in the ptrace syscalls tracing path.
+- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
HAVE_FTRACE_MCOUNT_RECORD
diff --git a/trunk/Documentation/trace/kprobetrace.txt b/trunk/Documentation/trace/kprobetrace.txt
new file mode 100644
index 000000000000..47aabeebbdf6
--- /dev/null
+++ b/trunk/Documentation/trace/kprobetrace.txt
@@ -0,0 +1,149 @@
+ Kprobe-based Event Tracing
+ ==========================
+
+ Documentation is written by Masami Hiramatsu
+
+
+Overview
+--------
+These events are similar to tracepoint based events. Instead of Tracepoint,
+this is based on kprobes (kprobe and kretprobe). So it can probe wherever
+kprobes can probe (this means, all functions body except for __kprobes
+functions). Unlike the Tracepoint based event, this can be added and removed
+dynamically, on the fly.
+
+To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y.
+
+Similar to the events tracer, this doesn't need to be activated via
+current_tracer. Instead of that, add probe points via
+/sys/kernel/debug/tracing/kprobe_events, and enable it via
+/sys/kernel/debug/tracing/events/kprobes//enabled.
+
+
+Synopsis of kprobe_events
+-------------------------
+ p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+
+ GRP : Group name. If omitted, use "kprobes" for it.
+ EVENT : Event name. If omitted, the event name is generated
+ based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+ MEMADDR : Address where the probe is inserted.
+
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
+ %REG : Fetch register REG
+ @ADDR : Fetch memory at ADDR (ADDR should be in kernel)
+ @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
+ $stackN : Fetch Nth entry of stack (N >= 0)
+ $stack : Fetch stack address.
+ $argN : Fetch function argument. (N >= 0)(*)
+ $retval : Fetch return value.(**)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
+
+ (*) aN may not correct on asmlinkaged functions and at the middle of
+ function body.
+ (**) only for return probe.
+ (***) this is useful for fetching a field of data structures.
+
+
+Per-Probe Event Filtering
+-------------------------
+ Per-probe event filtering feature allows you to set different filter on each
+probe and gives you what arguments will be shown in trace buffer. If an event
+name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event
+under tracing/events/kprobes/, at the directory you can see 'id',
+'enabled', 'format' and 'filter'.
+
+enabled:
+ You can enable/disable the probe by writing 1 or 0 on it.
+
+format:
+ This shows the format of this probe event.
+
+filter:
+ You can write filtering rules of this event.
+
+id:
+ This shows the id of this probe event.
+
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/kprobe_profile.
+ The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to kprobe_events
+as below.
+
+ echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kprobe on the top of do_sys_open() function with recording
+1st to 4th arguments as "myprobe" event. As this example shows, users can
+choose more familiar names for each arguments.
+
+ echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kretprobe on the return point of do_sys_open() function with
+recording return value as "myretprobe" event.
+ You can see the format of these events via
+/sys/kernel/debug/tracing/events/kprobes//format.
+
+ cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+name: myprobe
+ID: 75
+format:
+ field:unsigned short common_type; offset:0; size:2;
+ field:unsigned char common_flags; offset:2; size:1;
+ field:unsigned char common_preempt_count; offset:3; size:1;
+ field:int common_pid; offset:4; size:4;
+ field:int common_tgid; offset:8; size:4;
+
+ field: unsigned long ip; offset:16;tsize:8;
+ field: int nargs; offset:24;tsize:4;
+ field: unsigned long dfd; offset:32;tsize:8;
+ field: unsigned long filename; offset:40;tsize:8;
+ field: unsigned long flags; offset:48;tsize:8;
+ field: unsigned long mode; offset:56;tsize:8;
+
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+
+
+ You can see that the event has 4 arguments as in the expressions you specified.
+
+ echo > /sys/kernel/debug/tracing/kprobe_events
+
+ This clears all probe points.
+
+ Right after definition, each event is disabled by default. For tracing these
+events, you need to enable it.
+
+ echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
+ echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
+
+ And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+ cat /sys/kernel/debug/tracing/trace
+# tracer: nop
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe
+ <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+ <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+
+
+ Each line shows when the kernel hits an event, and <- SYMBOL means kernel
+returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
+returns from do_sys_open to sys_open+0x1b).
+
+
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index c824b4d62754..4f96ac81089c 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -512,10 +512,32 @@ W: http://www.arm.linux.org.uk/
S: Maintained
F: arch/arm/
+ARM PRIMECELL AACI PL041 DRIVER
+M: Russell King
+S: Maintained
+F: sound/arm/aaci.*
+
+ARM PRIMECELL CLCD PL110 DRIVER
+M: Russell King
+S: Maintained
+F: drivers/video/amba-clcd.*
+
+ARM PRIMECELL KMI PL050 DRIVER
+M: Russell King
+S: Maintained
+F: drivers/input/serio/ambakmi.*
+F: include/linux/amba/kmi.h
+
ARM PRIMECELL MMCI PL180/1 DRIVER
S: Orphan
F: drivers/mmc/host/mmci.*
+ARM PRIMECELL BUS SUPPORT
+M: Russell King
+S: Maintained
+F: drivers/amba/
+F: include/linux/amba/bus.h
+
ARM/ADI ROADRUNNER MACHINE SUPPORT
M: Lennert Buytenhek
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1027,7 +1049,7 @@ F: drivers/serial/atmel_serial.c
ATMEL LCDFB DRIVER
M: Nicolas Ferre
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/atmel_lcdfb.c
F: include/video/atmel_lcdc.h
@@ -2113,7 +2135,7 @@ F: drivers/net/wan/dlci.c
F: drivers/net/wan/sdla.c
FRAMEBUFFER LAYER
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
W: http://linux-fbdev.sourceforge.net/
S: Orphan
F: Documentation/fb/
@@ -2136,7 +2158,7 @@ F: drivers/i2c/busses/i2c-cpm.c
FREESCALE IMX / MXC FRAMEBUFFER DRIVER
M: Sascha Hauer
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/plat-mxc/include/mach/imxfb.h
@@ -2312,6 +2334,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
S: Maintained
F: drivers/media/video/gspca/finepix.c
+GSPCA GL860 SUBDRIVER
+M: Olivier Lorin
+L: linux-media@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
+S: Maintained
+F: drivers/media/video/gspca/gl860/
+
GSPCA M5602 SUBDRIVER
M: Erik Andren
L: linux-media@vger.kernel.org
@@ -2533,8 +2562,7 @@ S: Maintained
F: Documentation/i2c/
F: drivers/i2c/
F: include/linux/i2c.h
-F: include/linux/i2c-dev.h
-F: include/linux/i2c-id.h
+F: include/linux/i2c-*.h
I2C-TINY-USB DRIVER
M: Till Harbaum
@@ -2635,7 +2663,7 @@ S: Supported
F: security/integrity/ima/
IMS TWINTURBO FRAMEBUFFER DRIVER
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Orphan
F: drivers/video/imsttfb.c
@@ -2670,14 +2698,14 @@ F: drivers/input/
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
M: Sylvain Meyer
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: Documentation/fb/intelfb.txt
F: drivers/video/intelfb/
INTEL 810/815 FRAMEBUFFER DRIVER
M: Antonino Daplas
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/i810/
@@ -2987,11 +3015,8 @@ S: Maintained
F: fs/autofs4/
KERNEL BUILD
-M: Sam Ravnborg
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-next.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-fixes.git
L: linux-kbuild@vger.kernel.org
-S: Maintained
+S: Orphan
F: Documentation/kbuild/
F: Makefile
F: scripts/Makefile.*
@@ -3391,7 +3416,7 @@ S: Supported
MATROX FRAMEBUFFER DRIVER
M: Petr Vandrovec
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/matrox/matroxfb_*
F: include/linux/matroxfb.h
@@ -3778,7 +3803,7 @@ F: fs/ntfs/
NVIDIA (rivafb and nvidiafb) FRAMEBUFFER DRIVER
M: Antonino Daplas
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/riva/
F: drivers/video/nvidia/
@@ -3813,7 +3838,7 @@ F: sound/soc/omap/
OMAP FRAMEBUFFER SUPPORT
M: Imre Deak
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
L: linux-omap@vger.kernel.org
S: Maintained
F: drivers/video/omap/
@@ -4319,14 +4344,14 @@ F: include/linux/qnxtypes.h
RADEON FRAMEBUFFER DISPLAY DRIVER
M: Benjamin Herrenschmidt
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/aty/radeon*
F: include/linux/radeonfb.h
RAGE128 FRAMEBUFFER DISPLAY DRIVER
M: Paul Mackerras
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/aty/aty128fb.c
@@ -4465,7 +4490,7 @@ F: drivers/net/wireless/rtl818x/rtl8187*
S3 SAVAGE FRAMEBUFFER DRIVER
M: Antonino Daplas
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/savage/
@@ -5628,7 +5653,7 @@ S: Maintained
UVESAFB DRIVER
M: Michal Januszewski
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
W: http://dev.gentoo.org/~spock/projects/uvesafb/
S: Maintained
F: Documentation/fb/uvesafb.txt
@@ -5661,7 +5686,7 @@ F: drivers/mmc/host/via-sdmmc.c
VIA UNICHROME(PRO)/CHROME9 FRAMEBUFFER DRIVER
M: Joseph Chan
M: Scott Fang
-L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/via/
diff --git a/trunk/Makefile b/trunk/Makefile
index ad8260102f64..33d4732a6c4a 100644
--- a/trunk/Makefile
+++ b/trunk/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 32
-EXTRAVERSION = -rc8
+EXTRAVERSION =
NAME = Man-Eating Seals of Antiquity
# *DOCUMENTATION*
@@ -379,6 +379,7 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exc
PHONY += scripts_basic
scripts_basic:
$(Q)$(MAKE) $(build)=scripts/basic
+ $(Q)rm -f .tmp_quiet_recordmcount
# To avoid any implicit rule to kick in, define an empty command.
scripts/basic/%: scripts_basic ;
diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/trunk/arch/Kconfig
+++ b/trunk/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_HW_BREAKPOINT
+ bool
+ depends on HAVE_PERF_EVENTS
+ select ANON_INODES
+ select PERF_EVENTS
+
+
source "kernel/gcov/Kconfig"
diff --git a/trunk/arch/alpha/include/asm/thread_info.h b/trunk/arch/alpha/include/asm/thread_info.h
index 815680b585ed..b3e888638bb7 100644
--- a/trunk/arch/alpha/include/asm/thread_info.h
+++ b/trunk/arch/alpha/include/asm/thread_info.h
@@ -61,21 +61,24 @@ register struct thread_info *__current_thread_info __asm__("$8");
/*
* Thread information flags:
* - these are process state flags and used from assembly
- * - pending work-to-be-done flags come first to fit in and immediate operand.
+ * - pending work-to-be-done flags come first and must be assigned to be
+ * within bits 0 to 7 to fit in and immediate operand.
+ * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned
+ * control flags.
*
* TIF_SYSCALL_TRACE is known to be 0 via blbs.
*/
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
-#define TIF_SIGPENDING 1 /* signal pending */
-#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_POLLING_NRFLAG 3 /* poll_idle is polling NEED_RESCHED */
-#define TIF_DIE_IF_KERNEL 4 /* dik recursion lock */
-#define TIF_UAC_NOPRINT 5 /* see sysinfo.h */
-#define TIF_UAC_NOFIX 6
-#define TIF_UAC_SIGBUS 7
-#define TIF_MEMDIE 8
-#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
-#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */
+#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
+#define TIF_SIGPENDING 2 /* signal pending */
+#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */
+#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */
+#define TIF_UAC_NOPRINT 10 /* see sysinfo.h */
+#define TIF_UAC_NOFIX 11
+#define TIF_UAC_SIGBUS 12
+#define TIF_MEMDIE 13
+#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */
#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<typename);
+ seq_printf(p, " %14s", irq_desc[irq].chip->name);
seq_printf(p, " %c%s",
(action->flags & IRQF_DISABLED)?'+':' ',
action->name);
diff --git a/trunk/arch/alpha/kernel/irq_alpha.c b/trunk/arch/alpha/kernel/irq_alpha.c
index 38c805dfc544..cfde865b78e0 100644
--- a/trunk/arch/alpha/kernel/irq_alpha.c
+++ b/trunk/arch/alpha/kernel/irq_alpha.c
@@ -228,7 +228,7 @@ struct irqaction timer_irqaction = {
};
static struct irq_chip rtc_irq_type = {
- .typename = "RTC",
+ .name = "RTC",
.startup = rtc_startup,
.shutdown = rtc_enable_disable,
.enable = rtc_enable_disable,
diff --git a/trunk/arch/alpha/kernel/irq_i8259.c b/trunk/arch/alpha/kernel/irq_i8259.c
index 50bfec9b588f..83a9ac280890 100644
--- a/trunk/arch/alpha/kernel/irq_i8259.c
+++ b/trunk/arch/alpha/kernel/irq_i8259.c
@@ -84,7 +84,7 @@ i8259a_end_irq(unsigned int irq)
}
struct irq_chip i8259a_irq_type = {
- .typename = "XT-PIC",
+ .name = "XT-PIC",
.startup = i8259a_startup_irq,
.shutdown = i8259a_disable_irq,
.enable = i8259a_enable_irq,
diff --git a/trunk/arch/alpha/kernel/irq_pyxis.c b/trunk/arch/alpha/kernel/irq_pyxis.c
index 69199a76ec4a..989ce46a0cf3 100644
--- a/trunk/arch/alpha/kernel/irq_pyxis.c
+++ b/trunk/arch/alpha/kernel/irq_pyxis.c
@@ -71,7 +71,7 @@ pyxis_mask_and_ack_irq(unsigned int irq)
}
static struct irq_chip pyxis_irq_type = {
- .typename = "PYXIS",
+ .name = "PYXIS",
.startup = pyxis_startup_irq,
.shutdown = pyxis_disable_irq,
.enable = pyxis_enable_irq,
diff --git a/trunk/arch/alpha/kernel/irq_srm.c b/trunk/arch/alpha/kernel/irq_srm.c
index 85229369a1f8..d63e93e1e8bf 100644
--- a/trunk/arch/alpha/kernel/irq_srm.c
+++ b/trunk/arch/alpha/kernel/irq_srm.c
@@ -49,7 +49,7 @@ srm_end_irq(unsigned int irq)
/* Handle interrupts from the SRM, assuming no additional weirdness. */
static struct irq_chip srm_irq_type = {
- .typename = "SRM",
+ .name = "SRM",
.startup = srm_startup_irq,
.shutdown = srm_disable_irq,
.enable = srm_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_alcor.c b/trunk/arch/alpha/kernel/sys_alcor.c
index 382035ef7394..20a30b8b9655 100644
--- a/trunk/arch/alpha/kernel/sys_alcor.c
+++ b/trunk/arch/alpha/kernel/sys_alcor.c
@@ -90,7 +90,7 @@ alcor_end_irq(unsigned int irq)
}
static struct irq_chip alcor_irq_type = {
- .typename = "ALCOR",
+ .name = "ALCOR",
.startup = alcor_startup_irq,
.shutdown = alcor_disable_irq,
.enable = alcor_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_cabriolet.c b/trunk/arch/alpha/kernel/sys_cabriolet.c
index ed349436732b..affd0f3f25df 100644
--- a/trunk/arch/alpha/kernel/sys_cabriolet.c
+++ b/trunk/arch/alpha/kernel/sys_cabriolet.c
@@ -72,7 +72,7 @@ cabriolet_end_irq(unsigned int irq)
}
static struct irq_chip cabriolet_irq_type = {
- .typename = "CABRIOLET",
+ .name = "CABRIOLET",
.startup = cabriolet_startup_irq,
.shutdown = cabriolet_disable_irq,
.enable = cabriolet_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_dp264.c b/trunk/arch/alpha/kernel/sys_dp264.c
index 46e70ece5176..d64e1e497e76 100644
--- a/trunk/arch/alpha/kernel/sys_dp264.c
+++ b/trunk/arch/alpha/kernel/sys_dp264.c
@@ -199,7 +199,7 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
}
static struct irq_chip dp264_irq_type = {
- .typename = "DP264",
+ .name = "DP264",
.startup = dp264_startup_irq,
.shutdown = dp264_disable_irq,
.enable = dp264_enable_irq,
@@ -210,7 +210,7 @@ static struct irq_chip dp264_irq_type = {
};
static struct irq_chip clipper_irq_type = {
- .typename = "CLIPPER",
+ .name = "CLIPPER",
.startup = clipper_startup_irq,
.shutdown = clipper_disable_irq,
.enable = clipper_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_eb64p.c b/trunk/arch/alpha/kernel/sys_eb64p.c
index 660c23ef661f..df2090ce5e7f 100644
--- a/trunk/arch/alpha/kernel/sys_eb64p.c
+++ b/trunk/arch/alpha/kernel/sys_eb64p.c
@@ -70,7 +70,7 @@ eb64p_end_irq(unsigned int irq)
}
static struct irq_chip eb64p_irq_type = {
- .typename = "EB64P",
+ .name = "EB64P",
.startup = eb64p_startup_irq,
.shutdown = eb64p_disable_irq,
.enable = eb64p_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_eiger.c b/trunk/arch/alpha/kernel/sys_eiger.c
index b99ea488d844..3ca1dbcf4044 100644
--- a/trunk/arch/alpha/kernel/sys_eiger.c
+++ b/trunk/arch/alpha/kernel/sys_eiger.c
@@ -81,7 +81,7 @@ eiger_end_irq(unsigned int irq)
}
static struct irq_chip eiger_irq_type = {
- .typename = "EIGER",
+ .name = "EIGER",
.startup = eiger_startup_irq,
.shutdown = eiger_disable_irq,
.enable = eiger_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_jensen.c b/trunk/arch/alpha/kernel/sys_jensen.c
index ef0b83a070ac..7a7ae36fff91 100644
--- a/trunk/arch/alpha/kernel/sys_jensen.c
+++ b/trunk/arch/alpha/kernel/sys_jensen.c
@@ -119,7 +119,7 @@ jensen_local_end(unsigned int irq)
}
static struct irq_chip jensen_local_irq_type = {
- .typename = "LOCAL",
+ .name = "LOCAL",
.startup = jensen_local_startup,
.shutdown = jensen_local_shutdown,
.enable = jensen_local_enable,
diff --git a/trunk/arch/alpha/kernel/sys_marvel.c b/trunk/arch/alpha/kernel/sys_marvel.c
index bbfc4f20ca72..0bb3b5c4f693 100644
--- a/trunk/arch/alpha/kernel/sys_marvel.c
+++ b/trunk/arch/alpha/kernel/sys_marvel.c
@@ -170,7 +170,7 @@ marvel_irq_noop_return(unsigned int irq)
}
static struct irq_chip marvel_legacy_irq_type = {
- .typename = "LEGACY",
+ .name = "LEGACY",
.startup = marvel_irq_noop_return,
.shutdown = marvel_irq_noop,
.enable = marvel_irq_noop,
@@ -180,7 +180,7 @@ static struct irq_chip marvel_legacy_irq_type = {
};
static struct irq_chip io7_lsi_irq_type = {
- .typename = "LSI",
+ .name = "LSI",
.startup = io7_startup_irq,
.shutdown = io7_disable_irq,
.enable = io7_enable_irq,
@@ -190,7 +190,7 @@ static struct irq_chip io7_lsi_irq_type = {
};
static struct irq_chip io7_msi_irq_type = {
- .typename = "MSI",
+ .name = "MSI",
.startup = io7_startup_irq,
.shutdown = io7_disable_irq,
.enable = io7_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_mikasa.c b/trunk/arch/alpha/kernel/sys_mikasa.c
index 4e366641a08e..ee8865169811 100644
--- a/trunk/arch/alpha/kernel/sys_mikasa.c
+++ b/trunk/arch/alpha/kernel/sys_mikasa.c
@@ -69,7 +69,7 @@ mikasa_end_irq(unsigned int irq)
}
static struct irq_chip mikasa_irq_type = {
- .typename = "MIKASA",
+ .name = "MIKASA",
.startup = mikasa_startup_irq,
.shutdown = mikasa_disable_irq,
.enable = mikasa_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_noritake.c b/trunk/arch/alpha/kernel/sys_noritake.c
index 35753a173bac..86503fe73a88 100644
--- a/trunk/arch/alpha/kernel/sys_noritake.c
+++ b/trunk/arch/alpha/kernel/sys_noritake.c
@@ -74,7 +74,7 @@ noritake_end_irq(unsigned int irq)
}
static struct irq_chip noritake_irq_type = {
- .typename = "NORITAKE",
+ .name = "NORITAKE",
.startup = noritake_startup_irq,
.shutdown = noritake_disable_irq,
.enable = noritake_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_rawhide.c b/trunk/arch/alpha/kernel/sys_rawhide.c
index f3aec7e085c8..26c322bf89ee 100644
--- a/trunk/arch/alpha/kernel/sys_rawhide.c
+++ b/trunk/arch/alpha/kernel/sys_rawhide.c
@@ -136,7 +136,7 @@ rawhide_end_irq(unsigned int irq)
}
static struct irq_chip rawhide_irq_type = {
- .typename = "RAWHIDE",
+ .name = "RAWHIDE",
.startup = rawhide_startup_irq,
.shutdown = rawhide_disable_irq,
.enable = rawhide_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_ruffian.c b/trunk/arch/alpha/kernel/sys_ruffian.c
index d9f9cfeb9931..8de1046fe91e 100644
--- a/trunk/arch/alpha/kernel/sys_ruffian.c
+++ b/trunk/arch/alpha/kernel/sys_ruffian.c
@@ -66,7 +66,7 @@ ruffian_init_irq(void)
common_init_isa_dma();
}
-#define RUFFIAN_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ)
+#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ)
static void __init
ruffian_init_rtc(void)
diff --git a/trunk/arch/alpha/kernel/sys_rx164.c b/trunk/arch/alpha/kernel/sys_rx164.c
index fc9246373452..be161129eab9 100644
--- a/trunk/arch/alpha/kernel/sys_rx164.c
+++ b/trunk/arch/alpha/kernel/sys_rx164.c
@@ -73,7 +73,7 @@ rx164_end_irq(unsigned int irq)
}
static struct irq_chip rx164_irq_type = {
- .typename = "RX164",
+ .name = "RX164",
.startup = rx164_startup_irq,
.shutdown = rx164_disable_irq,
.enable = rx164_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_sable.c b/trunk/arch/alpha/kernel/sys_sable.c
index 426eb6906d01..b2abe27a23cf 100644
--- a/trunk/arch/alpha/kernel/sys_sable.c
+++ b/trunk/arch/alpha/kernel/sys_sable.c
@@ -502,7 +502,7 @@ sable_lynx_mask_and_ack_irq(unsigned int irq)
}
static struct irq_chip sable_lynx_irq_type = {
- .typename = "SABLE/LYNX",
+ .name = "SABLE/LYNX",
.startup = sable_lynx_startup_irq,
.shutdown = sable_lynx_disable_irq,
.enable = sable_lynx_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_takara.c b/trunk/arch/alpha/kernel/sys_takara.c
index 830318c21661..230464885b5c 100644
--- a/trunk/arch/alpha/kernel/sys_takara.c
+++ b/trunk/arch/alpha/kernel/sys_takara.c
@@ -75,7 +75,7 @@ takara_end_irq(unsigned int irq)
}
static struct irq_chip takara_irq_type = {
- .typename = "TAKARA",
+ .name = "TAKARA",
.startup = takara_startup_irq,
.shutdown = takara_disable_irq,
.enable = takara_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_titan.c b/trunk/arch/alpha/kernel/sys_titan.c
index 88978fc60f83..288053342c83 100644
--- a/trunk/arch/alpha/kernel/sys_titan.c
+++ b/trunk/arch/alpha/kernel/sys_titan.c
@@ -195,7 +195,7 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax)
}
static struct irq_chip titan_irq_type = {
- .typename = "TITAN",
+ .name = "TITAN",
.startup = titan_startup_irq,
.shutdown = titan_disable_irq,
.enable = titan_enable_irq,
diff --git a/trunk/arch/alpha/kernel/sys_wildfire.c b/trunk/arch/alpha/kernel/sys_wildfire.c
index e91b4c3838a8..62fd972e18ef 100644
--- a/trunk/arch/alpha/kernel/sys_wildfire.c
+++ b/trunk/arch/alpha/kernel/sys_wildfire.c
@@ -158,7 +158,7 @@ wildfire_end_irq(unsigned int irq)
}
static struct irq_chip wildfire_irq_type = {
- .typename = "WILDFIRE",
+ .name = "WILDFIRE",
.startup = wildfire_startup_irq,
.shutdown = wildfire_disable_irq,
.enable = wildfire_enable_irq,
diff --git a/trunk/arch/arm/include/asm/kmap_types.h b/trunk/arch/arm/include/asm/kmap_types.h
index d16ec97ec9a9..c019949a5189 100644
--- a/trunk/arch/arm/include/asm/kmap_types.h
+++ b/trunk/arch/arm/include/asm/kmap_types.h
@@ -22,4 +22,10 @@ enum km_type {
KM_TYPE_NR
};
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define KM_NMI (-1)
+#define KM_NMI_PTE (-1)
+#define KM_IRQ_PTE (-1)
+#endif
+
#endif
diff --git a/trunk/arch/arm/kernel/isa.c b/trunk/arch/arm/kernel/isa.c
index 8ac9b8424007..346485910732 100644
--- a/trunk/arch/arm/kernel/isa.c
+++ b/trunk/arch/arm/kernel/isa.c
@@ -22,47 +22,42 @@ static unsigned int isa_membase, isa_portbase, isa_portshift;
static ctl_table ctl_isa_vars[4] = {
{
- .ctl_name = BUS_ISA_MEM_BASE,
.procname = "membase",
.data = &isa_membase,
.maxlen = sizeof(isa_membase),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
}, {
- .ctl_name = BUS_ISA_PORT_BASE,
.procname = "portbase",
.data = &isa_portbase,
.maxlen = sizeof(isa_portbase),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
}, {
- .ctl_name = BUS_ISA_PORT_SHIFT,
.procname = "portshift",
.data = &isa_portshift,
.maxlen = sizeof(isa_portshift),
.mode = 0444,
- .proc_handler = &proc_dointvec,
- }, {0}
+ .proc_handler = proc_dointvec,
+ }, {}
};
static struct ctl_table_header *isa_sysctl_header;
static ctl_table ctl_isa[2] = {
{
- .ctl_name = CTL_BUS_ISA,
.procname = "isa",
.mode = 0555,
.child = ctl_isa_vars,
- }, {0}
+ }, {}
};
static ctl_table ctl_bus[2] = {
{
- .ctl_name = CTL_BUS,
.procname = "bus",
.mode = 0555,
.child = ctl_isa,
- }, {0}
+ }, {}
};
void __init
diff --git a/trunk/arch/arm/kernel/signal.c b/trunk/arch/arm/kernel/signal.c
index 2a573d4fea24..e7714f367eb8 100644
--- a/trunk/arch/arm/kernel/signal.c
+++ b/trunk/arch/arm/kernel/signal.c
@@ -662,8 +662,12 @@ static void do_signal(struct pt_regs *regs, int syscall)
regs->ARM_sp -= 4;
usp = (u32 __user *)regs->ARM_sp;
- put_user(regs->ARM_pc, usp);
- regs->ARM_pc = KERN_RESTART_CODE;
+ if (put_user(regs->ARM_pc, usp) == 0) {
+ regs->ARM_pc = KERN_RESTART_CODE;
+ } else {
+ regs->ARM_sp += 4;
+ force_sigsegv(0, current);
+ }
#endif
}
}
diff --git a/trunk/arch/arm/mach-bcmring/arch.c b/trunk/arch/arm/mach-bcmring/arch.c
index 0da693b0f7e1..fbe6fa02c882 100644
--- a/trunk/arch/arm/mach-bcmring/arch.c
+++ b/trunk/arch/arm/mach-bcmring/arch.c
@@ -47,10 +47,6 @@ HW_DECLARE_SPINLOCK(gpio)
EXPORT_SYMBOL(bcmring_gpio_reg_lock);
#endif
-/* FIXME: temporary solution */
-#define BCM_SYSCTL_REBOOT_WARM 1
-#define CTL_BCM_REBOOT 112
-
/* sysctl */
int bcmring_arch_warm_reboot; /* do a warm reboot on hard reset */
@@ -58,18 +54,16 @@ static struct ctl_table_header *bcmring_sysctl_header;
static struct ctl_table bcmring_sysctl_warm_reboot[] = {
{
- .ctl_name = BCM_SYSCTL_REBOOT_WARM,
.procname = "warm",
.data = &bcmring_arch_warm_reboot,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec},
+ .proc_handler = proc_dointvec},
{}
};
static struct ctl_table bcmring_sysctl_reboot[] = {
{
- .ctl_name = CTL_BCM_REBOOT,
.procname = "reboot",
.mode = 0555,
.child = bcmring_sysctl_warm_reboot},
diff --git a/trunk/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/trunk/arch/arm/mach-pxa/cpufreq-pxa2xx.c
index 983cc8c20081..9e4d9816726a 100644
--- a/trunk/arch/arm/mach-pxa/cpufreq-pxa2xx.c
+++ b/trunk/arch/arm/mach-pxa/cpufreq-pxa2xx.c
@@ -447,6 +447,7 @@ static __init int pxa_cpufreq_init(struct cpufreq_policy *policy)
pxa27x_freq_table[i].frequency = freq;
pxa27x_freq_table[i].index = i;
}
+ pxa27x_freq_table[i].index = i;
pxa27x_freq_table[i].frequency = CPUFREQ_TABLE_END;
/*
diff --git a/trunk/arch/arm/mach-pxa/cpufreq-pxa3xx.c b/trunk/arch/arm/mach-pxa/cpufreq-pxa3xx.c
index 67f34a8d8e60..149cdd9aee4d 100644
--- a/trunk/arch/arm/mach-pxa/cpufreq-pxa3xx.c
+++ b/trunk/arch/arm/mach-pxa/cpufreq-pxa3xx.c
@@ -102,7 +102,7 @@ static int setup_freqs_table(struct cpufreq_policy *policy,
table[i].index = i;
table[i].frequency = freqs[i].cpufreq_mhz * 1000;
}
- table[num].frequency = i;
+ table[num].index = i;
table[num].frequency = CPUFREQ_TABLE_END;
pxa3xx_freqs = freqs;
diff --git a/trunk/arch/arm/mach-pxa/spitz.c b/trunk/arch/arm/mach-pxa/spitz.c
index 3da45d051743..d98023f55503 100644
--- a/trunk/arch/arm/mach-pxa/spitz.c
+++ b/trunk/arch/arm/mach-pxa/spitz.c
@@ -802,10 +802,12 @@ static void __init spitz_init(void)
{
spitz_ficp_platform_data.gpio_pwdown = SPITZ_GPIO_IR_ON;
+#ifdef CONFIG_MACH_BORZOI
if (machine_is_borzoi()) {
sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt;
sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo;
}
+#endif
platform_scoop_config = &spitz_pcmcia_config;
diff --git a/trunk/arch/arm/tools/mach-types b/trunk/arch/arm/tools/mach-types
index 94be7bb6cb9a..07b976da6174 100644
--- a/trunk/arch/arm/tools/mach-types
+++ b/trunk/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
#
# http://www.arm.linux.org.uk/developer/machines/?action=new
#
-# Last update: Fri Sep 18 21:42:00 2009
+# Last update: Wed Nov 25 22:14:58 2009
#
# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number
#
@@ -928,7 +928,7 @@ palmt5 MACH_PALMT5 PALMT5 917
palmtc MACH_PALMTC PALMTC 918
omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919
mxc30030evb MACH_MXC30030EVB MXC30030EVB 920
-rea_2d MACH_REA_2D REA_2D 921
+rea_cpu2 MACH_REA_2D REA_2D 921
eti3e524 MACH_TI3E524 TI3E524 922
ateb9200 MACH_ATEB9200 ATEB9200 923
auckland MACH_AUCKLAND AUCKLAND 924
@@ -2421,3 +2421,118 @@ liberty MACH_LIBERTY LIBERTY 2434
mh355 MACH_MH355 MH355 2435
pc7802 MACH_PC7802 PC7802 2436
gnet_sgc MACH_GNET_SGC GNET_SGC 2437
+einstein15 MACH_EINSTEIN15 EINSTEIN15 2438
+cmpd MACH_CMPD CMPD 2439
+davinci_hase1 MACH_DAVINCI_HASE1 DAVINCI_HASE1 2440
+lgeincitephone MACH_LGEINCITEPHONE LGEINCITEPHONE 2441
+ea313x MACH_EA313X EA313X 2442
+fwbd_39064 MACH_FWBD_39064 FWBD_39064 2443
+fwbd_390128 MACH_FWBD_390128 FWBD_390128 2444
+pelco_moe MACH_PELCO_MOE PELCO_MOE 2445
+minimix27 MACH_MINIMIX27 MINIMIX27 2446
+omap3_thunder MACH_OMAP3_THUNDER OMAP3_THUNDER 2447
+passionc MACH_PASSIONC PASSIONC 2448
+mx27amata MACH_MX27AMATA MX27AMATA 2449
+bgat1 MACH_BGAT1 BGAT1 2450
+buzz MACH_BUZZ BUZZ 2451
+mb9g20 MACH_MB9G20 MB9G20 2452
+yushan MACH_YUSHAN YUSHAN 2453
+lizard MACH_LIZARD LIZARD 2454
+omap3polycom MACH_OMAP3POLYCOM OMAP3POLYCOM 2455
+smdkv210 MACH_SMDKV210 SMDKV210 2456
+bravo MACH_BRAVO BRAVO 2457
+siogentoo1 MACH_SIOGENTOO1 SIOGENTOO1 2458
+siogentoo2 MACH_SIOGENTOO2 SIOGENTOO2 2459
+sm3k MACH_SM3K SM3K 2460
+acer_tempo_f900 MACH_ACER_TEMPO_F900 ACER_TEMPO_F900 2461
+sst61vc010_dev MACH_SST61VC010_DEV SST61VC010_DEV 2462
+glittertind MACH_GLITTERTIND GLITTERTIND 2463
+omap_zoom3 MACH_OMAP_ZOOM3 OMAP_ZOOM3 2464
+omap_3630sdp MACH_OMAP_3630SDP OMAP_3630SDP 2465
+cybook2440 MACH_CYBOOK2440 CYBOOK2440 2466
+torino_s MACH_TORINO_S TORINO_S 2467
+havana MACH_HAVANA HAVANA 2468
+beaumont_11 MACH_BEAUMONT_11 BEAUMONT_11 2469
+vanguard MACH_VANGUARD VANGUARD 2470
+s5pc110_draco MACH_S5PC110_DRACO S5PC110_DRACO 2471
+cartesio_two MACH_CARTESIO_TWO CARTESIO_TWO 2472
+aster MACH_ASTER ASTER 2473
+voguesv210 MACH_VOGUESV210 VOGUESV210 2474
+acm500x MACH_ACM500X ACM500X 2475
+km9260 MACH_KM9260 KM9260 2476
+nideflexg1 MACH_NIDEFLEXG1 NIDEFLEXG1 2477
+ctera_plug_io MACH_CTERA_PLUG_IO CTERA_PLUG_IO 2478
+smartq7 MACH_SMARTQ7 SMARTQ7 2479
+at91sam9g10ek2 MACH_AT91SAM9G10EK2 AT91SAM9G10EK2 2480
+asusp527 MACH_ASUSP527 ASUSP527 2481
+at91sam9g20mpm2 MACH_AT91SAM9G20MPM2 AT91SAM9G20MPM2 2482
+topasa900 MACH_TOPASA900 TOPASA900 2483
+electrum_100 MACH_ELECTRUM_100 ELECTRUM_100 2484
+mx51grb MACH_MX51GRB MX51GRB 2485
+xea300 MACH_XEA300 XEA300 2486
+htcstartrek MACH_HTCSTARTREK HTCSTARTREK 2487
+lima MACH_LIMA LIMA 2488
+csb740 MACH_CSB740 CSB740 2489
+usb_s8815 MACH_USB_S8815 USB_S8815 2490
+watson_efm_plugin MACH_WATSON_EFM_PLUGIN WATSON_EFM_PLUGIN 2491
+milkyway MACH_MILKYWAY MILKYWAY 2492
+g4evm MACH_G4EVM G4EVM 2493
+picomod6 MACH_PICOMOD6 PICOMOD6 2494
+omapl138_hawkboard MACH_OMAPL138_HAWKBOARD OMAPL138_HAWKBOARD 2495
+ip6000 MACH_IP6000 IP6000 2496
+ip6010 MACH_IP6010 IP6010 2497
+utm400 MACH_UTM400 UTM400 2498
+omap3_zybex MACH_OMAP3_ZYBEX OMAP3_ZYBEX 2499
+wireless_space MACH_WIRELESS_SPACE WIRELESS_SPACE 2500
+sx560 MACH_SX560 SX560 2501
+ts41x MACH_TS41X TS41X 2502
+elphel10373 MACH_ELPHEL10373 ELPHEL10373 2503
+rhobot MACH_RHOBOT RHOBOT 2504
+mx51_refresh MACH_MX51_REFRESH MX51_REFRESH 2505
+ls9260 MACH_LS9260 LS9260 2506
+shank MACH_SHANK SHANK 2507
+qsd8x50_st1 MACH_QSD8X50_ST1 QSD8X50_ST1 2508
+at91sam9m10ekes MACH_AT91SAM9M10EKES AT91SAM9M10EKES 2509
+hiram MACH_HIRAM HIRAM 2510
+phy3250 MACH_PHY3250 PHY3250 2511
+ea3250 MACH_EA3250 EA3250 2512
+fdi3250 MACH_FDI3250 FDI3250 2513
+whitestone MACH_WHITESTONE WHITESTONE 2514
+at91sam9263nit MACH_AT91SAM9263NIT AT91SAM9263NIT 2515
+ccmx51 MACH_CCMX51 CCMX51 2516
+ccmx51js MACH_CCMX51JS CCMX51JS 2517
+ccwmx51 MACH_CCWMX51 CCWMX51 2518
+ccwmx51js MACH_CCWMX51JS CCWMX51JS 2519
+mini6410 MACH_MINI6410 MINI6410 2520
+tiny6410 MACH_TINY6410 TINY6410 2521
+nano6410 MACH_NANO6410 NANO6410 2522
+at572d940hfnldb MACH_AT572D940HFNLDB AT572D940HFNLDB 2523
+htcleo MACH_HTCLEO HTCLEO 2524
+avp13 MACH_AVP13 AVP13 2525
+xxsvideod MACH_XXSVIDEOD XXSVIDEOD 2526
+vpnext MACH_VPNEXT VPNEXT 2527
+swarco_itc3 MACH_SWARCO_ITC3 SWARCO_ITC3 2528
+tx51 MACH_TX51 TX51 2529
+dolby_cat1021 MACH_DOLBY_CAT1021 DOLBY_CAT1021 2530
+mx28evk MACH_MX28EVK MX28EVK 2531
+phoenix260 MACH_PHOENIX260 PHOENIX260 2532
+uvaca_stork MACH_UVACA_STORK UVACA_STORK 2533
+smartq5 MACH_SMARTQ5 SMARTQ5 2534
+all3078 MACH_ALL3078 ALL3078 2535
+ctera_2bay_ds MACH_CTERA_2BAY_DS CTERA_2BAY_DS 2536
+siogentoo3 MACH_SIOGENTOO3 SIOGENTOO3 2537
+epb5000 MACH_EPB5000 EPB5000 2538
+hy9263 MACH_HY9263 HY9263 2539
+acer_tempo_m900 MACH_ACER_TEMPO_M900 ACER_TEMPO_M900 2540
+acer_tempo_dx650 MACH_ACER_TEMPO_DX900 ACER_TEMPO_DX900 2541
+acer_tempo_x960 MACH_ACER_TEMPO_X960 ACER_TEMPO_X960 2542
+acer_eten_v900 MACH_ACER_ETEN_V900 ACER_ETEN_V900 2543
+acer_eten_x900 MACH_ACER_ETEN_X900 ACER_ETEN_X900 2544
+bonnell MACH_BONNELL BONNELL 2545
+oht_mx27 MACH_OHT_MX27 OHT_MX27 2546
+htcquartz MACH_HTCQUARTZ HTCQUARTZ 2547
+davinci_dm6467tevm MACH_DAVINCI_DM6467TEVM DAVINCI_DM6467TEVM 2548
+c3ax03 MACH_C3AX03 C3AX03 2549
+mxt_td60 MACH_MXT_TD60 MXT_TD60 2550
+esyx MACH_ESYX ESYX 2551
+bulldog MACH_BULLDOG BULLDOG 2553
diff --git a/trunk/arch/avr32/include/asm/bug.h b/trunk/arch/avr32/include/asm/bug.h
index 331d45bab18f..2aa373cc61b5 100644
--- a/trunk/arch/avr32/include/asm/bug.h
+++ b/trunk/arch/avr32/include/asm/bug.h
@@ -52,7 +52,7 @@
#define BUG() \
do { \
_BUG_OR_WARN(0); \
- for (;;); \
+ unreachable(); \
} while (0)
#define WARN_ON(condition) \
diff --git a/trunk/arch/blackfin/kernel/bfin_dma_5xx.c b/trunk/arch/blackfin/kernel/bfin_dma_5xx.c
index 1f170216d2f9..3946aff4f414 100644
--- a/trunk/arch/blackfin/kernel/bfin_dma_5xx.c
+++ b/trunk/arch/blackfin/kernel/bfin_dma_5xx.c
@@ -225,8 +225,13 @@ int blackfin_dma_suspend(void)
void blackfin_dma_resume(void)
{
int i;
- for (i = 0; i < MAX_DMA_SUSPEND_CHANNELS; ++i)
- dma_ch[i].regs->peripheral_map = dma_ch[i].saved_peripheral_map;
+
+ for (i = 0; i < MAX_DMA_CHANNELS; ++i) {
+ dma_ch[i].regs->cfg = 0;
+
+ if (i < MAX_DMA_SUSPEND_CHANNELS)
+ dma_ch[i].regs->peripheral_map = dma_ch[i].saved_peripheral_map;
+ }
}
#endif
diff --git a/trunk/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/trunk/arch/blackfin/kernel/cplb-mpu/cplbinit.c
index f7b9cdce8239..b52c1f8c4bc0 100644
--- a/trunk/arch/blackfin/kernel/cplb-mpu/cplbinit.c
+++ b/trunk/arch/blackfin/kernel/cplb-mpu/cplbinit.c
@@ -38,7 +38,7 @@ void __init generate_cplb_tables_cpu(unsigned int cpu)
#ifdef CONFIG_BFIN_EXTMEM_DCACHEABLE
d_cache = CPLB_L1_CHBL;
-#ifdef CONFIG_BFIN_EXTMEM_WRITETROUGH
+#ifdef CONFIG_BFIN_EXTMEM_WRITETHROUGH
d_cache |= CPLB_L1_AOW | CPLB_WT;
#endif
#endif
diff --git a/trunk/arch/blackfin/kernel/process.c b/trunk/arch/blackfin/kernel/process.c
index 430ae39456e8..5cc7e2e9e415 100644
--- a/trunk/arch/blackfin/kernel/process.c
+++ b/trunk/arch/blackfin/kernel/process.c
@@ -151,7 +151,7 @@ void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_
regs->pc = new_ip;
if (current->mm)
regs->p5 = current->mm->start_data;
-#ifdef CONFIG_SMP
+#ifndef CONFIG_SMP
task_thread_info(current)->l1_task_info.stack_start =
(void *)current->mm->context.stack_start;
task_thread_info(current)->l1_task_info.lowest_sp = (void *)new_sp;
diff --git a/trunk/arch/blackfin/kernel/ptrace.c b/trunk/arch/blackfin/kernel/ptrace.c
index 0982b5d5af10..56b0ba12175f 100644
--- a/trunk/arch/blackfin/kernel/ptrace.c
+++ b/trunk/arch/blackfin/kernel/ptrace.c
@@ -315,7 +315,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
copied = access_process_vm(child, addr, &data,
- to_copy, 0);
+ to_copy, 1);
if (copied)
break;
diff --git a/trunk/arch/blackfin/mach-bf518/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf518/include/mach/anomaly.h
index e9c65390edd1..2829dd0400f1 100644
--- a/trunk/arch/blackfin/mach-bf518/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf518/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf518/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -70,6 +74,10 @@
#define ANOMALY_05000461 (1)
/* Synchronization Problem at Startup May Cause SPORT Transmit Channels to Misalign */
#define ANOMALY_05000462 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000099 (0)
@@ -133,5 +141,7 @@
#define ANOMALY_05000450 (0)
#define ANOMALY_05000465 (0)
#define ANOMALY_05000467 (0)
+#define ANOMALY_05000474 (0)
+#define ANOMALY_05000475 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-bf527/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf527/include/mach/anomaly.h
index 3f9052687fa8..02040df8ec80 100644
--- a/trunk/arch/blackfin/mach-bf527/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf527/include/mach/anomaly.h
@@ -1,14 +1,18 @@
/*
- * File: include/asm-blackfin/mach-bf527/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
* - Revision D, 08/14/2009; ADSP-BF526 Blackfin Processor Anomaly List
- * - Revision F, 03/03/2009; ADSP-BF527 Blackfin Processor Anomaly List
+ * - Revision G, 08/25/2009; ADSP-BF527 Blackfin Processor Anomaly List
*/
#ifndef _MACH_ANOMALY_H_
@@ -200,6 +204,10 @@
#define ANOMALY_05000467 (1)
/* PLL Latches Incorrect Settings During Reset */
#define ANOMALY_05000469 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000099 (0)
@@ -250,5 +258,7 @@
#define ANOMALY_05000412 (0)
#define ANOMALY_05000447 (0)
#define ANOMALY_05000448 (0)
+#define ANOMALY_05000474 (0)
+#define ANOMALY_05000475 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-bf533/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf533/include/mach/anomaly.h
index cd83db2fb1a1..9b3f7a27714d 100644
--- a/trunk/arch/blackfin/mach-bf533/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf533/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf533/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -202,6 +206,10 @@
#define ANOMALY_05000443 (1)
/* False Hardware Error when RETI Points to Invalid Memory */
#define ANOMALY_05000461 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* These anomalies have been "phased" out of analog.com anomaly sheets and are
* here to show running on older silicon just isn't feasible.
@@ -349,5 +357,7 @@
#define ANOMALY_05000450 (0)
#define ANOMALY_05000465 (0)
#define ANOMALY_05000467 (0)
+#define ANOMALY_05000474 (0)
+#define ANOMALY_05000475 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-bf537/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf537/include/mach/anomaly.h
index f091ad2d8ea8..d2c427bc6656 100644
--- a/trunk/arch/blackfin/mach-bf537/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf537/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf537/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -156,6 +160,10 @@
#define ANOMALY_05000443 (1)
/* False Hardware Error when RETI Points to Invalid Memory */
#define ANOMALY_05000461 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000099 (0)
@@ -202,5 +210,7 @@
#define ANOMALY_05000450 (0)
#define ANOMALY_05000465 (0)
#define ANOMALY_05000467 (0)
+#define ANOMALY_05000474 (0)
+#define ANOMALY_05000475 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-bf538/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf538/include/mach/anomaly.h
index 26b76083e14c..d882b7e6f59b 100644
--- a/trunk/arch/blackfin/mach-bf538/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf538/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf538/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -128,6 +132,10 @@
#define ANOMALY_05000443 (1)
/* False Hardware Error when RETI Points to Invalid Memory */
#define ANOMALY_05000461 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000099 (0)
@@ -176,5 +184,7 @@
#define ANOMALY_05000450 (0)
#define ANOMALY_05000465 (0)
#define ANOMALY_05000467 (0)
+#define ANOMALY_05000474 (0)
+#define ANOMALY_05000475 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-bf548/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf548/include/mach/anomaly.h
index 52b116ae522a..7d08c7524498 100644
--- a/trunk/arch/blackfin/mach-bf548/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf548/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf548/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -24,6 +28,8 @@
#define ANOMALY_05000119 (1)
/* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
#define ANOMALY_05000122 (1)
+/* Data Corruption with Cached External Memory and Non-Cached On-Chip L2 Memory */
+#define ANOMALY_05000220 (1)
/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
#define ANOMALY_05000245 (1)
/* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
@@ -200,6 +206,14 @@
#define ANOMALY_05000466 (1)
/* Possible RX data corruption when control & data EP FIFOs are accessed via the core */
#define ANOMALY_05000467 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* Access to DDR-SDRAM causes system hang under certain PLL/VR settings */
+#define ANOMALY_05000474 (1)
+/* Core Hang With L2/L3 Configured in Writeback Cache Mode */
+#define ANOMALY_05000475 (1)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000099 (0)
@@ -215,7 +229,6 @@
#define ANOMALY_05000198 (0)
#define ANOMALY_05000202 (0)
#define ANOMALY_05000215 (0)
-#define ANOMALY_05000220 (0)
#define ANOMALY_05000227 (0)
#define ANOMALY_05000230 (0)
#define ANOMALY_05000231 (0)
diff --git a/trunk/arch/blackfin/mach-bf561/atomic.S b/trunk/arch/blackfin/mach-bf561/atomic.S
index 0261a5e751b3..f99f174b129f 100644
--- a/trunk/arch/blackfin/mach-bf561/atomic.S
+++ b/trunk/arch/blackfin/mach-bf561/atomic.S
@@ -19,6 +19,16 @@
\reg\().h = _corelock;
.endm
+.macro safe_testset addr:req, scratch:req
+#if ANOMALY_05000477
+ cli \scratch;
+ testset (\addr);
+ sti \scratch;
+#else
+ testset (\addr);
+#endif
+.endm
+
/*
* r0 = address of atomic data to flush and invalidate (32bit).
*
@@ -33,7 +43,7 @@ ENTRY(_get_core_lock)
cli r0;
coreslot_loadaddr p0;
.Lretry_corelock:
- testset (p0);
+ safe_testset p0, r2;
if cc jump .Ldone_corelock;
SSYNC(r2);
jump .Lretry_corelock
@@ -56,7 +66,7 @@ ENTRY(_get_core_lock_noflush)
cli r0;
coreslot_loadaddr p0;
.Lretry_corelock_noflush:
- testset (p0);
+ safe_testset p0, r2;
if cc jump .Ldone_corelock_noflush;
SSYNC(r2);
jump .Lretry_corelock_noflush
diff --git a/trunk/arch/blackfin/mach-bf561/include/mach/anomaly.h b/trunk/arch/blackfin/mach-bf561/include/mach/anomaly.h
index 70da495c9665..5ddc981e9937 100644
--- a/trunk/arch/blackfin/mach-bf561/include/mach/anomaly.h
+++ b/trunk/arch/blackfin/mach-bf561/include/mach/anomaly.h
@@ -1,9 +1,13 @@
/*
- * File: include/asm-blackfin/mach-bf561/anomaly.h
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ * DO NOT EDIT THIS FILE
+ * This file is under version control at
+ * svn://sources.blackfin.uclinux.org/toolchain/trunk/proc-defs/header-frags/
+ * and can be replaced with that version at any time
+ * DO NOT EDIT THIS FILE
*
- * Copyright (C) 2004-2009 Analog Devices Inc.
- * Licensed under the GPL-2 or later.
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the ADI BSD license.
+ * https://docs.blackfin.uclinux.org/doku.php?id=adi_bsd
*/
/* This file should be up to date with:
@@ -213,7 +217,11 @@
/* Disabling Peripherals with DMA Running May Cause DMA System Instability */
#define ANOMALY_05000278 (__SILICON_REVISION__ < 5)
/* False Hardware Error Exception when ISR Context Is Not Restored */
-#define ANOMALY_05000281 (__SILICON_REVISION__ < 5)
+/* Temporarily walk around for bug 5423 till this issue is confirmed by
+ * official anomaly document. It looks 05000281 still exists on bf561
+ * v0.5.
+ */
+#define ANOMALY_05000281 (__SILICON_REVISION__ <= 5)
/* System MMR Write Is Stalled Indefinitely when Killed in a Particular Stage */
#define ANOMALY_05000283 (1)
/* Reads Will Receive Incorrect Data under Certain Conditions */
@@ -280,6 +288,12 @@
#define ANOMALY_05000443 (1)
/* False Hardware Error when RETI Points to Invalid Memory */
#define ANOMALY_05000461 (1)
+/* Interrupted 32-Bit SPORT Data Register Access Results In Underflow */
+#define ANOMALY_05000473 (1)
+/* Core Hang With L2/L3 Configured in Writeback Cache Mode */
+#define ANOMALY_05000475 (__SILICON_REVISION__ < 4)
+/* TESTSET Instruction Cannot Be Interrupted */
+#define ANOMALY_05000477 (1)
/* Anomalies that don't exist on this proc */
#define ANOMALY_05000119 (0)
@@ -304,5 +318,6 @@
#define ANOMALY_05000450 (0)
#define ANOMALY_05000465 (0)
#define ANOMALY_05000467 (0)
+#define ANOMALY_05000474 (0)
#endif
diff --git a/trunk/arch/blackfin/mach-common/arch_checks.c b/trunk/arch/blackfin/mach-common/arch_checks.c
index 9dbafcdcf479..f2ca211a76a0 100644
--- a/trunk/arch/blackfin/mach-common/arch_checks.c
+++ b/trunk/arch/blackfin/mach-common/arch_checks.c
@@ -57,3 +57,8 @@
(!defined(CONFIG_BFIN_EXTMEM_DCACHEABLE) && defined(CONFIG_BFIN_L2_WRITEBACK)))
# error You are exposing Anomaly 220 in this config, either config L2 as Write Through, or make External Memory WB.
#endif
+
+#if ANOMALY_05000475 && \
+ (defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK))
+# error "Anomaly 475 does not allow you to use Write Back cache with L2 or External Memory"
+#endif
diff --git a/trunk/arch/blackfin/mach-common/smp.c b/trunk/arch/blackfin/mach-common/smp.c
index d98585f3237d..d92b168c8328 100644
--- a/trunk/arch/blackfin/mach-common/smp.c
+++ b/trunk/arch/blackfin/mach-common/smp.c
@@ -276,10 +276,9 @@ void smp_send_reschedule(int cpu)
if (cpu_is_offline(cpu))
return;
- msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
+ msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
if (!msg)
return;
- memset(msg, 0, sizeof(msg));
INIT_LIST_HEAD(&msg->list);
msg->type = BFIN_IPI_RESCHEDULE;
@@ -305,10 +304,9 @@ void smp_send_stop(void)
if (cpus_empty(callmap))
return;
- msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
+ msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
if (!msg)
return;
- memset(msg, 0, sizeof(msg));
INIT_LIST_HEAD(&msg->list);
msg->type = BFIN_IPI_CPU_STOP;
diff --git a/trunk/arch/frv/kernel/pm.c b/trunk/arch/frv/kernel/pm.c
index 0d4d3e3a4cfc..5fa3889d858b 100644
--- a/trunk/arch/frv/kernel/pm.c
+++ b/trunk/arch/frv/kernel/pm.c
@@ -211,37 +211,6 @@ static int cmode_procctl(ctl_table *ctl, int write,
return try_set_cmode(new_cmode)?:*lenp;
}
-static int cmode_sysctl(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (oldval && oldlenp) {
- size_t oldlen;
-
- if (get_user(oldlen, oldlenp))
- return -EFAULT;
-
- if (oldlen != sizeof(int))
- return -EINVAL;
-
- if (put_user(clock_cmode_current, (unsigned __user *)oldval) ||
- put_user(sizeof(int), oldlenp))
- return -EFAULT;
- }
- if (newval && newlen) {
- int new_cmode;
-
- if (newlen != sizeof(int))
- return -EINVAL;
-
- if (get_user(new_cmode, (int __user *)newval))
- return -EFAULT;
-
- return try_set_cmode(new_cmode)?:1;
- }
- return 1;
-}
-
static int try_set_p0(int new_p0)
{
unsigned long flags, clkc;
@@ -314,37 +283,6 @@ static int p0_procctl(ctl_table *ctl, int write,
return try_set_p0(new_p0)?:*lenp;
}
-static int p0_sysctl(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (oldval && oldlenp) {
- size_t oldlen;
-
- if (get_user(oldlen, oldlenp))
- return -EFAULT;
-
- if (oldlen != sizeof(int))
- return -EINVAL;
-
- if (put_user(clock_p0_current, (unsigned __user *)oldval) ||
- put_user(sizeof(int), oldlenp))
- return -EFAULT;
- }
- if (newval && newlen) {
- int new_p0;
-
- if (newlen != sizeof(int))
- return -EINVAL;
-
- if (get_user(new_p0, (int __user *)newval))
- return -EFAULT;
-
- return try_set_p0(new_p0)?:1;
- }
- return 1;
-}
-
static int cm_procctl(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *fpos)
{
@@ -358,87 +296,47 @@ static int cm_procctl(ctl_table *ctl, int write,
return try_set_cm(new_cm)?:*lenp;
}
-static int cm_sysctl(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- if (oldval && oldlenp) {
- size_t oldlen;
-
- if (get_user(oldlen, oldlenp))
- return -EFAULT;
-
- if (oldlen != sizeof(int))
- return -EINVAL;
-
- if (put_user(clock_cm_current, (unsigned __user *)oldval) ||
- put_user(sizeof(int), oldlenp))
- return -EFAULT;
- }
- if (newval && newlen) {
- int new_cm;
-
- if (newlen != sizeof(int))
- return -EINVAL;
-
- if (get_user(new_cm, (int __user *)newval))
- return -EFAULT;
-
- return try_set_cm(new_cm)?:1;
- }
- return 1;
-}
-
-
static struct ctl_table pm_table[] =
{
{
- .ctl_name = CTL_PM_SUSPEND,
.procname = "suspend",
.data = NULL,
.maxlen = 0,
.mode = 0200,
- .proc_handler = &sysctl_pm_do_suspend,
+ .proc_handler = sysctl_pm_do_suspend,
},
{
- .ctl_name = CTL_PM_CMODE,
.procname = "cmode",
.data = &clock_cmode_current,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cmode_procctl,
- .strategy = &cmode_sysctl,
+ .proc_handler = cmode_procctl,
},
{
- .ctl_name = CTL_PM_P0,
.procname = "p0",
.data = &clock_p0_current,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &p0_procctl,
- .strategy = &p0_sysctl,
+ .proc_handler = p0_procctl,
},
{
- .ctl_name = CTL_PM_CM,
.procname = "cm",
.data = &clock_cm_current,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cm_procctl,
- .strategy = &cm_sysctl,
+ .proc_handler = cm_procctl,
},
- { .ctl_name = 0}
+ { }
};
static struct ctl_table pm_dir_table[] =
{
{
- .ctl_name = CTL_PM,
.procname = "pm",
.mode = 0555,
.child = pm_table,
},
- { .ctl_name = 0}
+ { }
};
/*
diff --git a/trunk/arch/frv/kernel/sysctl.c b/trunk/arch/frv/kernel/sysctl.c
index 3e9d7e03fb95..035516cb7a97 100644
--- a/trunk/arch/frv/kernel/sysctl.c
+++ b/trunk/arch/frv/kernel/sysctl.c
@@ -176,21 +176,19 @@ static int procctl_frv_pin_cxnr(ctl_table *table, int write, struct file *filp,
static struct ctl_table frv_table[] =
{
{
- .ctl_name = 1,
.procname = "cache-mode",
.data = NULL,
.maxlen = 0,
.mode = 0644,
- .proc_handler = &procctl_frv_cachemode,
+ .proc_handler = procctl_frv_cachemode,
},
#ifdef CONFIG_MMU
{
- .ctl_name = 2,
.procname = "pin-cxnr",
.data = NULL,
.maxlen = 0,
.mode = 0644,
- .proc_handler = &procctl_frv_pin_cxnr
+ .proc_handler = procctl_frv_pin_cxnr
},
#endif
{}
@@ -203,7 +201,6 @@ static struct ctl_table frv_table[] =
static struct ctl_table frv_dir_table[] =
{
{
- .ctl_name = CTL_FRV,
.procname = "frv",
.mode = 0555,
.child = frv_table
diff --git a/trunk/arch/ia64/ia32/ia32_entry.S b/trunk/arch/ia64/ia32/ia32_entry.S
index af9405cd70e5..10c37510f4b4 100644
--- a/trunk/arch/ia64/ia32/ia32_entry.S
+++ b/trunk/arch/ia64/ia32/ia32_entry.S
@@ -327,7 +327,7 @@ ia32_syscall_table:
data8 compat_sys_writev
data8 sys_getsid
data8 sys_fdatasync
- data8 sys32_sysctl
+ data8 compat_sys_sysctl
data8 sys_mlock /* 150 */
data8 sys_munlock
data8 sys_mlockall
diff --git a/trunk/arch/ia64/ia32/sys_ia32.c b/trunk/arch/ia64/ia32/sys_ia32.c
index 625ed8f76fce..429ec968c9ee 100644
--- a/trunk/arch/ia64/ia32/sys_ia32.c
+++ b/trunk/arch/ia64/ia32/sys_ia32.c
@@ -1628,61 +1628,6 @@ sys32_msync (unsigned int start, unsigned int len, int flags)
return sys_msync(addr, len + (start - addr), flags);
}
-struct sysctl32 {
- unsigned int name;
- int nlen;
- unsigned int oldval;
- unsigned int oldlenp;
- unsigned int newval;
- unsigned int newlen;
- unsigned int __unused[4];
-};
-
-#ifdef CONFIG_SYSCTL_SYSCALL
-asmlinkage long
-sys32_sysctl (struct sysctl32 __user *args)
-{
- struct sysctl32 a32;
- mm_segment_t old_fs = get_fs ();
- void __user *oldvalp, *newvalp;
- size_t oldlen;
- int __user *namep;
- long ret;
-
- if (copy_from_user(&a32, args, sizeof(a32)))
- return -EFAULT;
-
- /*
- * We need to pre-validate these because we have to disable address checking
- * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
- * user specifying bad addresses here. Well, since we're dealing with 32 bit
- * addresses, we KNOW that access_ok() will always succeed, so this is an
- * expensive NOP, but so what...
- */
- namep = (int __user *) compat_ptr(a32.name);
- oldvalp = compat_ptr(a32.oldval);
- newvalp = compat_ptr(a32.newval);
-
- if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp)))
- || !access_ok(VERIFY_WRITE, namep, 0)
- || !access_ok(VERIFY_WRITE, oldvalp, 0)
- || !access_ok(VERIFY_WRITE, newvalp, 0))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- lock_kernel();
- ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen,
- newvalp, (size_t) a32.newlen);
- unlock_kernel();
- set_fs(old_fs);
-
- if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp)))
- return -EFAULT;
-
- return ret;
-}
-#endif
-
asmlinkage long
sys32_newuname (struct new_utsname __user *name)
{
diff --git a/trunk/arch/ia64/include/asm/swiotlb.h b/trunk/arch/ia64/include/asm/swiotlb.h
index dcbaea7ce128..f0acde68aaea 100644
--- a/trunk/arch/ia64/include/asm/swiotlb.h
+++ b/trunk/arch/ia64/include/asm/swiotlb.h
@@ -4,8 +4,6 @@
#include
#include
-extern int swiotlb_force;
-
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
extern void pci_swiotlb_init(void);
diff --git a/trunk/arch/ia64/kernel/crash.c b/trunk/arch/ia64/kernel/crash.c
index 6631a9dfafdc..b942f4032d7a 100644
--- a/trunk/arch/ia64/kernel/crash.c
+++ b/trunk/arch/ia64/kernel/crash.c
@@ -239,32 +239,29 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
#ifdef CONFIG_SYSCTL
static ctl_table kdump_ctl_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
.data = &kdump_on_init,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_fatal_mca",
.data = &kdump_on_fatal_mca,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table sys_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kdump_ctl_table,
},
- { .ctl_name = 0 }
+ { }
};
#endif
diff --git a/trunk/arch/ia64/kernel/pci-swiotlb.c b/trunk/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c6..53292abf846c 100644
--- a/trunk/arch/ia64/kernel/pci-swiotlb.c
+++ b/trunk/arch/ia64/kernel/pci-swiotlb.c
@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = {
void __init swiotlb_dma_init(void)
{
dma_ops = &swiotlb_dma_ops;
- swiotlb_init();
+ swiotlb_init(1);
}
void __init pci_swiotlb_init(void)
@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void)
swiotlb = 1;
printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
machvec_init("dig");
- swiotlb_init();
+ swiotlb_init(1);
dma_ops = &swiotlb_dma_ops;
#else
panic("Unable to find Intel IOMMU");
diff --git a/trunk/arch/ia64/kernel/perfmon.c b/trunk/arch/ia64/kernel/perfmon.c
index f1782705b1f7..402698b6689f 100644
--- a/trunk/arch/ia64/kernel/perfmon.c
+++ b/trunk/arch/ia64/kernel/perfmon.c
@@ -522,42 +522,37 @@ EXPORT_SYMBOL(pfm_sysctl);
static ctl_table pfm_ctl_table[]={
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "debug",
.data = &pfm_sysctl.debug,
.maxlen = sizeof(int),
.mode = 0666,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "debug_ovfl",
.data = &pfm_sysctl.debug_ovfl,
.maxlen = sizeof(int),
.mode = 0666,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "fastctxsw",
.data = &pfm_sysctl.fastctxsw,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "expert_mode",
.data = &pfm_sysctl.expert_mode,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{}
};
static ctl_table pfm_sysctl_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "perfmon",
.mode = 0555,
.child = pfm_ctl_table,
@@ -566,7 +561,6 @@ static ctl_table pfm_sysctl_dir[] = {
};
static ctl_table pfm_sysctl_root[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = pfm_sysctl_dir,
diff --git a/trunk/arch/microblaze/include/asm/prom.h b/trunk/arch/microblaze/include/asm/prom.h
index 37e6f305a68e..ef3ec1d6ceb3 100644
--- a/trunk/arch/microblaze/include/asm/prom.h
+++ b/trunk/arch/microblaze/include/asm/prom.h
@@ -12,23 +12,15 @@
* 2 of the License, or (at your option) any later version.
*/
+#include /* linux/of.h gets to determine #include ordering */
+
#ifndef _ASM_MICROBLAZE_PROM_H
#define _ASM_MICROBLAZE_PROM_H
#ifdef __KERNEL__
-
-/* Definitions used by the flattened device tree */
-#define OF_DT_HEADER 0xd00dfeed /* marker */
-#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */
-#define OF_DT_END_NODE 0x2 /* End node */
-#define OF_DT_PROP 0x3 /* Property: name off, size, content */
-#define OF_DT_NOP 0x4 /* nop */
-#define OF_DT_END 0x9
-
-#define OF_DT_VERSION 0x10
-
#ifndef __ASSEMBLY__
#include
+#include
#include
#include
#include
@@ -41,122 +33,19 @@
#define of_prop_cmp(s1, s2) strcmp((s1), (s2))
#define of_node_cmp(s1, s2) strcasecmp((s1), (s2))
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header {
- u32 magic; /* magic word OF_DT_HEADER */
- u32 totalsize; /* total size of DT block */
- u32 off_dt_struct; /* offset to structure */
- u32 off_dt_strings; /* offset to strings */
- u32 off_mem_rsvmap; /* offset to memory reserve map */
- u32 version; /* format version */
- u32 last_comp_version; /* last compatible version */
- /* version 2 fields below */
- u32 boot_cpuid_phys; /* Physical CPU id we're booting on */
- /* version 3 fields below */
- u32 dt_strings_size; /* size of the DT strings block */
- /* version 17 fields below */
- u32 dt_struct_size; /* size of the DT structure block */
-};
-
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
- char *name;
- int length;
- void *value;
- struct property *next;
-};
-
-struct device_node {
- const char *name;
- const char *type;
- phandle node;
- phandle linux_phandle;
- char *full_name;
-
- struct property *properties;
- struct property *deadprops; /* removed properties */
- struct device_node *parent;
- struct device_node *child;
- struct device_node *sibling;
- struct device_node *next; /* next device of same type */
- struct device_node *allnext; /* next in list of all nodes */
- struct proc_dir_entry *pde; /* this node's proc directory */
- struct kref kref;
- unsigned long _flags;
- void *data;
-};
-
extern struct device_node *of_chosen;
-static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
-{
- return test_bit(flag, &n->_flags);
-}
-
-static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
-{
- set_bit(flag, &n->_flags);
-}
-
#define HAVE_ARCH_DEVTREE_FIXUPS
-static inline void set_node_proc_entry(struct device_node *dn,
- struct proc_dir_entry *de)
-{
- dn->pde = de;
-}
-
extern struct device_node *allnodes; /* temporary while merging */
extern rwlock_t devtree_lock; /* temporary while merging */
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-
-/* For scanning the flat device-tree at boot time */
-extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data);
-extern void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size);
-extern int __init
- of_flat_dt_is_compatible(unsigned long node, const char *name);
-extern unsigned long __init of_get_flat_dt_root(void);
-
/* For updating the device tree at runtime */
extern void of_attach_node(struct device_node *);
extern void of_detach_node(struct device_node *);
/* Other Prototypes */
-extern void finish_device_tree(void);
-extern void unflatten_device_tree(void);
extern int early_uartlite_console(void);
-extern void early_init_devtree(void *);
-extern int machine_is_compatible(const char *compat);
-extern void print_properties(struct device_node *node);
-extern int prom_n_intr_cells(struct device_node *np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node *np, struct property *prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
- struct property *newprop,
- struct property *oldprop);
extern struct resource *request_OF_resource(struct device_node *node,
int index, const char *name_postfix);
@@ -166,18 +55,6 @@ extern int release_OF_resource(struct device_node *node, int index);
* OF address retreival & translation
*/
-/* Helper to read a big number; size is in cells (not bytes) */
-static inline u64 of_read_number(const u32 *cell, int size)
-{
- u64 r = 0;
- while (size--)
- r = (r << 32) | *(cell++);
- return r;
-}
-
-/* Like of_read_number, but we want an unsigned long result */
-#define of_read_ulong(cell, size) of_read_number(cell, size)
-
/* Translate an OF address block into a CPU physical address
*/
extern u64 of_translate_address(struct device_node *np, const u32 *addr);
@@ -305,12 +182,6 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
*/
extern void __iomem *of_iomap(struct device_node *device, int index);
-/*
- * NB: This is here while we transition from using asm/prom.h
- * to linux/of.h
- */
-#include
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_MICROBLAZE_PROM_H */
diff --git a/trunk/arch/microblaze/kernel/head.S b/trunk/arch/microblaze/kernel/head.S
index 697ce3007f30..30916193fcc7 100644
--- a/trunk/arch/microblaze/kernel/head.S
+++ b/trunk/arch/microblaze/kernel/head.S
@@ -31,7 +31,7 @@
#include
#include
#include
-#include /* for OF_DT_HEADER */
+#include /* for OF_DT_HEADER */
#ifdef CONFIG_MMU
#include /* COMMAND_LINE_SIZE */
diff --git a/trunk/arch/microblaze/kernel/prom.c b/trunk/arch/microblaze/kernel/prom.c
index c005cc6f1aaf..b817df172aa9 100644
--- a/trunk/arch/microblaze/kernel/prom.c
+++ b/trunk/arch/microblaze/kernel/prom.c
@@ -859,29 +859,6 @@ struct device_node *of_find_node_by_phandle(phandle handle)
}
EXPORT_SYMBOL(of_find_node_by_phandle);
-/**
- * of_find_all_nodes - Get next node in global list
- * @prev: Previous node or NULL to start iteration
- * of_node_put() will be called on it
- *
- * Returns a node pointer with refcount incremented, use
- * of_node_put() on it when done.
- */
-struct device_node *of_find_all_nodes(struct device_node *prev)
-{
- struct device_node *np;
-
- read_lock(&devtree_lock);
- np = prev ? prev->allnext : allnodes;
- for (; np != NULL; np = np->allnext)
- if (of_node_get(np))
- break;
- of_node_put(prev);
- read_unlock(&devtree_lock);
- return np;
-}
-EXPORT_SYMBOL(of_find_all_nodes);
-
/**
* of_node_get - Increment refcount of a node
* @node: Node to inc refcount, NULL is supported to
diff --git a/trunk/arch/mips/Kconfig b/trunk/arch/mips/Kconfig
index 1aad0d9f5074..fd7620f025fa 100644
--- a/trunk/arch/mips/Kconfig
+++ b/trunk/arch/mips/Kconfig
@@ -358,7 +358,14 @@ config SGI_IP22
select SWAP_IO_SPACE
select SYS_HAS_CPU_R4X00
select SYS_HAS_CPU_R5000
- select SYS_HAS_EARLY_PRINTK
+ #
+ # Disable EARLY_PRINTK for now since it leads to overwritten prom
+ # memory during early boot on some machines.
+ #
+ # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
+ # for a more details discussion
+ #
+ # select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
@@ -410,7 +417,14 @@ config SGI_IP28
select SGI_HAS_ZILOG
select SWAP_IO_SPACE
select SYS_HAS_CPU_R10000
- select SYS_HAS_EARLY_PRINTK
+ #
+ # Disable EARLY_PRINTK for now since it leads to overwritten prom
+ # memory during early boot on some machines.
+ #
+ # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com
+ # for a more details discussion
+ #
+ # select SYS_HAS_EARLY_PRINTK
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
help
@@ -1439,6 +1453,7 @@ choice
config PAGE_SIZE_4KB
bool "4kB"
+ depends on !CPU_LOONGSON2
help
This option select the standard 4kB Linux page size. On some
R3000-family processors this is the only available page size. Using
@@ -1763,7 +1778,7 @@ config SYS_SUPPORTS_SMARTMIPS
config ARCH_FLATMEM_ENABLE
def_bool y
- depends on !NUMA
+ depends on !NUMA && !CPU_LOONGSON2
config ARCH_DISCONTIGMEM_ENABLE
bool
diff --git a/trunk/arch/mips/include/asm/bug.h b/trunk/arch/mips/include/asm/bug.h
index 6cf29c26e873..540c98a810d1 100644
--- a/trunk/arch/mips/include/asm/bug.h
+++ b/trunk/arch/mips/include/asm/bug.h
@@ -11,9 +11,7 @@
static inline void __noreturn BUG(void)
{
__asm__ __volatile__("break %0" : : "i" (BRK_BUG));
- /* Fool GCC into thinking the function doesn't return. */
- while (1)
- ;
+ unreachable();
}
#define HAVE_ARCH_BUG
diff --git a/trunk/arch/mips/include/asm/mman.h b/trunk/arch/mips/include/asm/mman.h
index a2250f390a29..c892bfb3e2c1 100644
--- a/trunk/arch/mips/include/asm/mman.h
+++ b/trunk/arch/mips/include/asm/mman.h
@@ -75,6 +75,7 @@
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
+#define MADV_HWPOISON 100 /* poison a page for testing */
/* compatibility flags */
#define MAP_FILE 0
diff --git a/trunk/arch/mips/include/asm/system.h b/trunk/arch/mips/include/asm/system.h
index fcf5f98d90cc..83b5509e09e8 100644
--- a/trunk/arch/mips/include/asm/system.h
+++ b/trunk/arch/mips/include/asm/system.h
@@ -12,6 +12,7 @@
#ifndef _ASM_SYSTEM_H
#define _ASM_SYSTEM_H
+#include
#include
#include
@@ -193,10 +194,6 @@ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 v
#define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels
#endif
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid xchg(). */
-extern void __xchg_called_with_bad_pointer(void);
-
static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
{
switch (size) {
@@ -205,11 +202,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
case 8:
return __xchg_u64(ptr, x);
}
- __xchg_called_with_bad_pointer();
+
return x;
}
-#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, x) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc); \
+ \
+ ((__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
+})
extern void set_handler(unsigned long offset, void *addr, unsigned long len);
extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len);
diff --git a/trunk/arch/mips/kernel/linux32.c b/trunk/arch/mips/kernel/linux32.c
index b77fefaff9da..1a2793efdc4e 100644
--- a/trunk/arch/mips/kernel/linux32.c
+++ b/trunk/arch/mips/kernel/linux32.c
@@ -265,67 +265,6 @@ SYSCALL_DEFINE5(n32_msgrcv, int, msqid, u32, msgp, size_t, msgsz,
}
#endif
-struct sysctl_args32
-{
- compat_caddr_t name;
- int nlen;
- compat_caddr_t oldval;
- compat_caddr_t oldlenp;
- compat_caddr_t newval;
- compat_size_t newlen;
- unsigned int __unused[4];
-};
-
-#ifdef CONFIG_SYSCTL_SYSCALL
-
-SYSCALL_DEFINE1(32_sysctl, struct sysctl_args32 __user *, args)
-{
- struct sysctl_args32 tmp;
- int error;
- size_t oldlen;
- size_t __user *oldlenp = NULL;
- unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- if (get_user(oldlen, (u32 __user *)A(tmp.oldlenp)) ||
- put_user(oldlen, (size_t __user *)addr))
- return -EFAULT;
- oldlenp = (size_t __user *)addr;
- }
-
- lock_kernel();
- error = do_sysctl((int __user *)A(tmp.name), tmp.nlen, (void __user *)A(tmp.oldval),
- oldlenp, (void __user *)A(tmp.newval), tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, (size_t __user *)addr) ||
- put_user(oldlen, (u32 __user *)A(tmp.oldlenp)))
- error = -EFAULT;
- }
- copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused));
- }
- return error;
-}
-
-#else
-
-SYSCALL_DEFINE1(32_sysctl, struct sysctl_args32 __user *, args)
-{
- return -ENOSYS;
-}
-
-#endif /* CONFIG_SYSCTL_SYSCALL */
-
SYSCALL_DEFINE1(32_newuname, struct new_utsname __user *, name)
{
int ret = 0;
diff --git a/trunk/arch/mips/kernel/scall64-n32.S b/trunk/arch/mips/kernel/scall64-n32.S
index 6ebc07976694..8a0be0bdebad 100644
--- a/trunk/arch/mips/kernel/scall64-n32.S
+++ b/trunk/arch/mips/kernel/scall64-n32.S
@@ -272,7 +272,7 @@ EXPORT(sysn32_call_table)
PTR sys_munlockall
PTR sys_vhangup /* 6150 */
PTR sys_pivot_root
- PTR sys_32_sysctl
+ PTR compat_sys_sysctl
PTR sys_prctl
PTR compat_sys_adjtimex
PTR compat_sys_setrlimit /* 6155 */
diff --git a/trunk/arch/mips/kernel/scall64-o32.S b/trunk/arch/mips/kernel/scall64-o32.S
index 14dde4ca932e..41dbdb7d67e5 100644
--- a/trunk/arch/mips/kernel/scall64-o32.S
+++ b/trunk/arch/mips/kernel/scall64-o32.S
@@ -356,7 +356,7 @@ sys_call_table:
PTR sys_ni_syscall /* 4150 */
PTR sys_getsid
PTR sys_fdatasync
- PTR sys_32_sysctl
+ PTR compat_sys_sysctl
PTR sys_mlock
PTR sys_munlock /* 4155 */
PTR sys_mlockall
diff --git a/trunk/arch/mips/kernel/syscall.c b/trunk/arch/mips/kernel/syscall.c
index 3fe1fcfa2e73..fe0d79805603 100644
--- a/trunk/arch/mips/kernel/syscall.c
+++ b/trunk/arch/mips/kernel/syscall.c
@@ -306,6 +306,7 @@ static inline int mips_atomic_set(struct pt_regs *regs,
if (cpu_has_llsc && R10000_LLSC_WAR) {
__asm__ __volatile__ (
+ " .set mips3 \n"
" li %[err], 0 \n"
"1: ll %[old], (%[addr]) \n"
" move %[tmp], %[new] \n"
@@ -320,6 +321,7 @@ static inline int mips_atomic_set(struct pt_regs *regs,
" "STR(PTR)" 1b, 4b \n"
" "STR(PTR)" 2b, 4b \n"
" .previous \n"
+ " .set mips0 \n"
: [old] "=&r" (old),
[err] "=&r" (err),
[tmp] "=&r" (tmp)
@@ -329,6 +331,7 @@ static inline int mips_atomic_set(struct pt_regs *regs,
: "memory");
} else if (cpu_has_llsc) {
__asm__ __volatile__ (
+ " .set mips3 \n"
" li %[err], 0 \n"
"1: ll %[old], (%[addr]) \n"
" move %[tmp], %[new] \n"
@@ -347,6 +350,7 @@ static inline int mips_atomic_set(struct pt_regs *regs,
" "STR(PTR)" 1b, 5b \n"
" "STR(PTR)" 2b, 5b \n"
" .previous \n"
+ " .set mips0 \n"
: [old] "=&r" (old),
[err] "=&r" (err),
[tmp] "=&r" (tmp)
diff --git a/trunk/arch/mips/lasat/sysctl.c b/trunk/arch/mips/lasat/sysctl.c
index b3deed8db619..14b9a28a4aec 100644
--- a/trunk/arch/mips/lasat/sysctl.c
+++ b/trunk/arch/mips/lasat/sysctl.c
@@ -37,23 +37,6 @@
#include "ds1603.h"
#endif
-/* Strategy function to write EEPROM after changing string entry */
-int sysctl_lasatstring(ctl_table *table,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen)
-{
- int r;
-
- r = sysctl_string(table, oldval, oldlenp, newval, newlen);
- if (r < 0)
- return r;
-
- if (newval && newlen)
- lasat_write_eeprom_info();
-
- return 0;
-}
-
/* And the same for proc */
int proc_dolasatstring(ctl_table *table, int write,
@@ -113,46 +96,6 @@ int proc_dolasatrtc(ctl_table *table, int write,
}
#endif
-/* Sysctl for setting the IP addresses */
-int sysctl_lasat_intvec(ctl_table *table,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen)
-{
- int r;
-
- r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
- if (r < 0)
- return r;
-
- if (newval && newlen)
- lasat_write_eeprom_info();
-
- return 0;
-}
-
-#ifdef CONFIG_DS1603
-/* Same for RTC */
-int sysctl_lasat_rtc(ctl_table *table,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen)
-{
- struct timespec ts;
- int r;
-
- read_persistent_clock(&ts);
- rtctmp = ts.tv_sec;
- if (rtctmp < 0)
- rtctmp = 0;
- r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
- if (r < 0)
- return r;
- if (newval && newlen)
- rtc_mips_set_mmss(rtctmp);
-
- return r;
-}
-#endif
-
#ifdef CONFIG_INET
int proc_lasat_ip(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -214,23 +157,6 @@ int proc_lasat_ip(ctl_table *table, int write,
}
#endif
-static int sysctl_lasat_prid(ctl_table *table,
- void *oldval, size_t *oldlenp,
- void *newval, size_t newlen)
-{
- int r;
-
- r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
- if (r < 0)
- return r;
- if (newval && newlen) {
- lasat_board_info.li_eeprom_info.prid = *(int *)newval;
- lasat_write_eeprom_info();
- lasat_init_board_info();
- }
- return 0;
-}
-
int proc_lasat_prid(ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -252,115 +178,92 @@ extern int lasat_boot_to_service;
static ctl_table lasat_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "cpu-hz",
.data = &lasat_board_info.li_cpu_hz,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "bus-hz",
.data = &lasat_board_info.li_bus_hz,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "bmid",
.data = &lasat_board_info.li_bmid,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "prid",
.data = &lasat_board_info.li_prid,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_lasat_prid,
- .strategy = &sysctl_lasat_prid
- },
+ .proc_handler = proc_lasat_prid,
+. },
#ifdef CONFIG_INET
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "ipaddr",
.data = &lasat_board_info.li_eeprom_info.ipaddr,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_lasat_ip,
- .strategy = &sysctl_lasat_intvec
+ .proc_handler = proc_lasat_ip,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "netmask",
.data = &lasat_board_info.li_eeprom_info.netmask,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_lasat_ip,
- .strategy = &sysctl_lasat_intvec
+ .proc_handler = proc_lasat_ip,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "passwd_hash",
.data = &lasat_board_info.li_eeprom_info.passwd_hash,
.maxlen =
sizeof(lasat_board_info.li_eeprom_info.passwd_hash),
.mode = 0600,
- .proc_handler = &proc_dolasatstring,
- .strategy = &sysctl_lasatstring
+ .proc_handler = proc_dolasatstring,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "boot-service",
.data = &lasat_boot_to_service,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_DS1603
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rtc",
.data = &rtctmp,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dolasatrtc,
- .strategy = &sysctl_lasat_rtc
+ .proc_handler = proc_dolasatrtc,
},
#endif
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "namestr",
.data = &lasat_board_info.li_namestr,
.maxlen = sizeof(lasat_board_info.li_namestr),
.mode = 0444,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string
+ .proc_handler = proc_dostring,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "typestr",
.data = &lasat_board_info.li_typestr,
.maxlen = sizeof(lasat_board_info.li_typestr),
.mode = 0444,
- .proc_handler = &proc_dostring,
- .strategy = &sysctl_string
+ .proc_handler = proc_dostring,
},
{}
};
static ctl_table lasat_root_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "lasat",
.mode = 0555,
.child = lasat_table
diff --git a/trunk/arch/mips/rb532/devices.c b/trunk/arch/mips/rb532/devices.c
index 9f40e1ff9b4f..041fc1afc3f4 100644
--- a/trunk/arch/mips/rb532/devices.c
+++ b/trunk/arch/mips/rb532/devices.c
@@ -110,7 +110,6 @@ static struct korina_device korina_dev0_data = {
static struct platform_device korina_dev0 = {
.id = -1,
.name = "korina",
- .dev.driver_data = &korina_dev0_data,
.resource = korina_dev0_res,
.num_resources = ARRAY_SIZE(korina_dev0_res),
};
@@ -332,6 +331,8 @@ static int __init plat_setup_devices(void)
/* set the uart clock to the current cpu frequency */
rb532_uart_res[0].uartclk = idt_cpu_freq;
+ dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data);
+
return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
}
diff --git a/trunk/arch/parisc/kernel/sys_parisc32.c b/trunk/arch/parisc/kernel/sys_parisc32.c
index 561388b17c91..76d23ec8dfaa 100644
--- a/trunk/arch/parisc/kernel/sys_parisc32.c
+++ b/trunk/arch/parisc/kernel/sys_parisc32.c
@@ -90,77 +90,6 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23,
return -ENOSYS;
}
-#ifdef CONFIG_SYSCTL
-
-struct __sysctl_args32 {
- u32 name;
- int nlen;
- u32 oldval;
- u32 oldlenp;
- u32 newval;
- u32 newlen;
- u32 __unused[4];
-};
-
-asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
-{
-#ifndef CONFIG_SYSCTL_SYSCALL
- return -ENOSYS;
-#else
- struct __sysctl_args32 tmp;
- int error;
- unsigned int oldlen32;
- size_t oldlen, __user *oldlenp = NULL;
- unsigned long addr = (((long __force)&args->__unused[0]) + 7) & ~7;
-
- DBG(("sysctl32(%p)\n", args));
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- /* a possibly better hack than this, which will avoid the
- * problem if the struct is read only, is to push the
- * 'oldlen' value out to the user's stack instead. -PB
- */
- if (get_user(oldlen32, (u32 *)(u64)tmp.oldlenp))
- return -EFAULT;
- oldlen = oldlen32;
- if (put_user(oldlen, (size_t *)addr))
- return -EFAULT;
- oldlenp = (size_t *)addr;
- }
-
- lock_kernel();
- error = do_sysctl((int __user *)(u64)tmp.name, tmp.nlen,
- (void __user *)(u64)tmp.oldval, oldlenp,
- (void __user *)(u64)tmp.newval, tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, (size_t *)addr)) {
- error = -EFAULT;
- } else {
- oldlen32 = oldlen;
- if (put_user(oldlen32, (u32 *)(u64)tmp.oldlenp))
- error = -EFAULT;
- }
- }
- if (copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)))
- error = -EFAULT;
- }
- return error;
-#endif
-}
-
-#endif /* CONFIG_SYSCTL */
-
asmlinkage long sys32_sched_rr_get_interval(pid_t pid,
struct compat_timespec __user *interval)
{
diff --git a/trunk/arch/parisc/kernel/syscall_table.S b/trunk/arch/parisc/kernel/syscall_table.S
index 843f423dec67..01c4fcf8f481 100644
--- a/trunk/arch/parisc/kernel/syscall_table.S
+++ b/trunk/arch/parisc/kernel/syscall_table.S
@@ -234,7 +234,7 @@
ENTRY_SAME(getsid)
ENTRY_SAME(fdatasync)
/* struct __sysctl_args is a mess */
- ENTRY_DIFF(sysctl)
+ ENTRY_COMP(sysctl)
ENTRY_SAME(mlock) /* 150 */
ENTRY_SAME(munlock)
ENTRY_SAME(mlockall)
diff --git a/trunk/arch/parisc/kernel/unwind.c b/trunk/arch/parisc/kernel/unwind.c
index 69dad5a850a8..a36799e85693 100644
--- a/trunk/arch/parisc/kernel/unwind.c
+++ b/trunk/arch/parisc/kernel/unwind.c
@@ -28,7 +28,7 @@
#define dbg(x...)
#endif
-#define KERNEL_START (KERNEL_BINARY_TEXT_START - 0x1000)
+#define KERNEL_START (KERNEL_BINARY_TEXT_START)
extern struct unwind_table_entry __start___unwind[];
extern struct unwind_table_entry __stop___unwind[];
diff --git a/trunk/arch/parisc/kernel/vmlinux.lds.S b/trunk/arch/parisc/kernel/vmlinux.lds.S
index fda4baa059b5..9dab4a4e09f7 100644
--- a/trunk/arch/parisc/kernel/vmlinux.lds.S
+++ b/trunk/arch/parisc/kernel/vmlinux.lds.S
@@ -78,9 +78,6 @@ SECTIONS
*/
. = ALIGN(PAGE_SIZE);
data_start = .;
- EXCEPTION_TABLE(16)
-
- NOTES
/* unwind info */
.PARISC.unwind : {
@@ -89,6 +86,9 @@ SECTIONS
__stop___unwind = .;
}
+ EXCEPTION_TABLE(16)
+ NOTES
+
/* Data */
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
diff --git a/trunk/arch/powerpc/Kconfig.debug b/trunk/arch/powerpc/Kconfig.debug
index 3b1005185390..bf3382f1904d 100644
--- a/trunk/arch/powerpc/Kconfig.debug
+++ b/trunk/arch/powerpc/Kconfig.debug
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
config HCALL_STATS
bool "Hypervisor call instrumentation"
- depends on PPC_PSERIES && DEBUG_FS
+ depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
help
Adds code to keep track of the number of hypervisor calls made and
the amount of time spent in hypervisor calls. Wall time spent in
diff --git a/trunk/arch/powerpc/configs/pseries_defconfig b/trunk/arch/powerpc/configs/pseries_defconfig
index f1889abb89b1..c568329723b8 100644
--- a/trunk/arch/powerpc/configs/pseries_defconfig
+++ b/trunk/arch/powerpc/configs/pseries_defconfig
@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_HCALL_STATS=y
+# CONFIG_HCALL_STATS is not set
# CONFIG_CODE_PATCHING_SELFTEST is not set
# CONFIG_FTR_FIXUP_SELFTEST is not set
# CONFIG_MSI_BITMAP_SELFTEST is not set
diff --git a/trunk/arch/powerpc/include/asm/emulated_ops.h b/trunk/arch/powerpc/include/asm/emulated_ops.h
index 9154e8526732..f0fb4fc1f6e6 100644
--- a/trunk/arch/powerpc/include/asm/emulated_ops.h
+++ b/trunk/arch/powerpc/include/asm/emulated_ops.h
@@ -19,6 +19,7 @@
#define _ASM_POWERPC_EMULATED_OPS_H
#include
+#include
#ifdef CONFIG_PPC_EMULATED_STATS
@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
extern void ppc_warn_emulated_print(const char *type);
-#define PPC_WARN_EMULATED(type) \
+#define __PPC_WARN_EMULATED(type) \
do { \
atomic_inc(&ppc_emulated.type.val); \
if (ppc_warn_emulated) \
@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
#else /* !CONFIG_PPC_EMULATED_STATS */
-#define PPC_WARN_EMULATED(type) do { } while (0)
+#define __PPC_WARN_EMULATED(type) do { } while (0)
#endif /* !CONFIG_PPC_EMULATED_STATS */
+#define PPC_WARN_EMULATED(type, regs) \
+ do { \
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
+ 1, 0, regs, 0); \
+ __PPC_WARN_EMULATED(type); \
+ } while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs) \
+ do { \
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
+ 1, 0, regs, regs->dar); \
+ __PPC_WARN_EMULATED(type); \
+ } while (0)
+
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
diff --git a/trunk/arch/powerpc/include/asm/hvcall.h b/trunk/arch/powerpc/include/asm/hvcall.h
index 6251a4b10be7..c27caac47ad1 100644
--- a/trunk/arch/powerpc/include/asm/hvcall.h
+++ b/trunk/arch/powerpc/include/asm/hvcall.h
@@ -274,6 +274,8 @@ struct hcall_stats {
unsigned long num_calls; /* number of calls (on this CPU) */
unsigned long tb_total; /* total wall time (mftb) of calls. */
unsigned long purr_total; /* total cpu time (PURR) of calls. */
+ unsigned long tb_start;
+ unsigned long purr_start;
};
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
diff --git a/trunk/arch/powerpc/include/asm/kmap_types.h b/trunk/arch/powerpc/include/asm/kmap_types.h
index b6bac6f61c16..916369575c97 100644
--- a/trunk/arch/powerpc/include/asm/kmap_types.h
+++ b/trunk/arch/powerpc/include/asm/kmap_types.h
@@ -29,5 +29,16 @@ enum km_type {
KM_TYPE_NR
};
+/*
+ * This is a temporary build fix that (so they say on lkml....) should no longer
+ * be required after 2.6.33, because of changes planned to the kmap code.
+ * Let's try to remove this cruft then.
+ */
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define KM_NMI (-1)
+#define KM_NMI_PTE (-1)
+#define KM_IRQ_PTE (-1)
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KMAP_TYPES_H */
diff --git a/trunk/arch/powerpc/include/asm/prom.h b/trunk/arch/powerpc/include/asm/prom.h
index 6ff04185d2aa..2ab9cbd98826 100644
--- a/trunk/arch/powerpc/include/asm/prom.h
+++ b/trunk/arch/powerpc/include/asm/prom.h
@@ -1,3 +1,4 @@
+#include /* linux/of.h gets to determine #include ordering */
#ifndef _POWERPC_PROM_H
#define _POWERPC_PROM_H
#ifdef __KERNEL__
@@ -16,6 +17,7 @@
* 2 of the License, or (at your option) any later version.
*/
#include
+#include
#include
#include
#include
@@ -28,133 +30,14 @@
#define of_prop_cmp(s1, s2) strcmp((s1), (s2))
#define of_node_cmp(s1, s2) strcasecmp((s1), (s2))
-/* Definitions used by the flattened device tree */
-#define OF_DT_HEADER 0xd00dfeed /* marker */
-#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */
-#define OF_DT_END_NODE 0x2 /* End node */
-#define OF_DT_PROP 0x3 /* Property: name off, size,
- * content */
-#define OF_DT_NOP 0x4 /* nop */
-#define OF_DT_END 0x9
-
-#define OF_DT_VERSION 0x10
-
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header
-{
- u32 magic; /* magic word OF_DT_HEADER */
- u32 totalsize; /* total size of DT block */
- u32 off_dt_struct; /* offset to structure */
- u32 off_dt_strings; /* offset to strings */
- u32 off_mem_rsvmap; /* offset to memory reserve map */
- u32 version; /* format version */
- u32 last_comp_version; /* last compatible version */
- /* version 2 fields below */
- u32 boot_cpuid_phys; /* Physical CPU id we're booting on */
- /* version 3 fields below */
- u32 dt_strings_size; /* size of the DT strings block */
- /* version 17 fields below */
- u32 dt_struct_size; /* size of the DT structure block */
-};
-
-
-
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
- char *name;
- int length;
- void *value;
- struct property *next;
-};
-
-struct device_node {
- const char *name;
- const char *type;
- phandle node;
- phandle linux_phandle;
- char *full_name;
-
- struct property *properties;
- struct property *deadprops; /* removed properties */
- struct device_node *parent;
- struct device_node *child;
- struct device_node *sibling;
- struct device_node *next; /* next device of same type */
- struct device_node *allnext; /* next in list of all nodes */
- struct proc_dir_entry *pde; /* this node's proc directory */
- struct kref kref;
- unsigned long _flags;
- void *data;
-};
-
extern struct device_node *of_chosen;
-static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
-{
- return test_bit(flag, &n->_flags);
-}
-
-static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
-{
- set_bit(flag, &n->_flags);
-}
-
-
#define HAVE_ARCH_DEVTREE_FIXUPS
-static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
-{
- dn->pde = de;
-}
-
-
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-
-/* For scanning the flat device-tree at boot time */
-extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data);
-extern void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size);
-extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name);
-extern unsigned long __init of_get_flat_dt_root(void);
-
/* For updating the device tree at runtime */
extern void of_attach_node(struct device_node *);
extern void of_detach_node(struct device_node *);
-/* Other Prototypes */
-extern void finish_device_tree(void);
-extern void unflatten_device_tree(void);
-extern void early_init_devtree(void *);
-extern int machine_is_compatible(const char *compat);
-extern void print_properties(struct device_node *node);
-extern int prom_n_intr_cells(struct device_node* np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
- struct property *newprop,
- struct property *oldprop);
-
#ifdef CONFIG_PPC32
/*
* PCI <-> OF matching functions
@@ -178,26 +61,6 @@ extern int release_OF_resource(struct device_node* node, int index);
* OF address retreival & translation
*/
-
-/* Helper to read a big number; size is in cells (not bytes) */
-static inline u64 of_read_number(const u32 *cell, int size)
-{
- u64 r = 0;
- while (size--)
- r = (r << 32) | *(cell++);
- return r;
-}
-
-/* Like of_read_number, but we want an unsigned long result */
-#ifdef CONFIG_PPC32
-static inline unsigned long of_read_ulong(const u32 *cell, int size)
-{
- return cell[size-1];
-}
-#else
-#define of_read_ulong(cell, size) of_read_number(cell, size)
-#endif
-
/* Translate an OF address block into a CPU physical address
*/
extern u64 of_translate_address(struct device_node *np, const u32 *addr);
@@ -349,11 +212,5 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
*/
extern void __iomem *of_iomap(struct device_node *device, int index);
-/*
- * NB: This is here while we transition from using asm/prom.h
- * to linux/of.h
- */
-#include
-
#endif /* __KERNEL__ */
#endif /* _POWERPC_PROM_H */
diff --git a/trunk/arch/powerpc/include/asm/reg.h b/trunk/arch/powerpc/include/asm/reg.h
index 6315edc205d8..bc8dd53f718a 100644
--- a/trunk/arch/powerpc/include/asm/reg.h
+++ b/trunk/arch/powerpc/include/asm/reg.h
@@ -489,6 +489,8 @@
#define SPRN_MMCR1 798
#define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
+#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+#define MMCRA_SDAR_ERAT_MISS 0x20000000UL
#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
diff --git a/trunk/arch/powerpc/include/asm/trace.h b/trunk/arch/powerpc/include/asm/trace.h
new file mode 100644
index 000000000000..cbe2297d68b6
--- /dev/null
+++ b/trunk/arch/powerpc/include/asm/trace.h
@@ -0,0 +1,133 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include
+
+struct pt_regs;
+
+TRACE_EVENT(irq_entry,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs),
+
+ TP_STRUCT__entry(
+ __field(struct pt_regs *, regs)
+ ),
+
+ TP_fast_assign(
+ __entry->regs = regs;
+ ),
+
+ TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(irq_exit,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs),
+
+ TP_STRUCT__entry(
+ __field(struct pt_regs *, regs)
+ ),
+
+ TP_fast_assign(
+ __entry->regs = regs;
+ ),
+
+ TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_entry,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs),
+
+ TP_STRUCT__entry(
+ __field(struct pt_regs *, regs)
+ ),
+
+ TP_fast_assign(
+ __entry->regs = regs;
+ ),
+
+ TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_exit,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs),
+
+ TP_STRUCT__entry(
+ __field(struct pt_regs *, regs)
+ ),
+
+ TP_fast_assign(
+ __entry->regs = regs;
+ ),
+
+ TP_printk("pt_regs=%p", __entry->regs)
+);
+
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+ TP_PROTO(unsigned long opcode, unsigned long *args),
+
+ TP_ARGS(opcode, args),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, opcode)
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = opcode;
+ ),
+
+ TP_printk("opcode=%lu", __entry->opcode),
+
+ hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+ TP_PROTO(unsigned long opcode, unsigned long retval,
+ unsigned long *retbuf),
+
+ TP_ARGS(opcode, retval, retbuf),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, opcode)
+ __field(unsigned long, retval)
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = opcode;
+ __entry->retval = retval;
+ ),
+
+ TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+ hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include
diff --git a/trunk/arch/powerpc/kernel/align.c b/trunk/arch/powerpc/kernel/align.c
index a5b632e52fae..3839839f83c7 100644
--- a/trunk/arch/powerpc/kernel/align.c
+++ b/trunk/arch/powerpc/kernel/align.c
@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
- PPC_WARN_EMULATED(spe);
+ PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
flags |= SPLT;
nb = 8;
}
- PPC_WARN_EMULATED(vsx);
+ PPC_WARN_ALIGNMENT(vsx, regs);
return emulate_vsx(addr, reg, areg, regs, flags, nb);
}
#endif
@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
* the exception of DCBZ which is handled as a special case here
*/
if (instr == DCBZ) {
- PPC_WARN_EMULATED(dcbz);
+ PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
}
if (unlikely(nb == 0))
@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
* function
*/
if (flags & M) {
- PPC_WARN_EMULATED(multiple);
+ PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
}
@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */
if (nb == 16) {
- PPC_WARN_EMULATED(fp_pair);
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
return emulate_fp_pair(addr, reg, flags);
}
- PPC_WARN_EMULATED(unaligned);
+ PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else
* get it from register values
diff --git a/trunk/arch/powerpc/kernel/entry_64.S b/trunk/arch/powerpc/kernel/entry_64.S
index 9763267e38b4..bdcb557d470a 100644
--- a/trunk/arch/powerpc/kernel/entry_64.S
+++ b/trunk/arch/powerpc/kernel/entry_64.S
@@ -551,7 +551,7 @@ restore:
BEGIN_FW_FTR_SECTION
ld r5,SOFTE(r1)
FW_FTR_SECTION_ELSE
- b iseries_check_pending_irqs
+ b .Liseries_check_pending_irqs
ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_BOOK3E */
-iseries_check_pending_irqs:
+.Liseries_check_pending_irqs:
#ifdef CONFIG_PPC_ISERIES
ld r5,SOFTE(r1)
cmpdi 0,r5,0
diff --git a/trunk/arch/powerpc/kernel/exceptions-64s.S b/trunk/arch/powerpc/kernel/exceptions-64s.S
index 1808876edcc9..c7eb4e0eb86c 100644
--- a/trunk/arch/powerpc/kernel/exceptions-64s.S
+++ b/trunk/arch/powerpc/kernel/exceptions-64s.S
@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
* prolog code of the PerformanceMonitor one. A little
* trickery is thus necessary
*/
+performance_monitor_pSeries_1:
. = 0xf00
b performance_monitor_pSeries
+altivec_unavailable_pSeries_1:
. = 0xf20
b altivec_unavailable_pSeries
+vsx_unavailable_pSeries_1:
. = 0xf40
b vsx_unavailable_pSeries
diff --git a/trunk/arch/powerpc/kernel/idle.c b/trunk/arch/powerpc/kernel/idle.c
index 88d9c1d5e5fb..049dda60e475 100644
--- a/trunk/arch/powerpc/kernel/idle.c
+++ b/trunk/arch/powerpc/kernel/idle.c
@@ -110,18 +110,16 @@ int powersave_nap;
*/
static ctl_table powersave_nap_ctl_table[]={
{
- .ctl_name = KERN_PPC_POWERSAVE_NAP,
.procname = "powersave-nap",
.data = &powersave_nap,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{}
};
static ctl_table powersave_nap_sysctl_root[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = powersave_nap_ctl_table,
diff --git a/trunk/arch/powerpc/kernel/irq.c b/trunk/arch/powerpc/kernel/irq.c
index e5d121177984..02a334662cc0 100644
--- a/trunk/arch/powerpc/kernel/irq.c
+++ b/trunk/arch/powerpc/kernel/irq.c
@@ -70,6 +70,8 @@
#include
#include
#endif
+#define CREATE_TRACE_POINTS
+#include
int __irq_offset_value;
static int ppc_spurious_interrupts;
@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq;
+ trace_irq_entry(regs);
+
irq_enter();
check_stack_overflow();
@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
timer_interrupt(regs);
}
#endif
+
+ trace_irq_exit(regs);
}
void __init init_IRQ(void)
diff --git a/trunk/arch/powerpc/kernel/perf_event.c b/trunk/arch/powerpc/kernel/perf_event.c
index 87f1663584b0..1eb85fbf53a5 100644
--- a/trunk/arch/powerpc/kernel/perf_event.c
+++ b/trunk/arch/powerpc/kernel/perf_event.c
@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
*/
if (record) {
struct perf_sample_data data = {
- .addr = 0,
+ .addr = ~0ULL,
.period = event->hw.last_period,
};
diff --git a/trunk/arch/powerpc/kernel/power5+-pmu.c b/trunk/arch/powerpc/kernel/power5+-pmu.c
index 0f4c1c73a6ad..199de527d411 100644
--- a/trunk/arch/powerpc/kernel/power5+-pmu.c
+++ b/trunk/arch/powerpc/kernel/power5+-pmu.c
@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
-/*
- * Bits in MMCRA
- */
-
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
diff --git a/trunk/arch/powerpc/kernel/power5-pmu.c b/trunk/arch/powerpc/kernel/power5-pmu.c
index c351b3a57fbb..98b6a729a9dd 100644
--- a/trunk/arch/powerpc/kernel/power5-pmu.c
+++ b/trunk/arch/powerpc/kernel/power5-pmu.c
@@ -72,10 +72,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0x7f
-/*
- * Bits in MMCRA
- */
-
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
- unsigned long mmcra = 0;
+ unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp;
int i, isbus, bit, grsel;
diff --git a/trunk/arch/powerpc/kernel/power6-pmu.c b/trunk/arch/powerpc/kernel/power6-pmu.c
index ca399ba5034c..84a607bda8fb 100644
--- a/trunk/arch/powerpc/kernel/power6-pmu.c
+++ b/trunk/arch/powerpc/kernel/power6-pmu.c
@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
- unsigned long mmcra = 0;
+ unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
int i;
unsigned int pmc, ev, b, u, s, psel;
unsigned int ttmset = 0;
diff --git a/trunk/arch/powerpc/kernel/power7-pmu.c b/trunk/arch/powerpc/kernel/power7-pmu.c
index 28a4daacdc02..852f7b7f6b40 100644
--- a/trunk/arch/powerpc/kernel/power7-pmu.c
+++ b/trunk/arch/powerpc/kernel/power7-pmu.c
@@ -50,10 +50,6 @@
#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK 0xff
-/*
- * Bits in MMCRA
- */
-
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[])
{
unsigned long mmcr1 = 0;
- unsigned long mmcra = 0;
+ unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
unsigned int pmc, unit, combine, l2sel, psel;
unsigned int pmc_inuse = 0;
int i;
diff --git a/trunk/arch/powerpc/kernel/ppc970-pmu.c b/trunk/arch/powerpc/kernel/ppc970-pmu.c
index 479574413a93..8eff48e20dba 100644
--- a/trunk/arch/powerpc/kernel/ppc970-pmu.c
+++ b/trunk/arch/powerpc/kernel/ppc970-pmu.c
@@ -83,10 +83,6 @@ static short mmcr1_adder_bits[8] = {
MMCR1_PMC8_ADDER_SEL_SH
};
-/*
- * Bits in MMCRA
- */
-
/*
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
diff --git a/trunk/arch/powerpc/kernel/prom.c b/trunk/arch/powerpc/kernel/prom.c
index d4405b95bfaa..4ec300862466 100644
--- a/trunk/arch/powerpc/kernel/prom.c
+++ b/trunk/arch/powerpc/kernel/prom.c
@@ -1316,29 +1316,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np)
return NULL;
}
-/**
- * of_find_all_nodes - Get next node in global list
- * @prev: Previous node or NULL to start iteration
- * of_node_put() will be called on it
- *
- * Returns a node pointer with refcount incremented, use
- * of_node_put() on it when done.
- */
-struct device_node *of_find_all_nodes(struct device_node *prev)
-{
- struct device_node *np;
-
- read_lock(&devtree_lock);
- np = prev ? prev->allnext : allnodes;
- for (; np != 0; np = np->allnext)
- if (of_node_get(np))
- break;
- of_node_put(prev);
- read_unlock(&devtree_lock);
- return np;
-}
-EXPORT_SYMBOL(of_find_all_nodes);
-
/**
* of_node_get - Increment refcount of a node
* @node: Node to inc refcount, NULL is supported to
diff --git a/trunk/arch/powerpc/kernel/setup-common.c b/trunk/arch/powerpc/kernel/setup-common.c
index 4271f7a655a3..845c72ab7357 100644
--- a/trunk/arch/powerpc/kernel/setup-common.c
+++ b/trunk/arch/powerpc/kernel/setup-common.c
@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
#ifdef CONFIG_DEBUG_FS
struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
static int powerpc_debugfs_init(void)
{
diff --git a/trunk/arch/powerpc/kernel/setup_32.c b/trunk/arch/powerpc/kernel/setup_32.c
index 53bcf3d792db..b152de3e64d4 100644
--- a/trunk/arch/powerpc/kernel/setup_32.c
+++ b/trunk/arch/powerpc/kernel/setup_32.c
@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_SWIOTLB
if (ppc_swiotlb_enable)
- swiotlb_init();
+ swiotlb_init(1);
#endif
paging_init();
diff --git a/trunk/arch/powerpc/kernel/setup_64.c b/trunk/arch/powerpc/kernel/setup_64.c
index 04f638d82fb3..df2c9e932b37 100644
--- a/trunk/arch/powerpc/kernel/setup_64.c
+++ b/trunk/arch/powerpc/kernel/setup_64.c
@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_SWIOTLB
if (ppc_swiotlb_enable)
- swiotlb_init();
+ swiotlb_init(1);
#endif
paging_init();
diff --git a/trunk/arch/powerpc/kernel/sys_ppc32.c b/trunk/arch/powerpc/kernel/sys_ppc32.c
index b97c2d67f4ac..c5a4732bcc48 100644
--- a/trunk/arch/powerpc/kernel/sys_ppc32.c
+++ b/trunk/arch/powerpc/kernel/sys_ppc32.c
@@ -520,58 +520,6 @@ asmlinkage long compat_sys_umask(u32 mask)
return sys_umask((int)mask);
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct __sysctl_args32 {
- u32 name;
- int nlen;
- u32 oldval;
- u32 oldlenp;
- u32 newval;
- u32 newlen;
- u32 __unused[4];
-};
-
-asmlinkage long compat_sys_sysctl(struct __sysctl_args32 __user *args)
-{
- struct __sysctl_args32 tmp;
- int error;
- size_t oldlen;
- size_t __user *oldlenp = NULL;
- unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- oldlenp = (size_t __user *)addr;
- if (get_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) ||
- put_user(oldlen, oldlenp))
- return -EFAULT;
- }
-
- lock_kernel();
- error = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
- compat_ptr(tmp.oldval), oldlenp,
- compat_ptr(tmp.newval), tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, oldlenp) ||
- put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)))
- error = -EFAULT;
- }
- copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused));
- }
- return error;
-}
-#endif
-
unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c
index a136a11c490d..36707dec94d7 100644
--- a/trunk/arch/powerpc/kernel/time.c
+++ b/trunk/arch/powerpc/kernel/time.c
@@ -54,6 +54,7 @@
#include
#include
#include
+#include
#include
#include
@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
struct clock_event_device *evt = &decrementer->event;
u64 now;
+ trace_timer_interrupt_entry(regs);
+
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continuue to take decrementer exceptions */
set_dec(DECREMENTER_MAX);
@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
now = decrementer->next_tb - now;
if (now <= DECREMENTER_MAX)
set_dec((int)now);
+ trace_timer_interrupt_exit(regs);
return;
}
old_regs = set_irq_regs(regs);
@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
irq_exit();
set_irq_regs(old_regs);
+
+ trace_timer_interrupt_exit(regs);
}
void wakeup_decrementer(void)
diff --git a/trunk/arch/powerpc/kernel/traps.c b/trunk/arch/powerpc/kernel/traps.c
index 6f0ae1a9bfae..9d1f9354d6ca 100644
--- a/trunk/arch/powerpc/kernel/traps.c
+++ b/trunk/arch/powerpc/kernel/traps.c
@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate the mfspr rD, PVR. */
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
- PPC_WARN_EMULATED(mfpvr);
+ PPC_WARN_EMULATED(mfpvr, regs);
rd = (instword >> 21) & 0x1f;
regs->gpr[rd] = mfspr(SPRN_PVR);
return 0;
@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulating the dcba insn is just a no-op. */
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
- PPC_WARN_EMULATED(dcba);
+ PPC_WARN_EMULATED(dcba, regs);
return 0;
}
@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
int shift = (instword >> 21) & 0x1c;
unsigned long msk = 0xf0000000UL >> shift;
- PPC_WARN_EMULATED(mcrxr);
+ PPC_WARN_EMULATED(mcrxr, regs);
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
regs->xer &= ~0xf0000000UL;
return 0;
@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
/* Emulate load/store string insn. */
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
- PPC_WARN_EMULATED(string);
+ PPC_WARN_EMULATED(string, regs);
return emulate_string_inst(regs, instword);
}
/* Emulate the popcntb (Population Count Bytes) instruction. */
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
- PPC_WARN_EMULATED(popcntb);
+ PPC_WARN_EMULATED(popcntb, regs);
return emulate_popcntb_inst(regs, instword);
}
/* Emulate isel (Integer Select) instruction */
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
- PPC_WARN_EMULATED(isel);
+ PPC_WARN_EMULATED(isel, regs);
return emulate_isel(regs, instword);
}
@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#ifdef CONFIG_MATH_EMULATION
errcode = do_mathemu(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(math);
+ PPC_WARN_EMULATED(math, regs);
switch (errcode) {
case 0:
@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
errcode = Soft_emulate_8xx(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(8xx);
+ PPC_WARN_EMULATED(8xx, regs);
switch (errcode) {
case 0:
@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
flush_altivec_to_thread(current);
- PPC_WARN_EMULATED(altivec);
+ PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
regs->nip += 4; /* skip emulated instruction */
diff --git a/trunk/arch/powerpc/lib/copypage_64.S b/trunk/arch/powerpc/lib/copypage_64.S
index 75f3267fdc30..e68beac0a171 100644
--- a/trunk/arch/powerpc/lib/copypage_64.S
+++ b/trunk/arch/powerpc/lib/copypage_64.S
@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
srd r8,r5,r11
mtctr r8
-setup:
+.Lsetup:
dcbt r9,r4
dcbz r9,r3
add r9,r9,r12
- bdnz setup
+ bdnz .Lsetup
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8
srdi r8,r5,7 /* page is copied in 128 byte strides */
diff --git a/trunk/arch/powerpc/platforms/pseries/hvCall.S b/trunk/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..383a5d0e9818 100644
--- a/trunk/arch/powerpc/platforms/pseries/hvCall.S
+++ b/trunk/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,68 +14,94 @@
#define STK_PARM(i) (48 + ((i)-3)*8)
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+ .section ".toc","aw"
+
+ .globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+ .llong 0
+
+ .section ".text"
+
/*
* precall must preserve all registers. use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
*/
-#define HCALL_INST_PRECALL \
- std r3,STK_PARM(r3)(r1); /* save opcode */ \
- mftb r0; /* get timebase and */ \
- std r0,STK_PARM(r5)(r1); /* save for later */ \
+#define HCALL_INST_PRECALL(FIRST_REG) \
BEGIN_FTR_SECTION; \
- mfspr r0,SPRN_PURR; /* get PURR and */ \
- std r0,STK_PARM(r6)(r1); /* save for later */ \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-
+ b 1f; \
+END_FTR_SECTION(0, 1); \
+ ld r12,hcall_tracepoint_refcount@toc(r2); \
+ cmpdi r12,0; \
+ beq+ 1f; \
+ mflr r0; \
+ std r3,STK_PARM(r3)(r1); \
+ std r4,STK_PARM(r4)(r1); \
+ std r5,STK_PARM(r5)(r1); \
+ std r6,STK_PARM(r6)(r1); \
+ std r7,STK_PARM(r7)(r1); \
+ std r8,STK_PARM(r8)(r1); \
+ std r9,STK_PARM(r9)(r1); \
+ std r10,STK_PARM(r10)(r1); \
+ std r0,16(r1); \
+ addi r4,r1,STK_PARM(FIRST_REG); \
+ stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ bl .__trace_hcall_entry; \
+ addi r1,r1,STACK_FRAME_OVERHEAD; \
+ ld r0,16(r1); \
+ ld r3,STK_PARM(r3)(r1); \
+ ld r4,STK_PARM(r4)(r1); \
+ ld r5,STK_PARM(r5)(r1); \
+ ld r6,STK_PARM(r6)(r1); \
+ ld r7,STK_PARM(r7)(r1); \
+ ld r8,STK_PARM(r8)(r1); \
+ ld r9,STK_PARM(r9)(r1); \
+ ld r10,STK_PARM(r10)(r1); \
+ mtlr r0; \
+1:
+
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers. We branch around this
* in early init (eg when populating the MMU hashtable) by using an
* unconditional cpu feature.
*/
-#define HCALL_INST_POSTCALL \
+#define __HCALL_INST_POSTCALL \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
- ld r4,STK_PARM(r3)(r1); /* validate opcode */ \
- cmpldi cr7,r4,MAX_HCALL_OPCODE; \
- bgt- cr7,1f; \
- \
- /* get time and PURR snapshots after hcall */ \
- mftb r7; /* timebase after */ \
-BEGIN_FTR_SECTION; \
- mfspr r8,SPRN_PURR; /* PURR after */ \
- ld r6,STK_PARM(r6)(r1); /* PURR before */ \
- subf r6,r6,r8; /* delta */ \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
- ld r5,STK_PARM(r5)(r1); /* timebase before */ \
- subf r5,r5,r7; /* time delta */ \
- \
- /* calculate address of stat structure r4 = opcode */ \
- srdi r4,r4,2; /* index into array */ \
- mulli r4,r4,HCALL_STAT_SIZE; \
- LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
- add r4,r4,r7; \
- ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
- add r4,r4,r7; \
- \
- /* update stats */ \
- ld r7,HCALL_STAT_CALLS(r4); /* count */ \
- addi r7,r7,1; \
- std r7,HCALL_STAT_CALLS(r4); \
- ld r7,HCALL_STAT_TB(r4); /* timebase */ \
- add r7,r7,r5; \
- std r7,HCALL_STAT_TB(r4); \
-BEGIN_FTR_SECTION; \
- ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
- add r7,r7,r6; \
- std r7,HCALL_STAT_PURR(r4); \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
+ ld r12,hcall_tracepoint_refcount@toc(r2); \
+ cmpdi r12,0; \
+ beq+ 1f; \
+ mflr r0; \
+ ld r6,STK_PARM(r3)(r1); \
+ std r3,STK_PARM(r3)(r1); \
+ mr r4,r3; \
+ mr r3,r6; \
+ std r0,16(r1); \
+ stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ bl .__trace_hcall_exit; \
+ addi r1,r1,STACK_FRAME_OVERHEAD; \
+ ld r0,16(r1); \
+ ld r3,STK_PARM(r3)(r1); \
+ mtlr r0; \
1:
+
+#define HCALL_INST_POSTCALL_NORETS \
+ li r5,0; \
+ __HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG) \
+ mr r5,BUFREG; \
+ __HCALL_INST_POSTCALL
+
#else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
#endif
.text
@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
mfcr r0
stw r0,8(r1)
- HCALL_INST_PRECALL
+ HCALL_INST_PRECALL(r4)
HVSC /* invoke the hypervisor */
- HCALL_INST_POSTCALL
+ HCALL_INST_POSTCALL_NORETS
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
mfcr r0
stw r0,8(r1)
- HCALL_INST_PRECALL
+ HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
- HCALL_INST_POSTCALL
+ HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
mfcr r0
stw r0,8(r1)
- HCALL_INST_PRECALL
+ HCALL_INST_PRECALL(r5)
std r4,STK_PARM(r4)(r1) /* Save ret buffer */
@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
- HCALL_INST_POSTCALL
+ HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
diff --git a/trunk/arch/powerpc/platforms/pseries/hvCall_inst.c b/trunk/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f277eb..2f58c71b7259 100644
--- a/trunk/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/trunk/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
#define HCALL_ROOT_DIR "hcall_inst"
#define CPU_NAME_BUF_SIZE 32
+
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+ struct hcall_stats *h;
+
+ if (opcode > MAX_HCALL_OPCODE)
+ return;
+
+ h = &get_cpu_var(hcall_stats)[opcode / 4];
+ h->tb_start = mftb();
+ h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+ unsigned long *retbuf)
+{
+ struct hcall_stats *h;
+
+ if (opcode > MAX_HCALL_OPCODE)
+ return;
+
+ h = &__get_cpu_var(hcall_stats)[opcode / 4];
+ h->num_calls++;
+ h->tb_total = mftb() - h->tb_start;
+ h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+ put_cpu_var(hcall_stats);
+}
+
static int __init hcall_inst_init(void)
{
struct dentry *hcall_root;
@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR))
return 0;
+ if (register_trace_hcall_entry(probe_hcall_entry))
+ return -EINVAL;
+
+ if (register_trace_hcall_exit(probe_hcall_exit)) {
+ unregister_trace_hcall_entry(probe_hcall_entry);
+ return -EINVAL;
+ }
+
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
if (!hcall_root)
return -ENOMEM;
diff --git a/trunk/arch/powerpc/platforms/pseries/lpar.c b/trunk/arch/powerpc/platforms/pseries/lpar.c
index 903eb9eec687..0707653612ba 100644
--- a/trunk/arch/powerpc/platforms/pseries/lpar.c
+++ b/trunk/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
#include
#include
#include
+#include
#include "plpar_wrappers.h"
#include "pseries.h"
@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
EXPORT_SYMBOL(arch_free_page);
#endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+ hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+ hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+ trace_hcall_entry(opcode, args);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval,
+ unsigned long *retbuf)
+{
+ trace_hcall_exit(opcode, retval, retbuf);
+}
+#endif
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index 43c0acad7160..16c673096a22 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -95,6 +95,34 @@ config S390
select HAVE_ARCH_TRACEHOOK
select INIT_ALL_POSSIBLE
select HAVE_PERF_EVENTS
+ select ARCH_INLINE_SPIN_TRYLOCK
+ select ARCH_INLINE_SPIN_TRYLOCK_BH
+ select ARCH_INLINE_SPIN_LOCK
+ select ARCH_INLINE_SPIN_LOCK_BH
+ select ARCH_INLINE_SPIN_LOCK_IRQ
+ select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+ select ARCH_INLINE_SPIN_UNLOCK
+ select ARCH_INLINE_SPIN_UNLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK_IRQ
+ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_READ_TRYLOCK
+ select ARCH_INLINE_READ_LOCK
+ select ARCH_INLINE_READ_LOCK_BH
+ select ARCH_INLINE_READ_LOCK_IRQ
+ select ARCH_INLINE_READ_LOCK_IRQSAVE
+ select ARCH_INLINE_READ_UNLOCK
+ select ARCH_INLINE_READ_UNLOCK_BH
+ select ARCH_INLINE_READ_UNLOCK_IRQ
+ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_WRITE_TRYLOCK
+ select ARCH_INLINE_WRITE_LOCK
+ select ARCH_INLINE_WRITE_LOCK_BH
+ select ARCH_INLINE_WRITE_LOCK_IRQ
+ select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+ select ARCH_INLINE_WRITE_UNLOCK
+ select ARCH_INLINE_WRITE_UNLOCK_BH
+ select ARCH_INLINE_WRITE_UNLOCK_IRQ
+ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
config SCHED_OMIT_FRAME_POINTER
bool
diff --git a/trunk/arch/s390/appldata/appldata_base.c b/trunk/arch/s390/appldata/appldata_base.c
index b55fd7ed1c31..495589950dc7 100644
--- a/trunk/arch/s390/appldata/appldata_base.c
+++ b/trunk/arch/s390/appldata/appldata_base.c
@@ -61,12 +61,12 @@ static struct ctl_table appldata_table[] = {
{
.procname = "timer",
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &appldata_timer_handler,
+ .proc_handler = appldata_timer_handler,
},
{
.procname = "interval",
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &appldata_interval_handler,
+ .proc_handler = appldata_interval_handler,
},
{ },
};
diff --git a/trunk/arch/s390/include/asm/bug.h b/trunk/arch/s390/include/asm/bug.h
index 7efd0abe8887..efb74fd5156e 100644
--- a/trunk/arch/s390/include/asm/bug.h
+++ b/trunk/arch/s390/include/asm/bug.h
@@ -49,7 +49,7 @@
#define BUG() do { \
__EMIT_BUG(0); \
- for (;;); \
+ unreachable(); \
} while (0)
#define WARN_ON(x) ({ \
diff --git a/trunk/arch/s390/include/asm/spinlock.h b/trunk/arch/s390/include/asm/spinlock.h
index 41ce6861174e..c9af0d19c7ab 100644
--- a/trunk/arch/s390/include/asm/spinlock.h
+++ b/trunk/arch/s390/include/asm/spinlock.h
@@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
-#define __always_inline__spin_lock
-#define __always_inline__read_lock
-#define __always_inline__write_lock
-#define __always_inline__spin_lock_bh
-#define __always_inline__read_lock_bh
-#define __always_inline__write_lock_bh
-#define __always_inline__spin_lock_irq
-#define __always_inline__read_lock_irq
-#define __always_inline__write_lock_irq
-#define __always_inline__spin_lock_irqsave
-#define __always_inline__read_lock_irqsave
-#define __always_inline__write_lock_irqsave
-#define __always_inline__spin_trylock
-#define __always_inline__read_trylock
-#define __always_inline__write_trylock
-#define __always_inline__spin_trylock_bh
-#define __always_inline__spin_unlock
-#define __always_inline__read_unlock
-#define __always_inline__write_unlock
-#define __always_inline__spin_unlock_bh
-#define __always_inline__read_unlock_bh
-#define __always_inline__write_unlock_bh
-#define __always_inline__spin_unlock_irq
-#define __always_inline__read_unlock_irq
-#define __always_inline__write_unlock_irq
-#define __always_inline__spin_unlock_irqrestore
-#define __always_inline__read_unlock_irqrestore
-#define __always_inline__write_unlock_irqrestore
-
#endif /* __ASM_SPINLOCK_H */
diff --git a/trunk/arch/s390/kernel/compat_linux.c b/trunk/arch/s390/kernel/compat_linux.c
index 0debcec23a39..fda1a8123f9b 100644
--- a/trunk/arch/s390/kernel/compat_linux.c
+++ b/trunk/arch/s390/kernel/compat_linux.c
@@ -527,59 +527,6 @@ asmlinkage long sys32_sendfile64(int out_fd, int in_fd,
return ret;
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct __sysctl_args32 {
- u32 name;
- int nlen;
- u32 oldval;
- u32 oldlenp;
- u32 newval;
- u32 newlen;
- u32 __unused[4];
-};
-
-asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
-{
- struct __sysctl_args32 tmp;
- int error;
- size_t oldlen;
- size_t __user *oldlenp = NULL;
- unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- if (get_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)) ||
- put_user(oldlen, (size_t __user *)addr))
- return -EFAULT;
- oldlenp = (size_t __user *)addr;
- }
-
- lock_kernel();
- error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, compat_ptr(tmp.oldval),
- oldlenp, compat_ptr(tmp.newval), tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, (size_t __user *)addr) ||
- put_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)))
- error = -EFAULT;
- }
- if (copy_to_user(args->__unused, tmp.__unused,
- sizeof(tmp.__unused)))
- error = -EFAULT;
- }
- return error;
-}
-#endif
-
struct stat64_emu31 {
unsigned long long st_dev;
unsigned int __pad1;
diff --git a/trunk/arch/s390/kernel/compat_linux.h b/trunk/arch/s390/kernel/compat_linux.h
index c07f9ca05ade..45e9092b3aad 100644
--- a/trunk/arch/s390/kernel/compat_linux.h
+++ b/trunk/arch/s390/kernel/compat_linux.h
@@ -162,7 +162,6 @@ struct ucontext32 {
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
-struct __sysctl_args32;
struct stat64_emu31;
struct mmap_arg_struct_emu31;
struct fadvise64_64_args;
@@ -212,7 +211,6 @@ long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
size_t count);
long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
s32 count);
-long sys32_sysctl(struct __sysctl_args32 __user *args);
long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
long sys32_lstat64(char __user * filename,
struct stat64_emu31 __user * statbuf);
diff --git a/trunk/arch/s390/kernel/compat_wrapper.S b/trunk/arch/s390/kernel/compat_wrapper.S
index cbd9901dc0f8..30de2d0e52bb 100644
--- a/trunk/arch/s390/kernel/compat_wrapper.S
+++ b/trunk/arch/s390/kernel/compat_wrapper.S
@@ -689,8 +689,6 @@ sys32_fdatasync_wrapper:
llgfr %r2,%r2 # unsigned int
jg sys_fdatasync # branch to system call
-#sys32_sysctl_wrapper # tbd
-
.globl sys32_mlock_wrapper
sys32_mlock_wrapper:
llgfr %r2,%r2 # unsigned long
@@ -1087,8 +1085,8 @@ sys32_stime_wrapper:
.globl sys32_sysctl_wrapper
sys32_sysctl_wrapper:
- llgtr %r2,%r2 # struct __sysctl_args32 *
- jg sys32_sysctl
+ llgtr %r2,%r2 # struct compat_sysctl_args *
+ jg compat_sys_sysctl
.globl sys32_fstat64_wrapper
sys32_fstat64_wrapper:
diff --git a/trunk/arch/s390/kernel/debug.c b/trunk/arch/s390/kernel/debug.c
index 20f282c911c2..071c81f179ef 100644
--- a/trunk/arch/s390/kernel/debug.c
+++ b/trunk/arch/s390/kernel/debug.c
@@ -893,35 +893,30 @@ s390dbf_procactive(ctl_table *table, int write,
static struct ctl_table s390dbf_table[] = {
{
- .ctl_name = CTL_S390DBF_STOPPABLE,
.procname = "debug_stoppable",
.data = &debug_stoppable,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_S390DBF_ACTIVE,
.procname = "debug_active",
.data = &debug_active,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &s390dbf_procactive,
- .strategy = &sysctl_intvec,
+ .proc_handler = s390dbf_procactive,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table s390dbf_dir_table[] = {
{
- .ctl_name = CTL_S390DBF,
.procname = "s390dbf",
.maxlen = 0,
.mode = S_IRUGO | S_IXUGO,
.child = s390dbf_table,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *s390dbf_sysctl_header;
diff --git a/trunk/arch/s390/kernel/ftrace.c b/trunk/arch/s390/kernel/ftrace.c
index f5fe34dd821b..5a82bc68193e 100644
--- a/trunk/arch/s390/kernel/ftrace.c
+++ b/trunk/arch/s390/kernel/ftrace.c
@@ -203,73 +203,10 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
extern unsigned int sys_call_table[];
-static struct syscall_metadata **syscalls_metadata;
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
- if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
- return NULL;
-
- return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
-{
- int i;
-
- if (!syscalls_metadata)
- return -1;
- for (i = 0; i < NR_syscalls; i++)
- if (syscalls_metadata[i])
- if (!strcmp(syscalls_metadata[i]->name, name))
- return i;
- return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
- syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
+unsigned long __init arch_syscall_addr(int nr)
{
- syscalls_metadata[num]->exit_id = id;
-}
-
-static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
-{
- struct syscall_metadata *start;
- struct syscall_metadata *stop;
- char str[KSYM_SYMBOL_LEN];
-
- start = (struct syscall_metadata *)__start_syscalls_metadata;
- stop = (struct syscall_metadata *)__stop_syscalls_metadata;
- kallsyms_lookup(syscall, NULL, NULL, NULL, str);
-
- for ( ; start < stop; start++) {
- if (start->name && !strcmp(start->name + 3, str + 3))
- return start;
- }
- return NULL;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
- struct syscall_metadata *meta;
- int i;
- syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls,
- GFP_KERNEL);
- if (!syscalls_metadata)
- return -ENOMEM;
- for (i = 0; i < NR_syscalls; i++) {
- meta = find_syscall_meta((unsigned long)sys_call_table[i]);
- syscalls_metadata[i] = meta;
- }
- return 0;
+ return (unsigned long)sys_call_table[nr];
}
-arch_initcall(arch_init_ftrace_syscalls);
#endif
diff --git a/trunk/arch/s390/mm/cmm.c b/trunk/arch/s390/mm/cmm.c
index b201135cc18c..ff58779bf7e9 100644
--- a/trunk/arch/s390/mm/cmm.c
+++ b/trunk/arch/s390/mm/cmm.c
@@ -343,30 +343,29 @@ static struct ctl_table cmm_table[] = {
{
.procname = "cmm_pages",
.mode = 0644,
- .proc_handler = &cmm_pages_handler,
+ .proc_handler = cmm_pages_handler,
},
{
.procname = "cmm_timed_pages",
.mode = 0644,
- .proc_handler = &cmm_pages_handler,
+ .proc_handler = cmm_pages_handler,
},
{
.procname = "cmm_timeout",
.mode = 0644,
- .proc_handler = &cmm_timeout_handler,
+ .proc_handler = cmm_timeout_handler,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table cmm_dir_table[] = {
{
- .ctl_name = CTL_VM,
.procname = "vm",
.maxlen = 0,
.mode = 0555,
.child = cmm_table,
},
- { .ctl_name = 0 }
+ { }
};
#endif
diff --git a/trunk/arch/sh/kernel/traps_64.c b/trunk/arch/sh/kernel/traps_64.c
index 267e5ebbb475..75c0cbe2eda0 100644
--- a/trunk/arch/sh/kernel/traps_64.c
+++ b/trunk/arch/sh/kernel/traps_64.c
@@ -877,44 +877,39 @@ static int misaligned_fixup(struct pt_regs *regs)
static ctl_table unaligned_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kernel_reports",
.data = &kernel_mode_unaligned_fixup_count,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "user_reports",
.data = &user_mode_unaligned_fixup_count,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = proc_dointvec
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "user_enable",
.data = &user_mode_unaligned_fixup_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec},
+ .proc_handler = proc_dointvec},
{}
};
static ctl_table unaligned_root[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "unaligned_fixup",
.mode = 0555,
- unaligned_table
+ .child = unaligned_table
},
{}
};
static ctl_table sh64_root[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "sh64",
.mode = 0555,
.child = unaligned_root
diff --git a/trunk/arch/sparc/include/asm/prom.h b/trunk/arch/sparc/include/asm/prom.h
index 82a190d7efc1..f845828ca4c6 100644
--- a/trunk/arch/sparc/include/asm/prom.h
+++ b/trunk/arch/sparc/include/asm/prom.h
@@ -1,3 +1,4 @@
+#include /* linux/of.h gets to determine #include ordering */
#ifndef _SPARC_PROM_H
#define _SPARC_PROM_H
#ifdef __KERNEL__
@@ -28,50 +29,11 @@
#define of_prop_cmp(s1, s2) strcasecmp((s1), (s2))
#define of_node_cmp(s1, s2) strcmp((s1), (s2))
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct property {
- char *name;
- int length;
- void *value;
- struct property *next;
- unsigned long _flags;
- unsigned int unique_id;
-};
-
-struct of_irq_controller;
-struct device_node {
- const char *name;
- const char *type;
- phandle node;
- char *path_component_name;
- char *full_name;
-
- struct property *properties;
- struct property *deadprops; /* removed properties */
- struct device_node *parent;
- struct device_node *child;
- struct device_node *sibling;
- struct device_node *next; /* next device of same type */
- struct device_node *allnext; /* next in list of all nodes */
- struct proc_dir_entry *pde; /* this node's proc directory */
- struct kref kref;
- unsigned long _flags;
- void *data;
- unsigned int unique_id;
-
- struct of_irq_controller *irq_trans;
-};
-
struct of_irq_controller {
unsigned int (*irq_build)(struct device_node *, unsigned int, void *);
void *data;
};
-#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
-#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
-
extern struct device_node *of_find_node_by_cpuid(int cpuid);
extern int of_set_property(struct device_node *node, const char *name, void *val, int len);
extern struct mutex of_set_property_mutex;
@@ -89,15 +51,6 @@ extern void prom_build_devicetree(void);
extern void of_populate_present_mask(void);
extern void of_fill_in_cpu_data(void);
-/* Dummy ref counting routines - to be implemented later */
-static inline struct device_node *of_node_get(struct device_node *node)
-{
- return node;
-}
-static inline void of_node_put(struct device_node *node)
-{
-}
-
/* These routines are here to provide compatibility with how powerpc
* handles IRQ mapping for OF device nodes. We precompute and permanently
* register them in the of_device objects, whereas powerpc computes them
@@ -108,12 +61,6 @@ static inline void irq_dispose_mapping(unsigned int virq)
{
}
-/*
- * NB: This is here while we transition from using asm/prom.h
- * to linux/of.h
- */
-#include
-
extern struct device_node *of_console_device;
extern char *of_console_path;
extern char *of_console_options;
diff --git a/trunk/arch/sparc/kernel/sys_sparc32.c b/trunk/arch/sparc/kernel/sys_sparc32.c
index 04e28b2671c8..f862372074bc 100644
--- a/trunk/arch/sparc/kernel/sys_sparc32.c
+++ b/trunk/arch/sparc/kernel/sys_sparc32.c
@@ -591,63 +591,6 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr,
return ret;
}
-struct __sysctl_args32 {
- u32 name;
- int nlen;
- u32 oldval;
- u32 oldlenp;
- u32 newval;
- u32 newlen;
- u32 __unused[4];
-};
-
-asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
-{
-#ifndef CONFIG_SYSCTL_SYSCALL
- return -ENOSYS;
-#else
- struct __sysctl_args32 tmp;
- int error;
- size_t oldlen, __user *oldlenp = NULL;
- unsigned long addr = (((unsigned long)&args->__unused[0]) + 7UL) & ~7UL;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- if (get_user(oldlen, (u32 __user *)(unsigned long)tmp.oldlenp) ||
- put_user(oldlen, (size_t __user *)addr))
- return -EFAULT;
- oldlenp = (size_t __user *)addr;
- }
-
- lock_kernel();
- error = do_sysctl((int __user *)(unsigned long) tmp.name,
- tmp.nlen,
- (void __user *)(unsigned long) tmp.oldval,
- oldlenp,
- (void __user *)(unsigned long) tmp.newval,
- tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, (size_t __user *)addr) ||
- put_user(oldlen, (u32 __user *)(unsigned long) tmp.oldlenp))
- error = -EFAULT;
- }
- if (copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)))
- error = -EFAULT;
- }
- return error;
-#endif
-}
-
long sys32_lookup_dcookie(unsigned long cookie_high,
unsigned long cookie_low,
char __user *buf, size_t len)
diff --git a/trunk/arch/sparc/kernel/systbls_64.S b/trunk/arch/sparc/kernel/systbls_64.S
index 009825f6e73c..034b10e0d4b0 100644
--- a/trunk/arch/sparc/kernel/systbls_64.S
+++ b/trunk/arch/sparc/kernel/systbls_64.S
@@ -68,7 +68,7 @@ sys_call_table32:
.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
/*240*/ .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
.word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep
-/*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
+/*250*/ .word sys32_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
.word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
.word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
diff --git a/trunk/arch/sparc/mm/init_64.h b/trunk/arch/sparc/mm/init_64.h
index c2f772dbd556..77d1b313e344 100644
--- a/trunk/arch/sparc/mm/init_64.h
+++ b/trunk/arch/sparc/mm/init_64.h
@@ -45,7 +45,7 @@ extern void free_initmem(void);
#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
- sizeof(struct page *)) >> VMEMMAP_CHUNK_SHIFT)
+ sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
extern unsigned long vmemmap_table[VMEMMAP_SIZE];
#endif
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+ select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
config OUTPUT_FORMAT
diff --git a/trunk/arch/x86/Kconfig.cpu b/trunk/arch/x86/Kconfig.cpu
index 2649840d888f..5e99762eb5c2 100644
--- a/trunk/arch/x86/Kconfig.cpu
+++ b/trunk/arch/x86/Kconfig.cpu
@@ -406,7 +406,7 @@ config X86_CMPXCHG64
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM)
+ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
diff --git a/trunk/arch/x86/Kconfig.debug b/trunk/arch/x86/Kconfig.debug
index d105f29bb6bb..731318e5ac1d 100644
--- a/trunk/arch/x86/Kconfig.debug
+++ b/trunk/arch/x86/Kconfig.debug
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST
config HAVE_MMIOTRACE_SUPPORT
def_bool y
+config X86_DECODER_SELFTEST
+ bool "x86 instruction decoder selftest"
+ depends on DEBUG_KERNEL
+ ---help---
+ Perform x86 instruction decoder selftests at build time.
+ This option is useful for checking the sanity of x86 instruction
+ decoder code.
+ If unsure, say "N".
+
#
# IO delay types:
#
@@ -287,4 +296,18 @@ config OPTIMIZE_INLINING
If unsure, say N.
+config DEBUG_STRICT_USER_COPY_CHECKS
+ bool "Strict copy size checks"
+ depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
+ ---help---
+ Enabling this option turns a certain set of sanity checks for user
+ copy operations into compile time failures.
+
+ The copy_from_user() etc checks are there to help test if there
+ are sufficient security checks on the length argument of
+ the copy operation, by having gcc prove that the argument is
+ within bounds.
+
+ If unsure, or if you run an older (pre 4.4) gcc, say N.
+
endmenu
diff --git a/trunk/arch/x86/Makefile b/trunk/arch/x86/Makefile
index d2d24c9ee64d..78b32be55e9e 100644
--- a/trunk/arch/x86/Makefile
+++ b/trunk/arch/x86/Makefile
@@ -155,6 +155,9 @@ all: bzImage
KBUILD_IMAGE := $(boot)/bzImage
bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+ $(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --git a/trunk/arch/x86/Makefile_32.cpu b/trunk/arch/x86/Makefile_32.cpu
index 30e9a264f69d..cbf0776dbec1 100644
--- a/trunk/arch/x86/Makefile_32.cpu
+++ b/trunk/arch/x86/Makefile_32.cpu
@@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486
# Geode GX1 support
cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
-
+cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
# add at the end to overwrite eventual tuning options from earlier
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S
index 581b0568fe19..5d2584839be4 100644
--- a/trunk/arch/x86/ia32/ia32entry.S
+++ b/trunk/arch/x86/ia32/ia32entry.S
@@ -653,7 +653,7 @@ ia32_sys_call_table:
.quad compat_sys_writev
.quad sys_getsid
.quad sys_fdatasync
- .quad sys32_sysctl /* sysctl */
+ .quad compat_sys_sysctl /* sysctl */
.quad sys_mlock /* 150 */
.quad sys_munlock
.quad sys_mlockall
diff --git a/trunk/arch/x86/ia32/sys_ia32.c b/trunk/arch/x86/ia32/sys_ia32.c
index 9f5527198825..df82c0e48ded 100644
--- a/trunk/arch/x86/ia32/sys_ia32.c
+++ b/trunk/arch/x86/ia32/sys_ia32.c
@@ -434,62 +434,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
return ret;
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct sysctl_ia32 {
- unsigned int name;
- int nlen;
- unsigned int oldval;
- unsigned int oldlenp;
- unsigned int newval;
- unsigned int newlen;
- unsigned int __unused[4];
-};
-
-
-asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32)
-{
- struct sysctl_ia32 a32;
- mm_segment_t old_fs = get_fs();
- void __user *oldvalp, *newvalp;
- size_t oldlen;
- int __user *namep;
- long ret;
-
- if (copy_from_user(&a32, args32, sizeof(a32)))
- return -EFAULT;
-
- /*
- * We need to pre-validate these because we have to disable
- * address checking before calling do_sysctl() because of
- * OLDLEN but we can't run the risk of the user specifying bad
- * addresses here. Well, since we're dealing with 32 bit
- * addresses, we KNOW that access_ok() will always succeed, so
- * this is an expensive NOP, but so what...
- */
- namep = compat_ptr(a32.name);
- oldvalp = compat_ptr(a32.oldval);
- newvalp = compat_ptr(a32.newval);
-
- if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- || !access_ok(VERIFY_WRITE, namep, 0)
- || !access_ok(VERIFY_WRITE, oldvalp, 0)
- || !access_ok(VERIFY_WRITE, newvalp, 0))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- lock_kernel();
- ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen,
- newvalp, (size_t) a32.newlen);
- unlock_kernel();
- set_fs(old_fs);
-
- if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- return -EFAULT;
-
- return ret;
-}
-#endif
-
/* warning: next two assume little endian */
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
u32 poslo, u32 poshi)
diff --git a/trunk/arch/x86/include/asm/Kbuild b/trunk/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/trunk/arch/x86/include/asm/Kbuild
+++ b/trunk/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
header-y += processor-flags.h
+header-y += hw_breakpoint.h
unifdef-y += e820.h
unifdef-y += ist.h
diff --git a/trunk/arch/x86/include/asm/a.out-core.h b/trunk/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/trunk/arch/x86/include/asm/a.out-core.h
+++ b/trunk/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
#include
#include
+#include
/*
* fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
>> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
- dump->u_debugreg[0] = current->thread.debugreg0;
- dump->u_debugreg[1] = current->thread.debugreg1;
- dump->u_debugreg[2] = current->thread.debugreg2;
- dump->u_debugreg[3] = current->thread.debugreg3;
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.debugreg7;
+ aout_dump_debugregs(dump);
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/trunk/arch/x86/include/asm/alternative-asm.h b/trunk/arch/x86/include/asm/alternative-asm.h
index e2077d343c33..b97f786a48d5 100644
--- a/trunk/arch/x86/include/asm/alternative-asm.h
+++ b/trunk/arch/x86/include/asm/alternative-asm.h
@@ -1,17 +1,13 @@
#ifdef __ASSEMBLY__
-#ifdef CONFIG_X86_32
-# define X86_ALIGN .long
-#else
-# define X86_ALIGN .quad
-#endif
+#include
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
1: lock
.section .smp_locks,"a"
- .align 4
- X86_ALIGN 1b
+ _ASM_ALIGN
+ _ASM_PTR 1b
.previous
.endm
#else
diff --git a/trunk/arch/x86/include/asm/alternative.h b/trunk/arch/x86/include/asm/alternative.h
index c240efc74e00..69b74a7b877f 100644
--- a/trunk/arch/x86/include/asm/alternative.h
+++ b/trunk/arch/x86/include/asm/alternative.h
@@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {}
" .byte " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
+ " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
".previous\n" \
".section .altinstr_replacement, \"ax\"\n" \
"663:\n\t" newinstr "\n664:\n" /* replacement */ \
diff --git a/trunk/arch/x86/include/asm/amd_iommu.h b/trunk/arch/x86/include/asm/amd_iommu.h
index 4b180897e6b5..5af2982133b5 100644
--- a/trunk/arch/x86/include/asm/amd_iommu.h
+++ b/trunk/arch/x86/include/asm/amd_iommu.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Author: Joerg Roedel
* Leo Duran
*
@@ -23,19 +23,13 @@
#include
#ifdef CONFIG_AMD_IOMMU
-extern int amd_iommu_init(void);
-extern int amd_iommu_init_dma_ops(void);
-extern int amd_iommu_init_passthrough(void);
+
extern void amd_iommu_detect(void);
-extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_flush_all_domains(void);
-extern void amd_iommu_flush_all_devices(void);
-extern void amd_iommu_shutdown(void);
-extern void amd_iommu_apply_erratum_63(u16 devid);
+
#else
-static inline int amd_iommu_init(void) { return -ENODEV; }
+
static inline void amd_iommu_detect(void) { }
-static inline void amd_iommu_shutdown(void) { }
+
#endif
#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/trunk/arch/x86/include/asm/amd_iommu_proto.h b/trunk/arch/x86/include/asm/amd_iommu_proto.h
new file mode 100644
index 000000000000..84786fb9a23b
--- /dev/null
+++ b/trunk/arch/x86/include/asm/amd_iommu_proto.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2009 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
+#define _ASM_X86_AMD_IOMMU_PROTO_H
+
+struct amd_iommu;
+
+extern int amd_iommu_init_dma_ops(void);
+extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
+extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
+
+#ifndef CONFIG_AMD_IOMMU_STATS
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* !CONFIG_AMD_IOMMU_STATS */
+
+#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/trunk/arch/x86/include/asm/amd_iommu_types.h b/trunk/arch/x86/include/asm/amd_iommu_types.h
index 2a2cc7a78a81..ba19ad4c47d0 100644
--- a/trunk/arch/x86/include/asm/amd_iommu_types.h
+++ b/trunk/arch/x86/include/asm/amd_iommu_types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Author: Joerg Roedel
* Leo Duran
*
@@ -24,6 +24,11 @@
#include
#include
+/*
+ * Maximum number of IOMMUs supported
+ */
+#define MAX_IOMMUS 32
+
/*
* some size calculation constants
*/
@@ -206,6 +211,9 @@ extern bool amd_iommu_dump;
printk(KERN_INFO "AMD-Vi: " format, ## arg); \
} while(0);
+/* global flag if IOMMUs cache non-present entries */
+extern bool amd_iommu_np_cache;
+
/*
* Make iterating over all IOMMUs easier
*/
@@ -226,6 +234,8 @@ extern bool amd_iommu_dump;
* independent of their use.
*/
struct protection_domain {
+ struct list_head list; /* for list of all protection domains */
+ struct list_head dev_list; /* List of all devices in this domain */
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
@@ -233,7 +243,20 @@ struct protection_domain {
unsigned long flags; /* flags to find out type of domain */
bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
+ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
void *priv; /* private data */
+
+};
+
+/*
+ * This struct contains device specific data for the IOMMU
+ */
+struct iommu_dev_data {
+ struct list_head list; /* For domain->dev_list */
+ struct device *dev; /* Device this data belong to */
+ struct device *alias; /* The Alias Device */
+ struct protection_domain *domain; /* Domain the device is bound to */
+ atomic_t bind; /* Domain attach reverent count */
};
/*
@@ -291,6 +314,9 @@ struct dma_ops_domain {
struct amd_iommu {
struct list_head list;
+ /* Index within the IOMMU array */
+ int index;
+
/* locks the accesses to the hardware */
spinlock_t lock;
@@ -356,6 +382,21 @@ struct amd_iommu {
*/
extern struct list_head amd_iommu_list;
+/*
+ * Array with pointers to each IOMMU struct
+ * The indices are referenced in the protection domains
+ */
+extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
+
+/* Number of IOMMUs present in the system */
+extern int amd_iommus_present;
+
+/*
+ * Declarations for the global list of all protection domains
+ */
+extern spinlock_t amd_iommu_pd_lock;
+extern struct list_head amd_iommu_pd_list;
+
/*
* Structure defining one entry in the device table
*/
@@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order;
/* largest PCI device id we expect translation requests for */
extern u16 amd_iommu_last_bdf;
-/* data structures for protection domain handling */
-extern struct protection_domain **amd_iommu_pd_table;
-
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
-/* will be 1 if device isolation is enabled */
-extern bool amd_iommu_isolate;
-
/*
* If true, the addresses will be flushed on unmap time, not when
* they are reused
@@ -462,11 +497,6 @@ struct __iommu_counter {
#define ADD_STATS_COUNTER(name, x)
#define SUB_STATS_COUNTER(name, x)
-static inline void amd_iommu_stats_init(void) { }
-
#endif /* CONFIG_AMD_IOMMU_STATS */
-/* some function prototypes */
-extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
-
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h
index 474d80d3e6cc..b4ac2cdcb64f 100644
--- a/trunk/arch/x86/include/asm/apic.h
+++ b/trunk/arch/x86/include/asm/apic.h
@@ -297,20 +297,20 @@ struct apic {
int disable_esr;
int dest_logical;
- unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
+ unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
unsigned long (*check_apicid_present)(int apicid);
void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
void (*init_apic_ldr)(void);
- physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
+ void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
void (*setup_apic_routing)(void);
int (*multi_timer_check)(int apic, int irq);
int (*apicid_to_node)(int logical_apicid);
int (*cpu_to_logical_apicid)(int cpu);
int (*cpu_present_to_apicid)(int mps_cpu);
- physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
+ void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
void (*setup_portio_remap)(void);
int (*check_phys_apicid_present)(int phys_apicid);
void (*enable_apic_mode)(void);
@@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void)
extern void default_setup_apic_routing(void);
+extern struct apic apic_noop;
+
#ifdef CONFIG_X86_32
extern struct apic apic_default;
@@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
return (unsigned int)(mask1 & mask2 & mask3);
}
-static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
+static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
{
- return physid_isset(apicid, bitmap);
+ return physid_isset(apicid, *map);
}
static inline unsigned long default_check_apicid_present(int bit)
@@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
-static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
- return phys_map;
+ *retmap = *phys_map;
}
/* Mapping from cpu number to logical apicid */
@@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu);
extern int default_check_phys_apicid_present(int phys_apicid);
#endif
-static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid)
-{
- return physid_mask_of_physid(phys_apicid);
-}
-
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_32
diff --git a/trunk/arch/x86/include/asm/apicdef.h b/trunk/arch/x86/include/asm/apicdef.h
index 3b62da926de9..7fe3b3060f08 100644
--- a/trunk/arch/x86/include/asm/apicdef.h
+++ b/trunk/arch/x86/include/asm/apicdef.h
@@ -11,6 +11,12 @@
#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000
#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+/*
+ * This is the IO-APIC register space as specified
+ * by Intel docs:
+ */
+#define IO_APIC_SLOT_SIZE 1024
+
#define APIC_ID 0x20
#define APIC_LVR 0x30
diff --git a/trunk/arch/x86/include/asm/apicnum.h b/trunk/arch/x86/include/asm/apicnum.h
deleted file mode 100644
index 82f613c607ce..000000000000
--- a/trunk/arch/x86/include/asm/apicnum.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_X86_APICNUM_H
-#define _ASM_X86_APICNUM_H
-
-/* define MAX_IO_APICS */
-#ifdef CONFIG_X86_32
-# define MAX_IO_APICS 64
-#else
-# define MAX_IO_APICS 128
-# define MAX_LOCAL_APIC 32768
-#endif
-
-#endif /* _ASM_X86_APICNUM_H */
diff --git a/trunk/arch/x86/include/asm/bug.h b/trunk/arch/x86/include/asm/bug.h
index d9cf1cd156d2..f654d1bb17fb 100644
--- a/trunk/arch/x86/include/asm/bug.h
+++ b/trunk/arch/x86/include/asm/bug.h
@@ -22,14 +22,14 @@ do { \
".popsection" \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (sizeof(struct bug_entry))); \
- for (;;) ; \
+ unreachable(); \
} while (0)
#else
#define BUG() \
do { \
asm volatile("ud2"); \
- for (;;) ; \
+ unreachable(); \
} while (0)
#endif
diff --git a/trunk/arch/x86/include/asm/calgary.h b/trunk/arch/x86/include/asm/calgary.h
index b03bedb62aa7..0918654305af 100644
--- a/trunk/arch/x86/include/asm/calgary.h
+++ b/trunk/arch/x86/include/asm/calgary.h
@@ -62,10 +62,8 @@ struct cal_chipset_ops {
extern int use_calgary;
#ifdef CONFIG_CALGARY_IOMMU
-extern int calgary_iommu_init(void);
extern void detect_calgary(void);
#else
-static inline int calgary_iommu_init(void) { return 1; }
static inline void detect_calgary(void) { return; }
#endif
diff --git a/trunk/arch/x86/include/asm/cmpxchg_32.h b/trunk/arch/x86/include/asm/cmpxchg_32.h
index ee1931be6593..ffb9bb6b6c37 100644
--- a/trunk/arch/x86/include/asm/cmpxchg_32.h
+++ b/trunk/arch/x86/include/asm/cmpxchg_32.h
@@ -8,14 +8,50 @@
* you need to test for the feature in boot_cpu_data.
*/
-#define xchg(ptr, v) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr))))
+extern void __xchg_wrong_size(void);
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has side effect, so that attribute volatile is necessary,
+ * but generally the primitive is invalid, *ptr is output argument. --ANK
+ */
struct __xchg_dummy {
unsigned long a[100];
};
#define __xg(x) ((struct __xchg_dummy *)(x))
+#define __xchg(x, ptr, size) \
+({ \
+ __typeof(*(ptr)) __x = (x); \
+ switch (size) { \
+ case 1: \
+ asm volatile("xchgb %b0,%1" \
+ : "=q" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile("xchgw %w0,%1" \
+ : "=r" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile("xchgl %0,%1" \
+ : "=r" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ default: \
+ __xchg_wrong_size(); \
+ } \
+ __x; \
+})
+
+#define xchg(ptr, v) \
+ __xchg((v), (ptr), sizeof(*ptr))
+
/*
* The semantics of XCHGCMP8B are a bit strange, this is why
* there is a loop and the loading of %%eax and %%edx has to
@@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr,
(unsigned int)((value) >> 32)) \
: __set_64bit(ptr, ll_low((value)), ll_high((value))))
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
- int size)
-{
- switch (size) {
- case 1:
- asm volatile("xchgb %b0,%1"
- : "=q" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 2:
- asm volatile("xchgw %w0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 4:
- asm volatile("xchgl %0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- }
- return x;
-}
+extern void __cmpxchg_wrong_size(void);
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
+#define __raw_cmpxchg(ptr, old, new, size, lock) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) __new = (new); \
+ switch (size) { \
+ case 1: \
+ asm volatile(lock "cmpxchgb %b1,%2" \
+ : "=a"(__ret) \
+ : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(lock "cmpxchgw %w1,%2" \
+ : "=a"(__ret) \
+ : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(lock "cmpxchgl %1,%2" \
+ : "=a"(__ret) \
+ : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ default: \
+ __cmpxchg_wrong_size(); \
+ } \
+ __ret; \
+})
+
+#define __cmpxchg(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define __sync_cmpxchg(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+
+#define __cmpxchg_local(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), "")
#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-#define sync_cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-#define cmpxchg_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+
+#define cmpxchg(ptr, old, new) \
+ __cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define sync_cmpxchg(ptr, old, new) \
+ __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define cmpxchg_local(ptr, old, new) \
+ __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
#endif
#ifdef CONFIG_X86_CMPXCHG64
@@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
(unsigned long long)(n)))
#endif
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile(LOCK_PREFIX "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
-/*
- * Always use locked operations when touching memory shared with a
- * hypervisor, since the system may be SMP even if the guest kernel
- * isn't.
- */
-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
- unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile("lock; cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile("lock; cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile("lock; cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
- unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile("cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile("cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile("cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
static inline unsigned long long __cmpxchg64(volatile void *ptr,
unsigned long long old,
unsigned long long new)
diff --git a/trunk/arch/x86/include/asm/cmpxchg_64.h b/trunk/arch/x86/include/asm/cmpxchg_64.h
index 52de72e0de8c..485ae415faec 100644
--- a/trunk/arch/x86/include/asm/cmpxchg_64.h
+++ b/trunk/arch/x86/include/asm/cmpxchg_64.h
@@ -3,9 +3,6 @@
#include /* Provides LOCK_PREFIX */
-#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \
- (ptr), sizeof(*(ptr))))
-
#define __xg(x) ((volatile long *)(x))
static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
@@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
#define _set_64bit set_64bit
+extern void __xchg_wrong_size(void);
+extern void __cmpxchg_wrong_size(void);
+
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
* but generally the primitive is invalid, *ptr is output argument. --ANK
*/
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
- int size)
-{
- switch (size) {
- case 1:
- asm volatile("xchgb %b0,%1"
- : "=q" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 2:
- asm volatile("xchgw %w0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 4:
- asm volatile("xchgl %k0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- case 8:
- asm volatile("xchgq %0,%1"
- : "=r" (x)
- : "m" (*__xg(ptr)), "0" (x)
- : "memory");
- break;
- }
- return x;
-}
+#define __xchg(x, ptr, size) \
+({ \
+ __typeof(*(ptr)) __x = (x); \
+ switch (size) { \
+ case 1: \
+ asm volatile("xchgb %b0,%1" \
+ : "=q" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile("xchgw %w0,%1" \
+ : "=r" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile("xchgl %k0,%1" \
+ : "=r" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile("xchgq %0,%1" \
+ : "=r" (__x) \
+ : "m" (*__xg(ptr)), "0" (__x) \
+ : "memory"); \
+ break; \
+ default: \
+ __xchg_wrong_size(); \
+ } \
+ __x; \
+})
+
+#define xchg(ptr, v) \
+ __xchg((v), (ptr), sizeof(*ptr))
+
+#define __HAVE_ARCH_CMPXCHG 1
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
+#define __raw_cmpxchg(ptr, old, new, size, lock) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) __new = (new); \
+ switch (size) { \
+ case 1: \
+ asm volatile(lock "cmpxchgb %b1,%2" \
+ : "=a"(__ret) \
+ : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(lock "cmpxchgw %w1,%2" \
+ : "=a"(__ret) \
+ : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(lock "cmpxchgl %k1,%2" \
+ : "=a"(__ret) \
+ : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(lock "cmpxchgq %1,%2" \
+ : "=a"(__ret) \
+ : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ : "memory"); \
+ break; \
+ default: \
+ __cmpxchg_wrong_size(); \
+ } \
+ __ret; \
+})
-#define __HAVE_ARCH_CMPXCHG 1
+#define __cmpxchg(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 8:
- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
+#define __sync_cmpxchg(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-/*
- * Always use locked operations when touching memory shared with a
- * hypervisor, since the system may be SMP even if the guest kernel
- * isn't.
- */
-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
- unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile("lock; cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile("lock; cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile("lock; cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
+#define __cmpxchg_local(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), "")
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
- unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- asm volatile("cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- asm volatile("cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- asm volatile("cmpxchgl %k1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 8:
- asm volatile("cmpxchgq %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
+#define cmpxchg(ptr, old, new) \
+ __cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define sync_cmpxchg(ptr, old, new) \
+ __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define cmpxchg_local(ptr, old, new) \
+ __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
-#define cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))))
#define cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-#define sync_cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+
#define cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
diff --git a/trunk/arch/x86/include/asm/debugreg.h b/trunk/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..8240f76b531e 100644
--- a/trunk/arch/x86/include/asm/debugreg.h
+++ b/trunk/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
#define DR_TRAP1 (0x2) /* db1 */
#define DR_TRAP2 (0x4) /* db2 */
#define DR_TRAP3 (0x8) /* db3 */
+#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, cpu_dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+ /* Zero the control register for HW Breakpoint */
+ set_debugreg(0UL, 7);
+
+ /* Zero-out the individual HW breakpoint address registers */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+ return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/trunk/arch/x86/include/asm/device.h b/trunk/arch/x86/include/asm/device.h
index cee34e9ca45b..029f230ab637 100644
--- a/trunk/arch/x86/include/asm/device.h
+++ b/trunk/arch/x86/include/asm/device.h
@@ -8,7 +8,7 @@ struct dev_archdata {
#ifdef CONFIG_X86_64
struct dma_map_ops *dma_ops;
#endif
-#ifdef CONFIG_DMAR
+#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
};
diff --git a/trunk/arch/x86/include/asm/dma-mapping.h b/trunk/arch/x86/include/asm/dma-mapping.h
index 6a25d5d42836..0f6c02f3b7d4 100644
--- a/trunk/arch/x86/include/asm/dma-mapping.h
+++ b/trunk/arch/x86/include/asm/dma-mapping.h
@@ -20,7 +20,8 @@
# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
#endif
-extern dma_addr_t bad_dma_address;
+#define DMA_ERROR_CODE 0
+
extern int iommu_merge;
extern struct device x86_dma_fallback_dev;
extern int panic_on_overflow;
@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
if (ops->mapping_error)
return ops->mapping_error(dev, dma_addr);
- return (dma_addr == bad_dma_address);
+ return (dma_addr == DMA_ERROR_CODE);
}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
diff --git a/trunk/arch/x86/include/asm/gart.h b/trunk/arch/x86/include/asm/gart.h
index 6cfdafa409d8..4ac5b0f33fc1 100644
--- a/trunk/arch/x86/include/asm/gart.h
+++ b/trunk/arch/x86/include/asm/gart.h
@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed;
extern int gart_iommu_aperture_disabled;
extern void early_gart_iommu_check(void);
-extern void gart_iommu_init(void);
-extern void gart_iommu_shutdown(void);
+extern int gart_iommu_init(void);
extern void __init gart_parse_options(char *);
extern void gart_iommu_hole_init(void);
@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void);
static inline void early_gart_iommu_check(void)
{
}
-static inline void gart_iommu_init(void)
-{
-}
-static inline void gart_iommu_shutdown(void)
-{
-}
static inline void gart_parse_options(char *options)
{
}
diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h
index 82e3e8f01043..108eb6fd1ae7 100644
--- a/trunk/arch/x86/include/asm/hardirq.h
+++ b/trunk/arch/x86/include/asm/hardirq.h
@@ -20,11 +20,11 @@ typedef struct {
unsigned int irq_call_count;
unsigned int irq_tlb_count;
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
unsigned int irq_threshold_count;
-# endif
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/trunk/arch/x86/include/asm/hw_breakpoint.h b/trunk/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/trunk/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
+struct arch_hw_breakpoint {
+ char *name; /* Contains name of the symbol to set bkpt */
+ unsigned long address;
+ u8 len;
+ u8 type;
+};
+
+#include
+#include
+#include
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1 0x40
+#define X86_BREAKPOINT_LEN_2 0x44
+#define X86_BREAKPOINT_LEN_4 0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE 0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8 0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE 0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE 0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW 0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+ struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+ unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+ int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h
index ba180d93b08c..6e124269fd4b 100644
--- a/trunk/arch/x86/include/asm/hw_irq.h
+++ b/trunk/arch/x86/include/asm/hw_irq.h
@@ -79,14 +79,32 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
int ioapic, int ioapic_pin,
int trigger, int polarity)
{
- irq_attr->ioapic = ioapic;
- irq_attr->ioapic_pin = ioapic_pin;
- irq_attr->trigger = trigger;
- irq_attr->polarity = polarity;
+ irq_attr->ioapic = ioapic;
+ irq_attr->ioapic_pin = ioapic_pin;
+ irq_attr->trigger = trigger;
+ irq_attr->polarity = polarity;
}
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
- struct io_apic_irq_attr *irq_attr);
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+ struct irq_pin_list *irq_2_pin;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
+ u8 vector;
+ u8 move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+
+struct irq_desc;
+extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
extern void enable_IO_APIC(void);
diff --git a/trunk/arch/x86/include/asm/inat.h b/trunk/arch/x86/include/asm/inat.h
new file mode 100644
index 000000000000..205b063e3e32
--- /dev/null
+++ b/trunk/arch/x86/include/asm/inat.h
@@ -0,0 +1,220 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ insn_byte_t last_pfx,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ insn_byte_t last_pfx,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXONLY;
+}
+#endif
diff --git a/trunk/arch/x86/include/asm/inat_types.h b/trunk/arch/x86/include/asm/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/trunk/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/trunk/arch/x86/include/asm/insn.h b/trunk/arch/x86/include/asm/insn.h
new file mode 100644
index 000000000000..96c2e0ad04ca
--- /dev/null
+++ b/trunk/arch/x86/include/asm/insn.h
@@ -0,0 +1,184 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *next_byte;
+};
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+/* The last prefix is needed for two-byte and three-byte opcodes */
+static inline insn_byte_t insn_last_prefix(struct insn *insn)
+{
+ return insn->prefixes.bytes[3];
+}
+
+extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
diff --git a/trunk/arch/x86/include/asm/iommu.h b/trunk/arch/x86/include/asm/iommu.h
index fd6d21bbee6c..345c99cef152 100644
--- a/trunk/arch/x86/include/asm/iommu.h
+++ b/trunk/arch/x86/include/asm/iommu.h
@@ -1,8 +1,6 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
-extern void pci_iommu_shutdown(void);
-extern void no_iommu_init(void);
extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h
index ddda6cbed6f4..ffd700ff5dcb 100644
--- a/trunk/arch/x86/include/asm/irq.h
+++ b/trunk/arch/x86/include/asm/irq.h
@@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq)
#ifdef CONFIG_HOTPLUG_CPU
#include
extern void fixup_irqs(void);
+extern void irq_force_complete_move(int);
#endif
extern void (*generic_interrupt_extension)(void);
diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h
index f1363b72364f..858baa061cfc 100644
--- a/trunk/arch/x86/include/asm/mce.h
+++ b/trunk/arch/x86/include/asm/mce.h
@@ -108,6 +108,8 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
#ifdef __KERNEL__
#include
@@ -118,9 +120,11 @@ extern int mce_disabled;
extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+int mcheck_init(void);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
#else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mcheck_init(void) { return 0; }
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
void mce_log_therm_throt_event(__u64 status);
+#ifdef CONFIG_X86_THERMAL_VECTOR
+extern void mcheck_intel_therm_init(void);
+#else
+static inline void mcheck_intel_therm_init(void) { }
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/trunk/arch/x86/include/asm/mpspec.h b/trunk/arch/x86/include/asm/mpspec.h
index 79c94500c0bb..61d90b1331c3 100644
--- a/trunk/arch/x86/include/asm/mpspec.h
+++ b/trunk/arch/x86/include/asm/mpspec.h
@@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t;
#define physids_shift_left(d, s, n) \
bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
-#define physids_coerce(map) ((map).mask[0])
+static inline unsigned long physids_coerce(physid_mask_t *map)
+{
+ return map->mask[0];
+}
-#define physids_promote(physids) \
- ({ \
- physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
- __physid_mask.mask[0] = physids; \
- __physid_mask; \
- })
+static inline void physids_promote(unsigned long physids, physid_mask_t *map)
+{
+ physids_clear(*map);
+ map->mask[0] = physids;
+}
/* Note: will create very large stack frames if physid_mask_t is big */
#define physid_mask_of_physid(physid) \
diff --git a/trunk/arch/x86/include/asm/msr.h b/trunk/arch/x86/include/asm/msr.h
index 7e2b6ba962ff..5bef931f8b14 100644
--- a/trunk/arch/x86/include/asm/msr.h
+++ b/trunk/arch/x86/include/asm/msr.h
@@ -247,8 +247,8 @@ do { \
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
-void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
@@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
wrmsr(msr_no, l, h);
return 0;
}
-static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
}
-static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h
index ad7ce3fd5065..8d9f8548a870 100644
--- a/trunk/arch/x86/include/asm/perf_event.h
+++ b/trunk/arch/x86/include/asm/perf_event.h
@@ -28,9 +28,20 @@
*/
#define ARCH_PERFMON_EVENT_MASK 0xffff
+/*
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ * - inv
+ * - edge
+ * - cnt-mask
+ * The other filters are supported by fixed counters.
+ * The any-thread option is supported starting with v3.
+ */
+#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000
+
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
diff --git a/trunk/arch/x86/include/asm/processor.h b/trunk/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/trunk/arch/x86/include/asm/processor.h
+++ b/trunk/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
#include
#include
+#define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
+struct perf_event;
+
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
- /* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
+ /* Save middle states of ptrace breakpoints */
+ struct perf_event *ptrace_bps[HBP_NUM];
+ /* Debug status used for traps, single steps, etc... */
+ unsigned long debugreg6;
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
diff --git a/trunk/arch/x86/include/asm/ptrace.h b/trunk/arch/x86/include/asm/ptrace.h
index 0f0d908349aa..3d11fd0f44c5 100644
--- a/trunk/arch/x86/include/asm/ptrace.h
+++ b/trunk/arch/x86/include/asm/ptrace.h
@@ -7,6 +7,7 @@
#ifdef __KERNEL__
#include
+#include
#endif
#ifndef __ASSEMBLY__
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
return regs->sp;
}
+/* Query offset/name of register from its name/offset */
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs: pt_regs from which register value is gotten.
+ * @offset: offset number of the register.
+ *
+ * regs_get_register returns the value of a register. The @offset is the
+ * offset of the register in struct pt_regs address which specified by @regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+ unsigned long addr)
+{
+ return ((addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
+}
+
+/* Get Nth argument at function call */
+extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
+ unsigned int n);
+
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/trunk/arch/x86/include/asm/string_32.h b/trunk/arch/x86/include/asm/string_32.h
index ae907e617181..3d3e8353ee5c 100644
--- a/trunk/arch/x86/include/asm/string_32.h
+++ b/trunk/arch/x86/include/asm/string_32.h
@@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
*/
#ifndef CONFIG_KMEMCHECK
+
+#if (__GNUC__ >= 4)
+#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
+#else
#define memcpy(t, f, n) \
(__builtin_constant_p((n)) \
? __constant_memcpy((t), (f), (n)) \
: __memcpy((t), (f), (n)))
+#endif
#else
/*
* kmemcheck becomes very happy if we use the REP instructions unconditionally,
@@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
: __memset_generic((s), (c), (count)))
#define __HAVE_ARCH_MEMSET
+#if (__GNUC__ >= 4)
+#define memset(s, c, count) __builtin_memset(s, c, count)
+#else
#define memset(s, c, count) \
(__builtin_constant_p(c) \
? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
(count)) \
: __memset((s), (c), (count)))
+#endif
/*
* find the first occurrence of byte 'c', or 1 past the area if none
diff --git a/trunk/arch/x86/include/asm/swiotlb.h b/trunk/arch/x86/include/asm/swiotlb.h
index b9e4e20174fb..87ffcb12a1b8 100644
--- a/trunk/arch/x86/include/asm/swiotlb.h
+++ b/trunk/arch/x86/include/asm/swiotlb.h
@@ -3,17 +3,14 @@
#include
-/* SWIOTLB interface */
-
-extern int swiotlb_force;
-
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
-extern void pci_swiotlb_init(void);
+extern int pci_swiotlb_init(void);
#else
#define swiotlb 0
-static inline void pci_swiotlb_init(void)
+static inline int pci_swiotlb_init(void)
{
+ return 0;
}
#endif
diff --git a/trunk/arch/x86/include/asm/sys_ia32.h b/trunk/arch/x86/include/asm/sys_ia32.h
index 72a6dcd1299b..9af9decb38c3 100644
--- a/trunk/arch/x86/include/asm/sys_ia32.h
+++ b/trunk/arch/x86/include/asm/sys_ia32.h
@@ -51,11 +51,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct sysctl_ia32;
-asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
-#endif
-
asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
diff --git a/trunk/arch/x86/include/asm/system.h b/trunk/arch/x86/include/asm/system.h
index f08f97374892..022a84386de8 100644
--- a/trunk/arch/x86/include/asm/system.h
+++ b/trunk/arch/x86/include/asm/system.h
@@ -128,8 +128,6 @@ do { \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
- ".globl thread_return\n" \
- "thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
@@ -157,19 +155,22 @@ extern void native_load_gs_index(unsigned);
* Load a segment. Fall back on loading the zero
* segment if something goes wrong..
*/
-#define loadsegment(seg, value) \
- asm volatile("\n" \
- "1:\t" \
- "movl %k0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "movl %k1, %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b,3b) \
- : :"r" (value), "r" (0) : "memory")
-
+#define loadsegment(seg, value) \
+do { \
+ unsigned short __val = (value); \
+ \
+ asm volatile(" \n" \
+ "1: movl %k0,%%" #seg " \n" \
+ \
+ ".section .fixup,\"ax\" \n" \
+ "2: xorl %k0,%k0 \n" \
+ " jmp 1b \n" \
+ ".previous \n" \
+ \
+ _ASM_EXTABLE(1b, 2b) \
+ \
+ : "+r" (__val) : : "memory"); \
+} while (0)
/*
* Save a segment register away
diff --git a/trunk/arch/x86/include/asm/uaccess.h b/trunk/arch/x86/include/asm/uaccess.h
index d2c6c930b491..abd3e0ea762a 100644
--- a/trunk/arch/x86/include/asm/uaccess.h
+++ b/trunk/arch/x86/include/asm/uaccess.h
@@ -570,7 +570,6 @@ extern struct movsl_mask {
#ifdef CONFIG_X86_32
# include "uaccess_32.h"
#else
-# define ARCH_HAS_SEARCH_EXTABLE
# include "uaccess_64.h"
#endif
diff --git a/trunk/arch/x86/include/asm/uaccess_32.h b/trunk/arch/x86/include/asm/uaccess_32.h
index 632fb44b4cb5..0c9825e97f36 100644
--- a/trunk/arch/x86/include/asm/uaccess_32.h
+++ b/trunk/arch/x86/include/asm/uaccess_32.h
@@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
unsigned long __must_check copy_to_user(void __user *to,
const void *from, unsigned long n);
-unsigned long __must_check copy_from_user(void *to,
+unsigned long __must_check _copy_from_user(void *to,
const void __user *from,
unsigned long n);
+
+
+extern void copy_from_user_overflow(void)
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+ __compiletime_error("copy_from_user() buffer size is not provably correct")
+#else
+ __compiletime_warning("copy_from_user() buffer size is not provably correct")
+#endif
+;
+
+static inline unsigned long __must_check copy_from_user(void *to,
+ const void __user *from,
+ unsigned long n)
+{
+ int sz = __compiletime_object_size(to);
+ int ret = -EFAULT;
+
+ if (likely(sz == -1 || sz >= n))
+ ret = _copy_from_user(to, from, n);
+ else
+ copy_from_user_overflow();
+
+ return ret;
+}
+
long __must_check strncpy_from_user(char *dst, const char __user *src,
long count);
long __must_check __strncpy_from_user(char *dst,
diff --git a/trunk/arch/x86/include/asm/uaccess_64.h b/trunk/arch/x86/include/asm/uaccess_64.h
index db24b215fc50..46324c6a4f6e 100644
--- a/trunk/arch/x86/include/asm/uaccess_64.h
+++ b/trunk/arch/x86/include/asm/uaccess_64.h
@@ -19,12 +19,37 @@ __must_check unsigned long
copy_user_generic(void *to, const void *from, unsigned len);
__must_check unsigned long
-copy_to_user(void __user *to, const void *from, unsigned len);
+_copy_to_user(void __user *to, const void *from, unsigned len);
__must_check unsigned long
-copy_from_user(void *to, const void __user *from, unsigned len);
+_copy_from_user(void *to, const void __user *from, unsigned len);
__must_check unsigned long
copy_in_user(void __user *to, const void __user *from, unsigned len);
+static inline unsigned long __must_check copy_from_user(void *to,
+ const void __user *from,
+ unsigned long n)
+{
+ int sz = __compiletime_object_size(to);
+ int ret = -EFAULT;
+
+ might_fault();
+ if (likely(sz == -1 || sz >= n))
+ ret = _copy_from_user(to, from, n);
+#ifdef CONFIG_DEBUG_VM
+ else
+ WARN(1, "Buffer overflow detected!\n");
+#endif
+ return ret;
+}
+
+static __always_inline __must_check
+int copy_to_user(void __user *dst, const void *src, unsigned size)
+{
+ might_fault();
+
+ return _copy_to_user(dst, src, size);
+}
+
static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size)
{
@@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str);
__must_check unsigned long clear_user(void __user *mem, unsigned long len);
__must_check unsigned long __clear_user(void __user *mem, unsigned long len);
-__must_check long __copy_from_user_inatomic(void *dst, const void __user *src,
- unsigned size);
+static __must_check __always_inline int
+__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
+{
+ return copy_user_generic(dst, (__force const void *)src, size);
+}
static __must_check __always_inline int
__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
diff --git a/trunk/arch/x86/include/asm/uv/uv_irq.h b/trunk/arch/x86/include/asm/uv/uv_irq.h
index 9613c8c0b647..d6b17c760622 100644
--- a/trunk/arch/x86/include/asm/uv/uv_irq.h
+++ b/trunk/arch/x86/include/asm/uv/uv_irq.h
@@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry {
dest : 32;
};
-extern struct irq_chip uv_irq_chip;
-
-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
-extern void arch_disable_uv_irq(int, unsigned long);
+enum {
+ UV_AFFINITY_ALL,
+ UV_AFFINITY_NODE,
+ UV_AFFINITY_CPU
+};
-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);
#endif /* _ASM_X86_UV_UV_IRQ_H */
diff --git a/trunk/arch/x86/include/asm/x86_init.h b/trunk/arch/x86/include/asm/x86_init.h
index 2c756fd4ab0e..d8e71459f025 100644
--- a/trunk/arch/x86/include/asm/x86_init.h
+++ b/trunk/arch/x86/include/asm/x86_init.h
@@ -90,6 +90,14 @@ struct x86_init_timers {
void (*timer_init)(void);
};
+/**
+ * struct x86_init_iommu - platform specific iommu setup
+ * @iommu_init: platform specific iommu setup
+ */
+struct x86_init_iommu {
+ int (*iommu_init)(void);
+};
+
/**
* struct x86_init_ops - functions for platform specific setup
*
@@ -101,6 +109,7 @@ struct x86_init_ops {
struct x86_init_oem oem;
struct x86_init_paging paging;
struct x86_init_timers timers;
+ struct x86_init_iommu iommu;
};
/**
@@ -121,6 +130,7 @@ struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
unsigned long (*get_wallclock)(void);
int (*set_wallclock)(unsigned long nowtime);
+ void (*iommu_shutdown)(void);
};
extern struct x86_init_ops x86_init;
diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/trunk/arch/x86/kernel/Makefile
+++ b/trunk/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
diff --git a/trunk/arch/x86/kernel/acpi/processor.c b/trunk/arch/x86/kernel/acpi/processor.c
index d296f4a195c9..d85d1b2432ba 100644
--- a/trunk/arch/x86/kernel/acpi/processor.c
+++ b/trunk/arch/x86/kernel/acpi/processor.c
@@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
struct cpuinfo_x86 *c = &cpu_data(pr->id);
pr->pdc = NULL;
- if (c->x86_vendor == X86_VENDOR_INTEL)
+ if (c->x86_vendor == X86_VENDOR_INTEL ||
+ c->x86_vendor == X86_VENDOR_CENTAUR)
init_intel_pdc(pr, c);
return;
diff --git a/trunk/arch/x86/kernel/amd_iommu.c b/trunk/arch/x86/kernel/amd_iommu.c
index 0285521e0a99..32fb09102a13 100644
--- a/trunk/arch/x86/kernel/amd_iommu.c
+++ b/trunk/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Author: Joerg Roedel
* Leo Duran
*
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
#include
#include
@@ -56,20 +57,115 @@ struct iommu_cmd {
u32 data[4];
};
-static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
- struct unity_map_entry *e);
-static struct dma_ops_domain *find_protection_domain(u16 devid);
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address, int end_lvl,
- u64 **pte_page, gfp_t gfp);
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
- unsigned long start_page,
- unsigned int pages);
static void reset_iommu_command_buffer(struct amd_iommu *iommu);
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address, int map_size);
static void update_domain(struct protection_domain *domain);
+/****************************************************************************
+ *
+ * Helper functions
+ *
+ ****************************************************************************/
+
+static inline u16 get_device_id(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return calc_devid(pdev->bus->number, pdev->devfn);
+}
+
+static struct iommu_dev_data *get_dev_data(struct device *dev)
+{
+ return dev->archdata.iommu;
+}
+
+/*
+ * In this function the list of preallocated protection domains is traversed to
+ * find the domain for a specific device
+ */
+static struct dma_ops_domain *find_protection_domain(u16 devid)
+{
+ struct dma_ops_domain *entry, *ret = NULL;
+ unsigned long flags;
+ u16 alias = amd_iommu_alias_table[devid];
+
+ if (list_empty(&iommu_pd_list))
+ return NULL;
+
+ spin_lock_irqsave(&iommu_pd_list_lock, flags);
+
+ list_for_each_entry(entry, &iommu_pd_list, list) {
+ if (entry->target_dev == devid ||
+ entry->target_dev == alias) {
+ ret = entry;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+ return ret;
+}
+
+/*
+ * This function checks if the driver got a valid device from the caller to
+ * avoid dereferencing invalid pointers.
+ */
+static bool check_device(struct device *dev)
+{
+ u16 devid;
+
+ if (!dev || !dev->dma_mask)
+ return false;
+
+ /* No device or no PCI device */
+ if (!dev || dev->bus != &pci_bus_type)
+ return false;
+
+ devid = get_device_id(dev);
+
+ /* Out of our scope? */
+ if (devid > amd_iommu_last_bdf)
+ return false;
+
+ if (amd_iommu_rlookup_table[devid] == NULL)
+ return false;
+
+ return true;
+}
+
+static int iommu_init_device(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+ struct pci_dev *pdev;
+ u16 devid, alias;
+
+ if (dev->archdata.iommu)
+ return 0;
+
+ dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
+ if (!dev_data)
+ return -ENOMEM;
+
+ dev_data->dev = dev;
+
+ devid = get_device_id(dev);
+ alias = amd_iommu_alias_table[devid];
+ pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
+ if (pdev)
+ dev_data->alias = &pdev->dev;
+
+ atomic_set(&dev_data->bind, 0);
+
+ dev->archdata.iommu = dev_data;
+
+
+ return 0;
+}
+
+static void iommu_uninit_device(struct device *dev)
+{
+ kfree(dev->archdata.iommu);
+}
#ifdef CONFIG_AMD_IOMMU_STATS
/*
@@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
static struct dentry *stats_dir;
-static struct dentry *de_isolate;
static struct dentry *de_fflush;
static void amd_iommu_stats_add(struct __iommu_counter *cnt)
@@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void)
if (stats_dir == NULL)
return;
- de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
- (u32 *)&amd_iommu_isolate);
-
de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
(u32 *)&amd_iommu_unmap_flush);
@@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void)
#endif
-/* returns !0 if the IOMMU is caching non-present entries in its TLB */
-static int iommu_has_npcache(struct amd_iommu *iommu)
-{
- return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
-}
-
/****************************************************************************
*
* Interrupt handling functions
@@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
break;
case EVENT_TYPE_ILL_CMD:
printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+ iommu->reset_in_progress = true;
reset_iommu_command_buffer(iommu);
dump_command(address);
break;
@@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (unlikely(i == EXIT_LOOP_COUNT)) {
- spin_unlock(&iommu->lock);
- reset_iommu_command_buffer(iommu);
- spin_lock(&iommu->lock);
- }
+ if (unlikely(i == EXIT_LOOP_COUNT))
+ iommu->reset_in_progress = true;
}
/*
@@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
out:
spin_unlock_irqrestore(&iommu->lock, flags);
+ if (iommu->reset_in_progress)
+ reset_iommu_command_buffer(iommu);
+
return 0;
}
+static void iommu_flush_complete(struct protection_domain *domain)
+{
+ int i;
+
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (!domain->dev_iommu[i])
+ continue;
+
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need to wait for completion of all commands.
+ */
+ iommu_completion_wait(amd_iommus[i]);
+ }
+}
+
/*
* Command send function for invalidating a device table entry
*/
-static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
+static int iommu_flush_device(struct device *dev)
{
+ struct amd_iommu *iommu;
struct iommu_cmd cmd;
- int ret;
+ u16 devid;
- BUG_ON(iommu == NULL);
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ /* Build command */
memset(&cmd, 0, sizeof(cmd));
CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
cmd.data[0] = devid;
- ret = iommu_queue_command(iommu, &cmd);
-
- return ret;
+ return iommu_queue_command(iommu, &cmd);
}
static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
@@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
* It invalidates a single PTE if the range to flush is within a single
* page. Otherwise it flushes the whole TLB of the IOMMU.
*/
-static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
- u64 address, size_t size)
+static void __iommu_flush_pages(struct protection_domain *domain,
+ u64 address, size_t size, int pde)
{
- int s = 0;
- unsigned pages = iommu_num_pages(address, size, PAGE_SIZE);
+ int s = 0, i;
+ unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
address &= PAGE_MASK;
@@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
s = 1;
}
- iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
- return 0;
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (!domain->dev_iommu[i])
+ continue;
+
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need a TLB flush
+ */
+ iommu_queue_inv_iommu_pages(amd_iommus[i], address,
+ domain->id, pde, s);
+ }
+
+ return;
}
-/* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_pages(struct protection_domain *domain,
+ u64 address, size_t size)
{
- u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-
- INC_STATS_COUNTER(domain_flush_single);
+ __iommu_flush_pages(domain, address, size, 0);
+}
- iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct protection_domain *domain)
+{
+ __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_tlb_pde(struct protection_domain *domain)
{
- u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-
- INC_STATS_COUNTER(domain_flush_single);
-
- iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
+ __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
+
/*
- * This function flushes one domain on one IOMMU
+ * This function flushes the DTEs for all devices in domain
*/
-static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
+static void iommu_flush_domain_devices(struct protection_domain *domain)
{
- struct iommu_cmd cmd;
+ struct iommu_dev_data *dev_data;
unsigned long flags;
- __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
- domid, 1, 1);
+ spin_lock_irqsave(&domain->lock, flags);
- spin_lock_irqsave(&iommu->lock, flags);
- __iommu_queue_command(iommu, &cmd);
- __iommu_completion_wait(iommu);
- __iommu_wait_for_completion(iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ list_for_each_entry(dev_data, &domain->dev_list, list)
+ iommu_flush_device(dev_data->dev);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
}
-static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
+static void iommu_flush_all_domain_devices(void)
{
- int i;
+ struct protection_domain *domain;
+ unsigned long flags;
- for (i = 1; i < MAX_DOMAIN_ID; ++i) {
- if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
- continue;
- flush_domain_on_iommu(iommu, i);
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+
+ list_for_each_entry(domain, &amd_iommu_pd_list, list) {
+ iommu_flush_domain_devices(domain);
+ iommu_flush_complete(domain);
}
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+ iommu_flush_all_domain_devices();
}
/*
- * This function is used to flush the IO/TLB for a given protection domain
- * on every IOMMU in the system
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
*/
-static void iommu_flush_domain(u16 domid)
+void amd_iommu_flush_all_domains(void)
{
- struct amd_iommu *iommu;
+ struct protection_domain *domain;
+ unsigned long flags;
- INC_STATS_COUNTER(domain_flush_all);
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
- for_each_iommu(iommu)
- flush_domain_on_iommu(iommu, domid);
+ list_for_each_entry(domain, &amd_iommu_pd_list, list) {
+ spin_lock(&domain->lock);
+ iommu_flush_tlb_pde(domain);
+ iommu_flush_complete(domain);
+ spin_unlock(&domain->lock);
+ }
+
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
}
-void amd_iommu_flush_all_domains(void)
+static void reset_iommu_command_buffer(struct amd_iommu *iommu)
{
- struct amd_iommu *iommu;
+ pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
- for_each_iommu(iommu)
- flush_all_domains_on_iommu(iommu);
+ if (iommu->reset_in_progress)
+ panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+
+ amd_iommu_reset_cmd_buffer(iommu);
+ amd_iommu_flush_all_devices();
+ amd_iommu_flush_all_domains();
+
+ iommu->reset_in_progress = false;
}
-static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
+/****************************************************************************
+ *
+ * The functions below are used the create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+/*
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
+ */
+static bool increase_address_space(struct protection_domain *domain,
+ gfp_t gfp)
{
- int i;
+ u64 *pte;
- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if (iommu != amd_iommu_rlookup_table[i])
- continue;
+ if (domain->mode == PAGE_MODE_6_LEVEL)
+ /* address space already 64 bit large */
+ return false;
- iommu_queue_inv_dev_entry(iommu, i);
- iommu_completion_wait(iommu);
- }
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ return false;
+
+ *pte = PM_LEVEL_PDE(domain->mode,
+ virt_to_phys(domain->pt_root));
+ domain->pt_root = pte;
+ domain->mode += 1;
+ domain->updated = true;
+
+ return true;
}
-static void flush_devices_by_domain(struct protection_domain *domain)
+static u64 *alloc_pte(struct protection_domain *domain,
+ unsigned long address,
+ int end_lvl,
+ u64 **pte_page,
+ gfp_t gfp)
{
- struct amd_iommu *iommu;
- int i;
+ u64 *pte, *page;
+ int level;
- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
- (amd_iommu_pd_table[i] != domain))
- continue;
+ while (address > PM_LEVEL_SIZE(domain->mode))
+ increase_address_space(domain, gfp);
- iommu = amd_iommu_rlookup_table[i];
- if (!iommu)
- continue;
+ level = domain->mode - 1;
+ pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+
+ while (level > end_lvl) {
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+ }
+
+ level -= 1;
- iommu_queue_inv_dev_entry(iommu, i);
- iommu_completion_wait(iommu);
+ pte = IOMMU_PTE_PAGE(*pte);
+
+ if (pte_page && level == end_lvl)
+ *pte_page = pte;
+
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
}
+
+ return pte;
}
-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct protection_domain *domain,
+ unsigned long address, int map_size)
{
- pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
+ int level;
+ u64 *pte;
- if (iommu->reset_in_progress)
- panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+ level = domain->mode - 1;
+ pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
- iommu->reset_in_progress = true;
+ while (level > map_size) {
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
- amd_iommu_reset_cmd_buffer(iommu);
- flush_all_devices_for_iommu(iommu);
- flush_all_domains_on_iommu(iommu);
+ level -= 1;
- iommu->reset_in_progress = false;
-}
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
-void amd_iommu_flush_all_devices(void)
-{
- flush_devices_by_domain(NULL);
-}
+ if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
+ pte = NULL;
+ break;
+ }
+ }
-/****************************************************************************
- *
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
+ return pte;
+}
/*
* Generic mapping functions. It maps a physical address into a DMA
@@ -653,28 +827,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
return 0;
}
-/*
- * Init the unity mappings for a specific IOMMU in the system
- *
- * Basically iterates over all unity mapping entries and applies them to
- * the default domain DMA of that IOMMU if necessary.
- */
-static int iommu_init_unity_mappings(struct amd_iommu *iommu)
-{
- struct unity_map_entry *entry;
- int ret;
-
- list_for_each_entry(entry, &amd_iommu_unity_map, list) {
- if (!iommu_for_unity_map(iommu, entry))
- continue;
- ret = dma_ops_unity_map(iommu->default_dom, entry);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
/*
* This function actually applies the mapping to the page table of the
* dma_ops domain.
@@ -703,6 +855,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
return 0;
}
+/*
+ * Init the unity mappings for a specific IOMMU in the system
+ *
+ * Basically iterates over all unity mapping entries and applies them to
+ * the default domain DMA of that IOMMU if necessary.
+ */
+static int iommu_init_unity_mappings(struct amd_iommu *iommu)
+{
+ struct unity_map_entry *entry;
+ int ret;
+
+ list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+ if (!iommu_for_unity_map(iommu, entry))
+ continue;
+ ret = dma_ops_unity_map(iommu->default_dom, entry);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* Inits the unity mappings required for a specific device
*/
@@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
*/
/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
+ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ * ranges.
*/
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address, int map_size)
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+ unsigned long start_page,
+ unsigned int pages)
{
- int level;
- u64 *pte;
-
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
-
- while (level > map_size) {
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- level -= 1;
+ unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
+ if (start_page + pages > last_page)
+ pages = last_page - start_page;
- if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
- pte = NULL;
- break;
- }
+ for (i = start_page; i < start_page + pages; ++i) {
+ int index = i / APERTURE_RANGE_PAGES;
+ int page = i % APERTURE_RANGE_PAGES;
+ __set_bit(page, dom->aperture[index]->bitmap);
}
-
- return pte;
}
/*
@@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain,
* aperture in case of dma_ops domain allocation or address allocation
* failure.
*/
-static int alloc_new_range(struct amd_iommu *iommu,
- struct dma_ops_domain *dma_dom,
+static int alloc_new_range(struct dma_ops_domain *dma_dom,
bool populate, gfp_t gfp)
{
int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ struct amd_iommu *iommu;
int i;
#ifdef CONFIG_IOMMU_STRESS
@@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu,
dma_dom->aperture_size += APERTURE_RANGE_SIZE;
/* Intialize the exclusion range if necessary */
- if (iommu->exclusion_start &&
- iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
- iommu->exclusion_start < dma_dom->aperture_size) {
- unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = iommu_num_pages(iommu->exclusion_start,
- iommu->exclusion_length,
- PAGE_SIZE);
- dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ for_each_iommu(iommu) {
+ if (iommu->exclusion_start &&
+ iommu->exclusion_start >= dma_dom->aperture[index]->offset
+ && iommu->exclusion_start < dma_dom->aperture_size) {
+ unsigned long startpage;
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length,
+ PAGE_SIZE);
+ startpage = iommu->exclusion_start >> PAGE_SHIFT;
+ dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ }
}
/*
@@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
}
if (unlikely(address == -1))
- address = bad_dma_address;
+ address = DMA_ERROR_CODE;
WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
*
****************************************************************************/
+/*
+ * This function adds a protection domain to the global protection domain list
+ */
+static void add_domain_to_list(struct protection_domain *domain)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+ list_add(&domain->list, &amd_iommu_pd_list);
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
+/*
+ * This function removes a protection domain to the global
+ * protection domain list
+ */
+static void del_domain_from_list(struct protection_domain *domain)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+ list_del(&domain->list);
+ spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+}
+
static u16 domain_id_alloc(void)
{
unsigned long flags;
@@ -1000,26 +1191,6 @@ static void domain_id_free(int id)
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
-/*
- * Used to reserve address ranges in the aperture (e.g. for exclusion
- * ranges.
- */
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
- unsigned long start_page,
- unsigned int pages)
-{
- unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
-
- if (start_page + pages > last_page)
- pages = last_page - start_page;
-
- for (i = start_page; i < start_page + pages; ++i) {
- int index = i / APERTURE_RANGE_PAGES;
- int page = i % APERTURE_RANGE_PAGES;
- __set_bit(page, dom->aperture[index]->bitmap);
- }
-}
-
static void free_pagetable(struct protection_domain *domain)
{
int i, j;
@@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
if (!dom)
return;
+ del_domain_from_list(&dom->domain);
+
free_pagetable(&dom->domain);
for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
@@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
* It also intializes the page table and the address allocator data
* structures required for the dma_ops interface
*/
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
+static struct dma_ops_domain *dma_ops_domain_alloc(void)
{
struct dma_ops_domain *dma_dom;
@@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
dma_dom->domain.id = domain_id_alloc();
if (dma_dom->domain.id == 0)
goto free_dma_dom;
+ INIT_LIST_HEAD(&dma_dom->domain.dev_list);
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
dma_dom->need_flush = false;
dma_dom->target_dev = 0xffff;
- if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+ add_domain_to_list(&dma_dom->domain);
+
+ if (alloc_new_range(dma_dom, true, GFP_KERNEL))
goto free_dma_dom;
/*
@@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain)
return domain->flags & PD_DMA_OPS_MASK;
}
-/*
- * Find out the protection domain structure for a given PCI device. This
- * will give us the pointer to the page table root for example.
- */
-static struct protection_domain *domain_for_device(u16 devid)
-{
- struct protection_domain *dom;
- unsigned long flags;
-
- read_lock_irqsave(&amd_iommu_devtable_lock, flags);
- dom = amd_iommu_pd_table[devid];
- read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
-
- return dom;
-}
-
static void set_dte_entry(u16 devid, struct protection_domain *domain)
{
u64 pte_root = virt_to_phys(domain->pt_root);
@@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain)
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+}
+
+static void clear_dte_entry(u16 devid)
+{
+ /* remove entry from the device table seen by the hardware */
+ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+ amd_iommu_dev_table[devid].data[1] = 0;
+ amd_iommu_dev_table[devid].data[2] = 0;
+
+ amd_iommu_apply_erratum_63(devid);
+}
+
+static void do_attach(struct device *dev, struct protection_domain *domain)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ u16 devid;
- amd_iommu_pd_table[devid] = domain;
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
+
+ /* Update data structures */
+ dev_data->domain = domain;
+ list_add(&dev_data->list, &domain->dev_list);
+ set_dte_entry(devid, domain);
+
+ /* Do reference counting */
+ domain->dev_iommu[iommu->index] += 1;
+ domain->dev_cnt += 1;
+
+ /* Flush the DTE entry */
+ iommu_flush_device(dev);
+}
+
+static void do_detach(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ u16 devid;
+
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
+
+ /* decrease reference counters */
+ dev_data->domain->dev_iommu[iommu->index] -= 1;
+ dev_data->domain->dev_cnt -= 1;
+
+ /* Update data structures */
+ dev_data->domain = NULL;
+ list_del(&dev_data->list);
+ clear_dte_entry(devid);
+
+ /* Flush the DTE entry */
+ iommu_flush_device(dev);
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
-static void __attach_device(struct amd_iommu *iommu,
- struct protection_domain *domain,
- u16 devid)
+static int __attach_device(struct device *dev,
+ struct protection_domain *domain)
{
+ struct iommu_dev_data *dev_data, *alias_data;
+
+ dev_data = get_dev_data(dev);
+ alias_data = get_dev_data(dev_data->alias);
+
+ if (!alias_data)
+ return -EINVAL;
+
/* lock domain */
spin_lock(&domain->lock);
- /* update DTE entry */
- set_dte_entry(devid, domain);
+ /* Some sanity checks */
+ if (alias_data->domain != NULL &&
+ alias_data->domain != domain)
+ return -EBUSY;
+
+ if (dev_data->domain != NULL &&
+ dev_data->domain != domain)
+ return -EBUSY;
+
+ /* Do real assignment */
+ if (dev_data->alias != dev) {
+ alias_data = get_dev_data(dev_data->alias);
+ if (alias_data->domain == NULL)
+ do_attach(dev_data->alias, domain);
+
+ atomic_inc(&alias_data->bind);
+ }
+
+ if (dev_data->domain == NULL)
+ do_attach(dev, domain);
- domain->dev_cnt += 1;
+ atomic_inc(&dev_data->bind);
/* ready */
spin_unlock(&domain->lock);
+
+ return 0;
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
-static void attach_device(struct amd_iommu *iommu,
- struct protection_domain *domain,
- u16 devid)
+static int attach_device(struct device *dev,
+ struct protection_domain *domain)
{
unsigned long flags;
+ int ret;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __attach_device(iommu, domain, devid);
+ ret = __attach_device(dev, domain);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
/*
@@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu,
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
- iommu_queue_inv_dev_entry(iommu, devid);
- iommu_flush_tlb_pde(iommu, domain->id);
+ iommu_flush_tlb_pde(domain);
+
+ return ret;
}
/*
* Removes a device from a protection domain (unlocked)
*/
-static void __detach_device(struct protection_domain *domain, u16 devid)
+static void __detach_device(struct device *dev)
{
+ struct iommu_dev_data *dev_data = get_dev_data(dev);
+ struct iommu_dev_data *alias_data;
+ unsigned long flags;
- /* lock domain */
- spin_lock(&domain->lock);
-
- /* remove domain from the lookup table */
- amd_iommu_pd_table[devid] = NULL;
+ BUG_ON(!dev_data->domain);
- /* remove entry from the device table seen by the hardware */
- amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
- amd_iommu_dev_table[devid].data[1] = 0;
- amd_iommu_dev_table[devid].data[2] = 0;
+ spin_lock_irqsave(&dev_data->domain->lock, flags);
- amd_iommu_apply_erratum_63(devid);
+ if (dev_data->alias != dev) {
+ alias_data = get_dev_data(dev_data->alias);
+ if (atomic_dec_and_test(&alias_data->bind))
+ do_detach(dev_data->alias);
+ }
- /* decrease reference counter */
- domain->dev_cnt -= 1;
+ if (atomic_dec_and_test(&dev_data->bind))
+ do_detach(dev);
- /* ready */
- spin_unlock(&domain->lock);
+ spin_unlock_irqrestore(&dev_data->domain->lock, flags);
/*
* If we run in passthrough mode the device must be assigned to the
* passthrough domain if it is detached from any other domain
*/
- if (iommu_pass_through) {
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
- __attach_device(iommu, pt_domain, devid);
- }
+ if (iommu_pass_through && dev_data->domain == NULL)
+ __attach_device(dev, pt_domain);
}
/*
* Removes a device from a protection domain (with devtable_lock held)
*/
-static void detach_device(struct protection_domain *domain, u16 devid)
+static void detach_device(struct device *dev)
{
unsigned long flags;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __detach_device(domain, devid);
+ __detach_device(dev);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
+/*
+ * Find out the protection domain structure for a given PCI device. This
+ * will give us the pointer to the page table root for example.
+ */
+static struct protection_domain *domain_for_device(struct device *dev)
+{
+ struct protection_domain *dom;
+ struct iommu_dev_data *dev_data, *alias_data;
+ unsigned long flags;
+ u16 devid, alias;
+
+ devid = get_device_id(dev);
+ alias = amd_iommu_alias_table[devid];
+ dev_data = get_dev_data(dev);
+ alias_data = get_dev_data(dev_data->alias);
+ if (!alias_data)
+ return NULL;
+
+ read_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ dom = dev_data->domain;
+ if (dom == NULL &&
+ alias_data->domain != NULL) {
+ __attach_device(dev, alias_data->domain);
+ dom = alias_data->domain;
+ }
+
+ read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+ return dom;
+}
+
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
- struct pci_dev *pdev = to_pci_dev(dev);
- u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+ u16 devid;
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct amd_iommu *iommu;
unsigned long flags;
- if (devid > amd_iommu_last_bdf)
- goto out;
-
- devid = amd_iommu_alias_table[devid];
-
- iommu = amd_iommu_rlookup_table[devid];
- if (iommu == NULL)
- goto out;
-
- domain = domain_for_device(devid);
+ if (!check_device(dev))
+ return 0;
- if (domain && !dma_ops_domain(domain))
- WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
- "to a non-dma-ops domain\n", dev_name(dev));
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
switch (action) {
case BUS_NOTIFY_UNBOUND_DRIVER:
+
+ domain = domain_for_device(dev);
+
if (!domain)
goto out;
if (iommu_pass_through)
break;
- detach_device(domain, devid);
+ detach_device(dev);
break;
case BUS_NOTIFY_ADD_DEVICE:
+
+ iommu_init_device(dev);
+
+ domain = domain_for_device(dev);
+
/* allocate a protection domain if a device is added */
dma_domain = find_protection_domain(devid);
if (dma_domain)
goto out;
- dma_domain = dma_ops_domain_alloc(iommu);
+ dma_domain = dma_ops_domain_alloc();
if (!dma_domain)
goto out;
dma_domain->target_dev = devid;
@@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb,
spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
break;
+ case BUS_NOTIFY_DEL_DEVICE:
+
+ iommu_uninit_device(dev);
+
default:
goto out;
}
- iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_flush_device(dev);
iommu_completion_wait(iommu);
out:
@@ -1321,44 +1593,6 @@ static struct notifier_block device_nb = {
*
*****************************************************************************/
-/*
- * This function checks if the driver got a valid device from the caller to
- * avoid dereferencing invalid pointers.
- */
-static bool check_device(struct device *dev)
-{
- if (!dev || !dev->dma_mask)
- return false;
-
- return true;
-}
-
-/*
- * In this function the list of preallocated protection domains is traversed to
- * find the domain for a specific device
- */
-static struct dma_ops_domain *find_protection_domain(u16 devid)
-{
- struct dma_ops_domain *entry, *ret = NULL;
- unsigned long flags;
-
- if (list_empty(&iommu_pd_list))
- return NULL;
-
- spin_lock_irqsave(&iommu_pd_list_lock, flags);
-
- list_for_each_entry(entry, &iommu_pd_list, list) {
- if (entry->target_dev == devid) {
- ret = entry;
- break;
- }
- }
-
- spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
-
- return ret;
-}
-
/*
* In the dma_ops path we only have the struct device. This function
* finds the corresponding IOMMU, the protection domain and the
@@ -1366,62 +1600,40 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
* If the device is not yet associated with a domain this is also done
* in this function.
*/
-static int get_device_resources(struct device *dev,
- struct amd_iommu **iommu,
- struct protection_domain **domain,
- u16 *bdf)
+static struct protection_domain *get_domain(struct device *dev)
{
+ struct protection_domain *domain;
struct dma_ops_domain *dma_dom;
- struct pci_dev *pcidev;
- u16 _bdf;
-
- *iommu = NULL;
- *domain = NULL;
- *bdf = 0xffff;
-
- if (dev->bus != &pci_bus_type)
- return 0;
-
- pcidev = to_pci_dev(dev);
- _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
+ u16 devid = get_device_id(dev);
- /* device not translated by any IOMMU in the system? */
- if (_bdf > amd_iommu_last_bdf)
- return 0;
+ if (!check_device(dev))
+ return ERR_PTR(-EINVAL);
- *bdf = amd_iommu_alias_table[_bdf];
+ domain = domain_for_device(dev);
+ if (domain != NULL && !dma_ops_domain(domain))
+ return ERR_PTR(-EBUSY);
- *iommu = amd_iommu_rlookup_table[*bdf];
- if (*iommu == NULL)
- return 0;
- *domain = domain_for_device(*bdf);
- if (*domain == NULL) {
- dma_dom = find_protection_domain(*bdf);
- if (!dma_dom)
- dma_dom = (*iommu)->default_dom;
- *domain = &dma_dom->domain;
- attach_device(*iommu, *domain, *bdf);
- DUMP_printk("Using protection domain %d for device %s\n",
- (*domain)->id, dev_name(dev));
- }
+ if (domain != NULL)
+ return domain;
- if (domain_for_device(_bdf) == NULL)
- attach_device(*iommu, *domain, _bdf);
+ /* Device not bount yet - bind it */
+ dma_dom = find_protection_domain(devid);
+ if (!dma_dom)
+ dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
+ attach_device(dev, &dma_dom->domain);
+ DUMP_printk("Using protection domain %d for device %s\n",
+ dma_dom->domain.id, dev_name(dev));
- return 1;
+ return &dma_dom->domain;
}
static void update_device_table(struct protection_domain *domain)
{
- unsigned long flags;
- int i;
+ struct iommu_dev_data *dev_data;
- for (i = 0; i <= amd_iommu_last_bdf; ++i) {
- if (amd_iommu_pd_table[i] != domain)
- continue;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- set_dte_entry(i, domain);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ u16 devid = get_device_id(dev_data->dev);
+ set_dte_entry(devid, domain);
}
}
@@ -1431,75 +1643,12 @@ static void update_domain(struct protection_domain *domain)
return;
update_device_table(domain);
- flush_devices_by_domain(domain);
- iommu_flush_domain(domain->id);
+ iommu_flush_domain_devices(domain);
+ iommu_flush_tlb_pde(domain);
domain->updated = false;
}
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct protection_domain *domain,
- gfp_t gfp)
-{
- u64 *pte;
-
- if (domain->mode == PAGE_MODE_6_LEVEL)
- /* address space already 64 bit large */
- return false;
-
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- return false;
-
- *pte = PM_LEVEL_PDE(domain->mode,
- virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
- domain->updated = true;
-
- return true;
-}
-
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address,
- int end_lvl,
- u64 **pte_page,
- gfp_t gfp)
-{
- u64 *pte, *page;
- int level;
-
- while (address > PM_LEVEL_SIZE(domain->mode))
- increase_address_space(domain, gfp);
-
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
-
- while (level > end_lvl) {
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(gfp);
- if (!page)
- return NULL;
- *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
- }
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(*pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
/*
* This function fetches the PTE for a given address in the aperture
*/
@@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
* This is the generic map function. It maps one 4kb page at paddr to
* the given address in the DMA address space for the domain.
*/
-static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
- struct dma_ops_domain *dom,
+static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
unsigned long address,
phys_addr_t paddr,
int direction)
@@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
pte = dma_ops_get_pte(dom, address);
if (!pte)
- return bad_dma_address;
+ return DMA_ERROR_CODE;
__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
@@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
/*
* The generic unmapping function for on page in the DMA address space.
*/
-static void dma_ops_domain_unmap(struct amd_iommu *iommu,
- struct dma_ops_domain *dom,
+static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
unsigned long address)
{
struct aperture_range *aperture;
@@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
* Must be called with the domain lock held.
*/
static dma_addr_t __map_single(struct device *dev,
- struct amd_iommu *iommu,
struct dma_ops_domain *dma_dom,
phys_addr_t paddr,
size_t size,
@@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev,
retry:
address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
dma_mask);
- if (unlikely(address == bad_dma_address)) {
+ if (unlikely(address == DMA_ERROR_CODE)) {
/*
* setting next_address here will let the address
* allocator only scan the new allocated range in the
@@ -1633,7 +1779,7 @@ static dma_addr_t __map_single(struct device *dev,
*/
dma_dom->next_address = dma_dom->aperture_size;
- if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+ if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
goto out;
/*
@@ -1645,8 +1791,8 @@ static dma_addr_t __map_single(struct device *dev,
start = address;
for (i = 0; i < pages; ++i) {
- ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
- if (ret == bad_dma_address)
+ ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
+ if (ret == DMA_ERROR_CODE)
goto out_unmap;
paddr += PAGE_SIZE;
@@ -1657,10 +1803,10 @@ static dma_addr_t __map_single(struct device *dev,
ADD_STATS_COUNTER(alloced_io_mem, size);
if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
- iommu_flush_tlb(iommu, dma_dom->domain.id);
+ iommu_flush_tlb(&dma_dom->domain);
dma_dom->need_flush = false;
- } else if (unlikely(iommu_has_npcache(iommu)))
- iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
+ } else if (unlikely(amd_iommu_np_cache))
+ iommu_flush_pages(&dma_dom->domain, address, size);
out:
return address;
@@ -1669,20 +1815,19 @@ static dma_addr_t __map_single(struct device *dev,
for (--i; i >= 0; --i) {
start -= PAGE_SIZE;
- dma_ops_domain_unmap(iommu, dma_dom, start);
+ dma_ops_domain_unmap(dma_dom, start);
}
dma_ops_free_addresses(dma_dom, address, pages);
- return bad_dma_address;
+ return DMA_ERROR_CODE;
}
/*
* Does the reverse of the __map_single function. Must be called with
* the domain lock held too
*/
-static void __unmap_single(struct amd_iommu *iommu,
- struct dma_ops_domain *dma_dom,
+static void __unmap_single(struct dma_ops_domain *dma_dom,
dma_addr_t dma_addr,
size_t size,
int dir)
@@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_addr_t i, start;
unsigned int pages;
- if ((dma_addr == bad_dma_address) ||
+ if ((dma_addr == DMA_ERROR_CODE) ||
(dma_addr + size > dma_dom->aperture_size))
return;
@@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu,
start = dma_addr;
for (i = 0; i < pages; ++i) {
- dma_ops_domain_unmap(iommu, dma_dom, start);
+ dma_ops_domain_unmap(dma_dom, start);
start += PAGE_SIZE;
}
@@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
- iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+ iommu_flush_pages(&dma_dom->domain, dma_addr, size);
dma_dom->need_flush = false;
}
}
@@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
struct dma_attrs *attrs)
{
unsigned long flags;
- struct amd_iommu *iommu;
struct protection_domain *domain;
- u16 devid;
dma_addr_t addr;
u64 dma_mask;
phys_addr_t paddr = page_to_phys(page) + offset;
INC_STATS_COUNTER(cnt_map_single);
- if (!check_device(dev))
- return bad_dma_address;
-
- dma_mask = *dev->dma_mask;
-
- get_device_resources(dev, &iommu, &domain, &devid);
-
- if (iommu == NULL || domain == NULL)
- /* device not handled by any AMD IOMMU */
+ domain = get_domain(dev);
+ if (PTR_ERR(domain) == -EINVAL)
return (dma_addr_t)paddr;
+ else if (IS_ERR(domain))
+ return DMA_ERROR_CODE;
- if (!dma_ops_domain(domain))
- return bad_dma_address;
+ dma_mask = *dev->dma_mask;
spin_lock_irqsave(&domain->lock, flags);
- addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
+
+ addr = __map_single(dev, domain->priv, paddr, size, dir, false,
dma_mask);
- if (addr == bad_dma_address)
+ if (addr == DMA_ERROR_CODE)
goto out;
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
unsigned long flags;
- struct amd_iommu *iommu;
struct protection_domain *domain;
- u16 devid;
INC_STATS_COUNTER(cnt_unmap_single);
- if (!check_device(dev) ||
- !get_device_resources(dev, &iommu, &domain, &devid))
- /* device not handled by any AMD IOMMU */
- return;
-
- if (!dma_ops_domain(domain))
+ domain = get_domain(dev);
+ if (IS_ERR(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
- __unmap_single(iommu, domain->priv, dma_addr, size, dir);
+ __unmap_single(domain->priv, dma_addr, size, dir);
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
unsigned long flags;
- struct amd_iommu *iommu;
struct protection_domain *domain;
- u16 devid;
int i;
struct scatterlist *s;
phys_addr_t paddr;
@@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
INC_STATS_COUNTER(cnt_map_sg);
- if (!check_device(dev))
+ domain = get_domain(dev);
+ if (PTR_ERR(domain) == -EINVAL)
+ return map_sg_no_iommu(dev, sglist, nelems, dir);
+ else if (IS_ERR(domain))
return 0;
dma_mask = *dev->dma_mask;
- get_device_resources(dev, &iommu, &domain, &devid);
-
- if (!iommu || !domain)
- return map_sg_no_iommu(dev, sglist, nelems, dir);
-
- if (!dma_ops_domain(domain))
- return 0;
-
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
paddr = sg_phys(s);
- s->dma_address = __map_single(dev, iommu, domain->priv,
+ s->dma_address = __map_single(dev, domain->priv,
paddr, s->length, dir, false,
dma_mask);
@@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap;
}
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1865,7 +1990,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
unmap:
for_each_sg(sglist, s, mapped_elems, i) {
if (s->dma_address)
- __unmap_single(iommu, domain->priv, s->dma_address,
+ __unmap_single(domain->priv, s->dma_address,
s->dma_length, dir);
s->dma_address = s->dma_length = 0;
}
@@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
unsigned long flags;
- struct amd_iommu *iommu;
struct protection_domain *domain;
struct scatterlist *s;
- u16 devid;
int i;
INC_STATS_COUNTER(cnt_unmap_sg);
- if (!check_device(dev) ||
- !get_device_resources(dev, &iommu, &domain, &devid))
- return;
-
- if (!dma_ops_domain(domain))
+ domain = get_domain(dev);
+ if (IS_ERR(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
- __unmap_single(iommu, domain->priv, s->dma_address,
+ __unmap_single(domain->priv, s->dma_address,
s->dma_length, dir);
s->dma_address = s->dma_length = 0;
}
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size,
{
unsigned long flags;
void *virt_addr;
- struct amd_iommu *iommu;
struct protection_domain *domain;
- u16 devid;
phys_addr_t paddr;
u64 dma_mask = dev->coherent_dma_mask;
INC_STATS_COUNTER(cnt_alloc_coherent);
- if (!check_device(dev))
+ domain = get_domain(dev);
+ if (PTR_ERR(domain) == -EINVAL) {
+ virt_addr = (void *)__get_free_pages(flag, get_order(size));
+ *dma_addr = __pa(virt_addr);
+ return virt_addr;
+ } else if (IS_ERR(domain))
return NULL;
- if (!get_device_resources(dev, &iommu, &domain, &devid))
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ dma_mask = dev->coherent_dma_mask;
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ flag |= __GFP_ZERO;
- flag |= __GFP_ZERO;
virt_addr = (void *)__get_free_pages(flag, get_order(size));
if (!virt_addr)
return NULL;
paddr = virt_to_phys(virt_addr);
- if (!iommu || !domain) {
- *dma_addr = (dma_addr_t)paddr;
- return virt_addr;
- }
-
- if (!dma_ops_domain(domain))
- goto out_free;
-
if (!dma_mask)
dma_mask = *dev->dma_mask;
spin_lock_irqsave(&domain->lock, flags);
- *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
+ *dma_addr = __map_single(dev, domain->priv, paddr,
size, DMA_BIDIRECTIONAL, true, dma_mask);
- if (*dma_addr == bad_dma_address) {
+ if (*dma_addr == DMA_ERROR_CODE) {
spin_unlock_irqrestore(&domain->lock, flags);
goto out_free;
}
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size,
void *virt_addr, dma_addr_t dma_addr)
{
unsigned long flags;
- struct amd_iommu *iommu;
struct protection_domain *domain;
- u16 devid;
INC_STATS_COUNTER(cnt_free_coherent);
- if (!check_device(dev))
- return;
-
- get_device_resources(dev, &iommu, &domain, &devid);
-
- if (!iommu || !domain)
- goto free_mem;
-
- if (!dma_ops_domain(domain))
+ domain = get_domain(dev);
+ if (IS_ERR(domain))
goto free_mem;
spin_lock_irqsave(&domain->lock, flags);
- __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
+ __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- iommu_completion_wait(iommu);
+ iommu_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -2017,22 +2123,7 @@ static void free_coherent(struct device *dev, size_t size,
*/
static int amd_iommu_dma_supported(struct device *dev, u64 mask)
{
- u16 bdf;
- struct pci_dev *pcidev;
-
- /* No device or no PCI device */
- if (!dev || dev->bus != &pci_bus_type)
- return 0;
-
- pcidev = to_pci_dev(dev);
-
- bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
-
- /* Out of our scope? */
- if (bdf > amd_iommu_last_bdf)
- return 0;
-
- return 1;
+ return check_device(dev);
}
/*
@@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void)
{
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
- struct amd_iommu *iommu;
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- devid = calc_devid(dev->bus->number, dev->devfn);
- if (devid > amd_iommu_last_bdf)
- continue;
- devid = amd_iommu_alias_table[devid];
- if (domain_for_device(devid))
+
+ /* Do we handle this device? */
+ if (!check_device(&dev->dev))
continue;
- iommu = amd_iommu_rlookup_table[devid];
- if (!iommu)
+
+ iommu_init_device(&dev->dev);
+
+ /* Is there already any domain for it? */
+ if (domain_for_device(&dev->dev))
continue;
- dma_dom = dma_ops_domain_alloc(iommu);
+
+ devid = get_device_id(&dev->dev);
+
+ dma_dom = dma_ops_domain_alloc();
if (!dma_dom)
continue;
init_unity_mappings_for_device(dma_dom, devid);
dma_dom->target_dev = devid;
+ attach_device(&dev->dev, &dma_dom->domain);
+
list_add_tail(&dma_dom->list, &iommu_pd_list);
}
}
@@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void)
* protection domain will be assigned to the default one.
*/
for_each_iommu(iommu) {
- iommu->default_dom = dma_ops_domain_alloc(iommu);
+ iommu->default_dom = dma_ops_domain_alloc();
if (iommu->default_dom == NULL)
return -ENOMEM;
iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void)
}
/*
- * If device isolation is enabled, pre-allocate the protection
- * domains for each device.
+ * Pre-allocate the protection domains for each device.
*/
- if (amd_iommu_isolate)
- prealloc_protection_domains();
+ prealloc_protection_domains();
iommu_detected = 1;
- force_iommu = 1;
- bad_dma_address = 0;
+ swiotlb = 0;
#ifdef CONFIG_GART_IOMMU
gart_iommu_aperture_disabled = 1;
gart_iommu_aperture = 0;
@@ -2150,14 +2243,17 @@ int __init amd_iommu_init_dma_ops(void)
static void cleanup_domain(struct protection_domain *domain)
{
+ struct iommu_dev_data *dev_data, *next;
unsigned long flags;
- u16 devid;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
- for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
- if (amd_iommu_pd_table[devid] == domain)
- __detach_device(domain, devid);
+ list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
+ struct device *dev = dev_data->dev;
+
+ do_detach(dev);
+ atomic_set(&dev_data->bind, 0);
+ }
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
@@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain)
if (!domain)
return;
+ del_domain_from_list(domain);
+
if (domain->id)
domain_id_free(domain->id);
@@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void)
domain->id = domain_id_alloc();
if (!domain->id)
goto out_err;
+ INIT_LIST_HEAD(&domain->dev_list);
+
+ add_domain_to_list(domain);
return domain;
@@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
- struct protection_domain *domain = dom->priv;
+ struct iommu_dev_data *dev_data = dev->archdata.iommu;
struct amd_iommu *iommu;
- struct pci_dev *pdev;
u16 devid;
- if (dev->bus != &pci_bus_type)
+ if (!check_device(dev))
return;
- pdev = to_pci_dev(dev);
-
- devid = calc_devid(pdev->bus->number, pdev->devfn);
+ devid = get_device_id(dev);
- if (devid > 0)
- detach_device(domain, devid);
+ if (dev_data->domain != NULL)
+ detach_device(dev);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return;
- iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_flush_device(dev);
iommu_completion_wait(iommu);
}
@@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct protection_domain *domain = dom->priv;
- struct protection_domain *old_domain;
+ struct iommu_dev_data *dev_data;
struct amd_iommu *iommu;
- struct pci_dev *pdev;
+ int ret;
u16 devid;
- if (dev->bus != &pci_bus_type)
+ if (!check_device(dev))
return -EINVAL;
- pdev = to_pci_dev(dev);
-
- devid = calc_devid(pdev->bus->number, pdev->devfn);
+ dev_data = dev->archdata.iommu;
- if (devid >= amd_iommu_last_bdf ||
- devid != amd_iommu_alias_table[devid])
- return -EINVAL;
+ devid = get_device_id(dev);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return -EINVAL;
- old_domain = domain_for_device(devid);
- if (old_domain)
- detach_device(old_domain, devid);
+ if (dev_data->domain)
+ detach_device(dev);
- attach_device(iommu, domain, devid);
+ ret = attach_device(dev, domain);
iommu_completion_wait(iommu);
- return 0;
+ return ret;
}
static int amd_iommu_map_range(struct iommu_domain *dom,
@@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
iova += PAGE_SIZE;
}
- iommu_flush_domain(domain->id);
+ iommu_flush_tlb_pde(domain);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = {
int __init amd_iommu_init_passthrough(void)
{
+ struct amd_iommu *iommu;
struct pci_dev *dev = NULL;
- u16 devid, devid2;
+ u16 devid;
/* allocate passthroug domain */
pt_domain = protection_domain_alloc();
@@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void)
pt_domain->mode |= PAGE_MODE_NONE;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- struct amd_iommu *iommu;
- devid = calc_devid(dev->bus->number, dev->devfn);
- if (devid > amd_iommu_last_bdf)
+ if (!check_device(&dev->dev))
continue;
- devid2 = amd_iommu_alias_table[devid];
+ devid = get_device_id(&dev->dev);
- iommu = amd_iommu_rlookup_table[devid2];
+ iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
continue;
- __attach_device(iommu, pt_domain, devid);
- __attach_device(iommu, pt_domain, devid2);
+ attach_device(&dev->dev, pt_domain);
}
pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
diff --git a/trunk/arch/x86/kernel/amd_iommu_init.c b/trunk/arch/x86/kernel/amd_iommu_init.c
index c20001e4f556..7ffc39965233 100644
--- a/trunk/arch/x86/kernel/amd_iommu_init.c
+++ b/trunk/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Author: Joerg Roedel
* Leo Duran
*
@@ -25,10 +25,12 @@
#include
#include
#include
+#include
#include
#include
#include
#include
+#include
/*
* definitions for the ACPI scanning code
@@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
-#ifdef CONFIG_IOMMU_STRESS
-bool amd_iommu_isolate = false;
-#else
-bool amd_iommu_isolate = true; /* if true, device isolation is
- enabled */
-#endif
-
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
+/* Array to assign indices to IOMMUs*/
+struct amd_iommu *amd_iommus[MAX_IOMMUS];
+int amd_iommus_present;
+
+/* IOMMUs have a non-present cache? */
+bool amd_iommu_np_cache __read_mostly;
+
+/*
+ * List of protection domains - used during resume
+ */
+LIST_HEAD(amd_iommu_pd_list);
+spinlock_t amd_iommu_pd_lock;
+
/*
* Pointer to the device table which is shared by all AMD IOMMUs
* it is indexed by the PCI device id or the HT unit id and contains
@@ -156,12 +164,6 @@ u16 *amd_iommu_alias_table;
*/
struct amd_iommu **amd_iommu_rlookup_table;
-/*
- * The pd table (protection domain table) is used to find the protection domain
- * data structure a device belongs to. Indexed with the PCI device id too.
- */
-struct protection_domain **amd_iommu_pd_table;
-
/*
* AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap
* to know which ones are already in use.
@@ -838,7 +840,18 @@ static void __init free_iommu_all(void)
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
{
spin_lock_init(&iommu->lock);
+
+ /* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
+ iommu->index = amd_iommus_present++;
+
+ if (unlikely(iommu->index >= MAX_IOMMUS)) {
+ WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
+ return -ENOSYS;
+ }
+
+ /* Index is fine - add IOMMU to the array */
+ amd_iommus[iommu->index] = iommu;
/*
* Copy data from ACPI table entry to the iommu struct
@@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
+ if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+ amd_iommu_np_cache = true;
+
return pci_enable_device(iommu->dev);
}
@@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
*
****************************************************************************/
-static int __init iommu_setup_msi(struct amd_iommu *iommu)
+static int iommu_setup_msi(struct amd_iommu *iommu)
{
int r;
@@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = {
* functions. Finally it prints some information about AMD IOMMUs and
* the driver state and enables the hardware.
*/
-int __init amd_iommu_init(void)
+static int __init amd_iommu_init(void)
{
int i, ret = 0;
-
- if (no_iommu) {
- printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
- return 0;
- }
-
- if (!amd_iommu_detected)
- return -ENODEV;
-
/*
* First parse ACPI tables to find the largest Bus/Dev/Func
* we need to handle. Upon this information the shared data
@@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void)
if (amd_iommu_rlookup_table == NULL)
goto free;
- /*
- * Protection Domain table - maps devices to protection domains
- * This table has the same size as the rlookup_table
- */
- amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(rlookup_table_size));
- if (amd_iommu_pd_table == NULL)
- goto free;
-
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
GFP_KERNEL | __GFP_ZERO,
get_order(MAX_DOMAIN_ID/8));
@@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void)
*/
amd_iommu_pd_alloc_bitmap[0] = 1;
+ spin_lock_init(&amd_iommu_pd_lock);
+
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
@@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void)
if (iommu_pass_through)
goto out;
- printk(KERN_INFO "AMD-Vi: device isolation ");
- if (amd_iommu_isolate)
- printk("enabled\n");
- else
- printk("disabled\n");
-
if (amd_iommu_unmap_flush)
printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
else
printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
+ x86_platform.iommu_shutdown = disable_iommus;
out:
return ret;
@@ -1304,9 +1299,6 @@ int __init amd_iommu_init(void)
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
get_order(MAX_DOMAIN_ID/8));
- free_pages((unsigned long)amd_iommu_pd_table,
- get_order(rlookup_table_size));
-
free_pages((unsigned long)amd_iommu_rlookup_table,
get_order(rlookup_table_size));
@@ -1323,11 +1315,6 @@ int __init amd_iommu_init(void)
goto out;
}
-void amd_iommu_shutdown(void)
-{
- disable_iommus();
-}
-
/****************************************************************************
*
* Early detect code. This code runs at IOMMU detection time in the DMA
@@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
void __init amd_iommu_detect(void)
{
- if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
+ if (no_iommu || (iommu_detected && !gart_iommu_aperture))
return;
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
iommu_detected = 1;
amd_iommu_detected = 1;
-#ifdef CONFIG_GART_IOMMU
- gart_iommu_aperture_disabled = 1;
- gart_iommu_aperture = 0;
-#endif
+ x86_init.iommu.iommu_init = amd_iommu_init;
}
}
@@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str)
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
- if (strncmp(str, "isolate", 7) == 0)
- amd_iommu_isolate = true;
- if (strncmp(str, "share", 5) == 0)
- amd_iommu_isolate = false;
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}
diff --git a/trunk/arch/x86/kernel/aperture_64.c b/trunk/arch/x86/kernel/aperture_64.c
index 128111d8ffe0..e0dfb6856aa2 100644
--- a/trunk/arch/x86/kernel/aperture_64.c
+++ b/trunk/arch/x86/kernel/aperture_64.c
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
int gart_iommu_aperture;
int gart_iommu_aperture_disabled __initdata;
@@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void)
iommu_detected = 1;
gart_iommu_aperture = 1;
+ x86_init.iommu.iommu_init = gart_iommu_init;
aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
@@ -456,7 +458,7 @@ void __init gart_iommu_hole_init(void)
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
- } else if (swiotlb && !valid_agp) {
+ } else if (!valid_agp) {
/* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
diff --git a/trunk/arch/x86/kernel/apic/Makefile b/trunk/arch/x86/kernel/apic/Makefile
index da7b7b9f8bd8..565c1bfc507d 100644
--- a/trunk/arch/x86/kernel/apic/Makefile
+++ b/trunk/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,7 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c
index 894aa97f0717..ad8c75b9e453 100644
--- a/trunk/arch/x86/kernel/apic/apic.c
+++ b/trunk/arch/x86/kernel/apic/apic.c
@@ -241,28 +241,13 @@ static int modern_apic(void)
}
/*
- * bare function to substitute write operation
- * and it's _that_ fast :)
- */
-static void native_apic_write_dummy(u32 reg, u32 v)
-{
- WARN_ON_ONCE((cpu_has_apic || !disable_apic));
-}
-
-static u32 native_apic_read_dummy(u32 reg)
-{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
- return 0;
-}
-
-/*
- * right after this call apic->write/read doesn't do anything
- * note that there is no restore operation it works one way
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
*/
void apic_disable(void)
{
- apic->read = native_apic_read_dummy;
- apic->write = native_apic_write_dummy;
+ pr_info("APIC: switched to apic NOOP\n");
+ apic = &apic_noop;
}
void native_apic_wait_icr_idle(void)
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
- apic_write(APIC_TMICT, 0xffffffff);
+ apic_write(APIC_TMICT, 0);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
int ret, x2apic_enabled = 0;
- int dmar_table_init_ret = 0;
+ int dmar_table_init_ret;
-#ifdef CONFIG_INTR_REMAP
dmar_table_init_ret = dmar_table_init();
- if (dmar_table_init_ret)
- pr_debug("dmar_table_init() failed with %d:\n",
- dmar_table_init_ret);
-#endif
+ if (dmar_table_init_ret && !x2apic_supported())
+ return;
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
diff --git a/trunk/arch/x86/kernel/apic/apic_noop.c b/trunk/arch/x86/kernel/apic/apic_noop.c
new file mode 100644
index 000000000000..d9acc3bee0f4
--- /dev/null
+++ b/trunk/arch/x86/kernel/apic/apic_noop.c
@@ -0,0 +1,200 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc...
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+ return -1;
+}
+
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+ return 0;
+}
+
+static u64 noop_apic_icr_read(void)
+{
+ return 0;
+}
+
+static int noop_cpu_to_logical_apicid(int cpu)
+{
+ return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+ return 0;
+}
+
+static int noop_probe(void)
+{
+ /*
+ * NOOP apic should not ever be
+ * enabled via probe routine
+ */
+ return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+ /*
+ * if we would be really "pedantic"
+ * we should pass read_apic_id() here
+ * but since NOOP suppose APIC ID = 0
+ * lets save a few cycles
+ */
+ return physid_isset(0, phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+ /* only BSP here */
+ return cpumask_of(0);
+}
+
+static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return physid_isset(apicid, *map);
+}
+
+static unsigned long noop_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ if (cpu != 0)
+ pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+int noop_apicid_to_node(int logical_apicid)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+ WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ return 0;
+}
+
+static void noop_apic_write(u32 reg, u32 v)
+{
+ WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+struct apic apic_noop = {
+ .name = "noop",
+ .probe = noop_probe,
+ .acpi_madt_oem_check = NULL,
+
+ .apic_id_registered = noop_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = noop_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = noop_check_apicid_used,
+ .check_apicid_present = noop_check_apicid_present,
+
+ .vector_allocation_domain = noop_vector_allocation_domain,
+ .init_apic_ldr = noop_init_apic_ldr,
+
+ .ioapic_phys_id_map = default_ioapic_phys_id_map,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = noop_apicid_to_node,
+
+ .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
+
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+
+ .phys_pkg_id = noop_phys_pkg_id,
+
+ .mps_oem_check = NULL,
+
+ .get_apic_id = noop_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = noop_send_IPI_mask,
+ .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = noop_send_IPI_allbutself,
+ .send_IPI_all = noop_send_IPI_all,
+ .send_IPI_self = noop_send_IPI_self,
+
+ .wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
+
+ /* should be safe */
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = NULL,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = noop_apic_read,
+ .write = noop_apic_write,
+ .icr_read = noop_apic_icr_read,
+ .icr_write = noop_apic_icr_write,
+ .wait_icr_idle = noop_apic_wait_icr_idle,
+ .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+};
diff --git a/trunk/arch/x86/kernel/apic/bigsmp_32.c b/trunk/arch/x86/kernel/apic/bigsmp_32.c
index 77a06413b6b2..38dcecfa5818 100644
--- a/trunk/arch/x86/kernel/apic/bigsmp_32.c
+++ b/trunk/arch/x86/kernel/apic/bigsmp_32.c
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void)
#endif
}
-static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
-{
- return physid_mask_of_physid(phys_apicid);
-}
-
/* Mapping from cpu number to logical apicid */
static inline int bigsmp_cpu_to_logical_apicid(int cpu)
{
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
return cpu_physical_id(cpu);
}
-static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xFFL);
+ physids_promote(0xFFL, retmap);
}
static int bigsmp_check_phys_apicid_present(int phys_apicid)
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = {
.apicid_to_node = bigsmp_apicid_to_node,
.cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
- .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = bigsmp_check_phys_apicid_present,
.enable_apic_mode = NULL,
diff --git a/trunk/arch/x86/kernel/apic/es7000_32.c b/trunk/arch/x86/kernel/apic/es7000_32.c
index 89174f847b49..e85f8fb7f8e7 100644
--- a/trunk/arch/x86/kernel/apic/es7000_32.c
+++ b/trunk/arch/x86/kernel/apic/es7000_32.c
@@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void)
return cpumask_of(smp_processor_id());
}
-static unsigned long
-es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
+
static unsigned long es7000_check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
@@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu)
static int cpu_id;
-static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
+static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
{
- physid_mask_t mask;
-
- mask = physid_mask_of_physid(cpu_id);
+ physid_set_mask_of_physid(cpu_id, retmap);
++cpu_id;
-
- return mask;
}
/* Mapping from cpu number to logical apicid */
@@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu)
#endif
}
-static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
+static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xff);
+ physids_promote(0xFFL, retmap);
}
static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
diff --git a/trunk/arch/x86/kernel/apic/io_apic.c b/trunk/arch/x86/kernel/apic/io_apic.c
index dc69f28489f5..c0b4468683f9 100644
--- a/trunk/arch/x86/kernel/apic/io_apic.c
+++ b/trunk/arch/x86/kernel/apic/io_apic.c
@@ -60,8 +60,6 @@
#include
#include
#include
-#include
-#include
#include
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
add_pin_to_irq_node(cfg, node, newapic, newpin);
}
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ unsigned int reg, pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
+}
+
static void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
- int pin;
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
- reg &= mask_and;
- reg |= mask_or;
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
- if (final)
- final(entry);
- }
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
- IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
- IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
struct irq_cfg *cfg = desc->chip_data;
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
int cpu, err;
cpumask_var_t tmp_mask;
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ if (cfg->move_in_progress)
return -EBUSY;
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1237,8 +1227,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
return err;
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_desc *desc;
unsigned int irq;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base)
{
int i;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG);
for (i = 0; i < 8; i++)
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
unsigned int i, v, ver, maxlvt;
u64 icr;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk("\n");
}
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
{
int cpu;
+ if (!maxcpu)
+ return;
+
preempt_disable();
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (cpu >= maxcpu)
+ break;
smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ }
preempt_enable();
}
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+ if (!nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
{
+ int num = -1;
+
+ if (strcmp(arg, "all") == 0) {
+ show_lapic = CONFIG_NR_CPUS;
+ } else {
+ get_option(&arg, &num);
+ if (num >= 0)
+ show_lapic = num;
+ }
+
+ return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
+{
+ if (apic_verbosity == APIC_QUIET)
+ return 0;
+
print_PIC();
/* don't print out if apic is not there */
if (!cpu_has_apic && !apic_from_smp_config())
return 0;
- print_all_local_APICs();
+ print_local_APICs(show_lapic);
print_IO_APIC();
return 0;
}
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
/* Where if anywhere is the i8259 connect in external int mode */
@@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(phys_id_present_map,
+ if (apic->check_apicid_used(&phys_id_present_map,
mp_ioapics[apic_id].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic_id, mp_ioapics[apic_id].apicid);
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void)
mp_ioapics[apic_id].apicid = i;
} else {
physid_mask_t tmp;
- tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+ apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
mp_ioapics[apic_id].apicid);
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
@@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
}
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
/*
* Either sets desc->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that, or returns BAD_APICID and
* leaves desc->affinity untouched.
*/
-static unsigned int
+unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
@@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
cfg = irq_cfg(irq);
spin_lock(&desc->lock);
- if (!cfg->move_cleanup_count)
- goto unlock;
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
@@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
goto unlock;
}
__get_cpu_var(vector_irq)[vector] = -1;
- cfg->move_cleanup_count--;
unlock:
spin_unlock(&desc->lock);
}
@@ -2460,21 +2456,33 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
irq_exit();
}
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
{
struct irq_desc *desc = *descp;
struct irq_cfg *cfg = desc->chip_data;
- unsigned vector, me;
+ unsigned me;
if (likely(!cfg->move_in_progress))
return;
- vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
}
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+ __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+
+ __irq_complete_move(&desc, cfg->vector);
+}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
@@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq)
atomic_t irq_mis_count;
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ * 0Xh 82489DX
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
+ * 30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ if (mp_ioapics[entry->apic].apicver >= 0x20) {
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ if (irq_remapped(irq))
+ io_apic_eoi(entry->apic, entry->pin);
+ else
+ io_apic_eoi(entry->apic, cfg->vector);
+ } else {
+ __mask_and_edge_IO_APIC_irq(entry);
+ __unmask_and_level_IO_APIC_irq(entry);
+ }
+ }
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq)
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
+ *
+ * Also in the case when cpu goes offline, fixup_irqs() will forward
+ * any unhandled interrupt on the offlined cpu to the new cpu
+ * destination that is handling the corresponding interrupt. This
+ * interrupt forwarding is done via IPI's. Hence, in this case also
+ * level-triggered io-apic interrupt will be seen as an edge
+ * interrupt in the IRR. And we can't rely on the cpu's EOI
+ * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+ * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+ * supporting EOI register, we do an explicit EOI to clear the
+ * remote IRR and on IO-APIC's which don't have an EOI register,
+ * we use the above logic (mask+edge followed by unmask+level) from
+ * Manfred Spraul to clear the remote IRR.
*/
cfg = desc->chip_data;
i = cfg->vector;
@@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
+ /*
+ * Tail end of clearing remote IRR bit (either by delivering the EOI
+ * message via io-apic EOI register write or simulating it using
+ * mask+edge followed by unnask+level logic) manually when the
+ * level triggered interrupt is seen as the edge triggered interrupt
+ * at the cpu.
+ */
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+
+ eoi_ioapic_irq(desc);
+ }
+
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq)
move_masked_irq(irq);
unmask_IO_APIC_irq_desc(desc);
}
-
- /* Tail end of version 0x11 I/O APIC bug workaround */
- if (!(v & (1 << (i & 0x1f)))) {
- atomic_inc(&irq_mis_count);
- spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(cfg);
- __unmask_and_level_IO_APIC_irq(cfg);
- spin_unlock(&ioapic_lock);
- }
}
#ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
-
- for_each_irq_pin(entry, cfg->irq_2_pin)
- io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
static void ir_ack_apic_edge(unsigned int irq)
{
ack_APIC_irq();
@@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
continue;
desc_new = move_irq_desc(desc_new, node);
+ cfg_new = desc_new->chip_data;
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
@@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset)
-{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg;
- int mmr_pnode;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long flags;
- int err;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- cfg = irq_cfg(irq);
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- spin_lock_irqsave(&vector_lock, flags);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->mask = 1;
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
*/
if (physids_empty(apic_id_map))
- apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
@@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
* Every APIC in a system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(apic_id_map, apic_id)) {
+ if (apic->check_apicid_used(&apic_id_map, apic_id)) {
for (i = 0; i < get_physical_broadcast(); i++) {
- if (!apic->check_apicid_used(apic_id_map, i))
+ if (!apic->check_apicid_used(&apic_id_map, i))
break;
}
@@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
apic_id = i;
}
- tmp = apic->apicid_to_cpu_present(apic_id);
+ apic->apicid_to_cpu_present(apic_id, &tmp);
physids_or(apic_id_map, apic_id_map, tmp);
if (reg_00.bits.ID != apic_id) {
@@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
+ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
@@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void)
#ifdef CONFIG_X86_32
fake_ioapic_page:
#endif
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE,
- "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+ ioapic_phys);
idx++;
ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
}
diff --git a/trunk/arch/x86/kernel/apic/nmi.c b/trunk/arch/x86/kernel/apic/nmi.c
index 7ff61d6a188a..6389432a9dbf 100644
--- a/trunk/arch/x86/kernel/apic/nmi.c
+++ b/trunk/arch/x86/kernel/apic/nmi.c
@@ -39,7 +39,8 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
-static cpumask_t backtrace_mask __read_mostly;
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
- if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
show_regs(regs);
dump_stack();
spin_unlock(&lock);
- cpumask_clear_cpu(cpu, &backtrace_mask);
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
rc = 1;
}
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void)
{
int i;
- cpumask_copy(&backtrace_mask, cpu_online_mask);
+ cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(&backtrace_mask))
+ if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
}
diff --git a/trunk/arch/x86/kernel/apic/numaq_32.c b/trunk/arch/x86/kernel/apic/numaq_32.c
index efa00e2b8505..07cdbdcd7a92 100644
--- a/trunk/arch/x86/kernel/apic/numaq_32.c
+++ b/trunk/arch/x86/kernel/apic/numaq_32.c
@@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void)
return cpu_all_mask;
}
-static inline unsigned long
-numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
{
- return physid_isset(apicid, bitmap);
+ return physid_isset(apicid, *map);
}
static inline unsigned long numaq_check_apicid_present(int bit)
@@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq)
return apic != 0 && irq == 0;
}
-static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* We don't have a good way to do this yet - hack */
- return physids_promote(0xFUL);
+ return physids_promote(0xFUL, retmap);
}
static inline int numaq_cpu_to_logical_apicid(int cpu)
@@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
-static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
+static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
int cpu = __ffs(logical_apicid & 0xf);
- return physid_mask_of_physid(cpu + 4*node);
+ physid_set_mask_of_physid(cpu + 4*node, retmap);
}
/* Where the IO area was mapped on multiquad, always 0 otherwise */
diff --git a/trunk/arch/x86/kernel/apic/probe_32.c b/trunk/arch/x86/kernel/apic/probe_32.c
index 0c0182cc947d..1a6559f6768c 100644
--- a/trunk/arch/x86/kernel/apic/probe_32.c
+++ b/trunk/arch/x86/kernel/apic/probe_32.c
@@ -108,7 +108,7 @@ struct apic apic_default = {
.apicid_to_node = default_apicid_to_node,
.cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = default_apicid_to_cpu_present,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
diff --git a/trunk/arch/x86/kernel/apic/summit_32.c b/trunk/arch/x86/kernel/apic/summit_32.c
index 645ecc4ff0be..9b419263d90d 100644
--- a/trunk/arch/x86/kernel/apic/summit_32.c
+++ b/trunk/arch/x86/kernel/apic/summit_32.c
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void)
return cpumask_of(0);
}
-static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
+static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0x0F);
+ physids_promote(0x0FL, retmap);
}
-static physid_mask_t summit_apicid_to_cpu_present(int apicid)
+static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
{
- return physid_mask_of_physid(0);
+ physid_set_mask_of_physid(0, retmap);
}
static int summit_check_phys_apicid_present(int physical_apicid)
diff --git a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c
index 326c25477d3d..130c4b934877 100644
--- a/trunk/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/trunk/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode)
map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
}
+static __init void map_low_mmrs(void)
+{
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
static __init void uv_rtc_init(void)
{
long status;
@@ -550,6 +556,8 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
+ map_low_mmrs();
+
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c
index 151ace69a5aa..b5b6b23bce53 100644
--- a/trunk/arch/x86/kernel/apm_32.c
+++ b/trunk/arch/x86/kernel/apm_32.c
@@ -204,7 +204,6 @@
#include
#include
-#include
#include
#include
#include
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
+static DEFINE_MUTEX(apm_mutex);
/*
* Set up a segment that references the real mode segment 0x40
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
return -EPERM;
switch (cmd) {
case APM_IOC_STANDBY:
- lock_kernel();
+ mutex_lock(&apm_mutex);
if (as->standbys_read > 0) {
as->standbys_read--;
as->standbys_pending--;
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_STANDBY, as);
if (standbys_pending <= 0)
standby();
- unlock_kernel();
+ mutex_unlock(&apm_mutex);
break;
case APM_IOC_SUSPEND:
- lock_kernel();
+ mutex_lock(&apm_mutex);
if (as->suspends_read > 0) {
as->suspends_read--;
as->suspends_pending--;
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_SUSPEND, as);
if (suspends_pending <= 0) {
ret = suspend(1);
+ mutex_unlock(&apm_mutex);
} else {
as->suspend_wait = 1;
+ mutex_unlock(&apm_mutex);
wait_event_interruptible(apm_suspend_waitqueue,
as->suspend_wait == 0);
ret = as->suspend_result;
}
- unlock_kernel();
return ret;
default:
return -ENOTTY;
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp)
{
struct apm_user *as;
- lock_kernel();
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
sizeof(*as));
- unlock_kernel();
return -ENOMEM;
}
as->magic = APM_BIOS_MAGIC;
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp)
user_list = as;
spin_unlock(&user_list_lock);
filp->private_data = as;
- unlock_kernel();
return 0;
}
diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile
index 68537e957a9b..1d2cb383410e 100644
--- a/trunk/arch/x86/kernel/cpu/Makefile
+++ b/trunk/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
# Don't trace early stages of a secondary CPU boot
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
endif
# Make sure load_percpu_segment has no stackprotector
diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c
index c910a716a71c..7128b3799cec 100644
--- a/trunk/arch/x86/kernel/cpu/amd.c
+++ b/trunk/arch/x86/kernel/cpu/amd.c
@@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
diff --git a/trunk/arch/x86/kernel/cpu/centaur.c b/trunk/arch/x86/kernel/cpu/centaur.c
index c95e831bb095..e58d978e0758 100644
--- a/trunk/arch/x86/kernel/cpu/centaur.c
+++ b/trunk/arch/x86/kernel/cpu/centaur.c
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
}
enum {
diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c
index cc25c2b4a567..a4ec8b647544 100644
--- a/trunk/arch/x86/kernel/cpu/common.c
+++ b/trunk/arch/x86/kernel/cpu/common.c
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
}
}
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
c->x86_cache_size = (ecx>>24) + (edx>>24);
#ifdef CONFIG_X86_64
/* On K8 L1 TLB is inclusive, so don't count it */
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
#endif
c->x86_cache_size = l2size;
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -659,24 +654,31 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
+#ifdef PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
+
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
const struct cpu_dev *cpudev = *cdev;
- unsigned int j;
if (count >= X86_VENDOR_NUM)
break;
cpu_devs[count] = cpudev;
count++;
- for (j = 0; j < 2; j++) {
- if (!cpudev->c_ident[j])
- continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
- cpudev->c_ident[j]);
+#ifdef PROCESSOR_SELECT
+ {
+ unsigned int j;
+
+ for (j = 0; j < 2; j++) {
+ if (!cpudev->c_ident[j])
+ continue;
+ printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ cpudev->c_ident[j]);
+ }
}
+#endif
}
-
early_identify_cpu(&boot_cpu_data);
}
@@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
-#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
- mcheck_init(c);
-#endif
+ mcheck_cpu_init(c);
select_idle_routine(c);
diff --git a/trunk/arch/x86/kernel/cpu/cpu.h b/trunk/arch/x86/kernel/cpu/cpu.h
index 6de9a908e400..3624e8a0f71b 100644
--- a/trunk/arch/x86/kernel/cpu/cpu.h
+++ b/trunk/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,6 @@ struct cpu_dev {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
-extern void display_cacheinfo(struct cpuinfo_x86 *c);
+extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
#endif
diff --git a/trunk/arch/x86/kernel/cpu/cyrix.c b/trunk/arch/x86/kernel/cpu/cyrix.c
index 19807b89f058..4fbd384fb645 100644
--- a/trunk/arch/x86/kernel/cpu/cyrix.c
+++ b/trunk/arch/x86/kernel/cpu/cyrix.c
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
/* Handle the GX (Formally known as the GX2) */
if (c->x86 == 5 && c->x86_model == 5)
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
else
init_cyrix(c);
}
diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
index 804c40e2bc3e..0df4c2b7107f 100644
--- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#endif
}
- if (trace)
- printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if (l1i)
- printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
-
- if (l1d)
- printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
- else
- printk(KERN_CONT "\n");
-
- if (l2)
- printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-
- if (l3)
- printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce.c b/trunk/arch/x86/kernel/cpu/mcheck/mce.c
index 721a77ca8115..0bcaa3875863 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@
#include "mce-internal.h"
+#define CREATE_TRACE_POINTS
+#include
+
int mce_disabled __read_mostly;
#define MISC_MCELOG_MINOR 227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+ return NOTIFY_STOP;
}
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+ .notifier_call = default_decode_mce,
+ .priority = -1,
+};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;
+ /* Emit the trace record: */
+ trace_mce_record(mce);
+
mce->finished = 0;
wmb();
for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
- * (if the CPU has an implementation for that):
+ * (if the CPU has an implementation for that)
*/
- x86_mce_decode_callback(m);
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int mce_banks_init(void)
+static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!mce_banks) {
- int err = mce_banks_init();
+ int err = __mcheck_cpu_mce_banks_init();
if (err)
return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
- setup_timer(t, mcheck_timer, smp_processor_id());
+ setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
- mce_ancient_init(c);
+ __mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
- if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+ if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
- mce_init();
- mce_cpu_features(c);
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
}
/*
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
+int __init mcheck_init(void)
+{
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+ mcheck_intel_therm_init();
+
+ return 0;
+}
+
/*
* Sysfs support
*/
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
{
int i;
@@ -1663,12 +1687,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
/*
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
- mce_init();
- mce_cpu_features(¤t_cpu_data);
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(¤t_cpu_data);
return 0;
}
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(¤t_cpu_data))
return;
- mce_init();
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
- mce_init_timer();
+ __mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void __cpuinit mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(¤t_cpu_data))
return;
+
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
}
}
-static void mce_reenable_cpu(void *h)
+static void __cpuinit mce_reenable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void)
}
}
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void)
return err;
}
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
#endif
diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
index b3a1dba75330..4fef985fc221 100644
--- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
+static u32 lvtthmr_init __read_mostly;
+
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
}
+void __init mcheck_intel_therm_init(void)
+{
+ /*
+ * This function is only called on boot CPU. Save the init thermal
+ * LVT value on BSP and use that value to restore APs' thermal LVT
+ * entry BIOS programmed later
+ */
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
+ cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
+ lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
void intel_init_thermal(struct cpuinfo_x86 *c)
{
unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
* since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
+
+ /*
+ * The initial value of thermal LVT entries on all APs always reads
+ * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+ * sequence to them and LVT registers are reset to 0s except for
+ * the mask bits which are set to 1s when APs receive INIT IPI.
+ * Always restore the value that BIOS has programmed on AP based on
+ * BSP's info we saved since BIOS is always setting the same value
+ * for all threads/cores
+ */
+ apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+ h = lvtthmr_init;
+
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c
index b5801c311846..c1bbed1021d9 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event.c
@@ -77,6 +77,18 @@ struct cpu_hw_events {
struct debug_store *ds;
};
+struct event_constraint {
+ unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ int code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -102,6 +114,8 @@ struct x86_pmu {
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
+ int (*get_event_idx)(struct cpu_hw_events *cpuc,
+ struct hw_perf_event *hwc);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
+static const struct event_constraint *event_constraints;
+
/*
* Not sure about some of these
*/
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
return hw_event & P6_EVNTSEL_MASK;
}
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
+ EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT_END
+};
/*
* Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
+static const struct event_constraint intel_core_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
+ EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
-static const u64 nehalem_hw_cache_event_ids
+static __initconst u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids
},
};
-static const u64 core2_hw_cache_event_ids
+static __initconst u64 core2_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids
},
};
-static const u64 atom_hw_cache_event_ids
+static __initconst u64 atom_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
(CORE_EVNTSEL_EVENT_MASK | \
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
return hw_event & CORE_EVNTSEL_MASK;
}
-static const u64 amd_hw_cache_event_ids
+static __initconst u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+ hwc->idx = -1;
+
/*
* Count user and OS events unless requested not to.
*/
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
x86_pmu_enable_event(hwc, idx);
}
-static int
-fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+static int fixed_mode_idx(struct hw_perf_event *hwc)
{
unsigned int hw_event;
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
if (!x86_pmu.num_events_fixed)
return -1;
+ /*
+ * fixed counters do not take all possible filters
+ */
+ if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
+ return -1;
+
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
}
/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * generic counter allocator: get next free counter
*/
-static int x86_pmu_enable(struct perf_event *event)
+static int
+gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ int idx;
+
+ idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+ return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ const struct event_constraint *event_constraint;
+ int i, code;
+
+ if (!event_constraints)
+ goto skip;
+
+ code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+
+ for_each_event_constraint(event_constraint, event_constraints) {
+ if (code == event_constraint->code) {
+ for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_and_set_bit(i, cpuc->used_mask))
+ return i;
+ }
+ return -1;
+ }
+ }
+skip:
+ return gen_get_event_idx(cpuc, hwc);
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
int idx;
- idx = fixed_mode_idx(event, hwc);
+ idx = fixed_mode_idx(hwc);
if (idx == X86_PMC_IDX_FIXED_BTS) {
/* BTS is already occupied. */
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
hwc->config_base = 0;
- hwc->event_base = 0;
+ hwc->event_base = 0;
hwc->idx = idx;
} else if (idx >= 0) {
/*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
} else {
idx = hwc->idx;
/* Try to get the previous generic event again */
- if (test_and_set_bit(idx, cpuc->used_mask)) {
+ if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used_mask,
- x86_pmu.num_events);
- if (idx == x86_pmu.num_events)
+ idx = x86_pmu.get_event_idx(cpuc, hwc);
+ if (idx == -1)
return -EAGAIN;
set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
}
+ return idx;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ idx = x86_schedule_event(cpuc, hwc);
+ if (idx < 0)
+ return idx;
+
perf_events_lapic_init();
x86_pmu.disable(hwc, idx);
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.priority = 1
};
-static struct x86_pmu p6_pmu = {
+static __initconst struct x86_pmu p6_pmu = {
.name = "p6",
.handle_irq = p6_pmu_handle_irq,
.disable_all = p6_pmu_disable_all,
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = {
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
+ .get_event_idx = intel_get_event_idx,
};
-static struct x86_pmu intel_pmu = {
+static __initconst struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.disable_all = intel_pmu_disable_all,
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
+ .get_event_idx = intel_get_event_idx,
};
-static struct x86_pmu amd_pmu = {
+static __initconst struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
+ .get_event_idx = gen_get_event_idx,
};
-static int p6_pmu_init(void)
+static __init int p6_pmu_init(void)
{
switch (boot_cpu_data.x86_model) {
case 1:
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
+ event_constraints = intel_p6_event_constraints;
break;
case 9:
case 13:
/* Pentium M */
+ event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void)
return 0;
}
-static int intel_pmu_init(void)
+static __init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void)
sizeof(hw_cache_event_ids));
pr_cont("Core2 events, ");
+ event_constraints = intel_core_event_constraints;
break;
default:
case 26:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void)
return 0;
}
-static int amd_pmu_init(void)
+static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = {
.unthrottle = x86_pmu_unthrottle,
};
+static int
+validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event fake_event = event->hw;
+
+ if (event->pmu && event->pmu != &pmu)
+ return 0;
+
+ return x86_schedule_event(cpuc, &fake_event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct cpu_hw_events fake_pmu;
+
+ memset(&fake_pmu, 0, sizeof(fake_pmu));
+
+ if (!validate_event(&fake_pmu, leader))
+ return -ENOSPC;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+ if (!validate_event(&fake_pmu, sibling))
+ return -ENOSPC;
+ }
+
+ if (!validate_event(&fake_pmu, event))
+ return -ENOSPC;
+
+ return 0;
+}
+
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err;
err = __hw_perf_event_init(event);
+ if (!err) {
+ if (event->group_leader != event)
+ err = validate_group(event);
+ }
if (err) {
if (event->destroy)
event->destroy(event);
diff --git a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
index fab786f60ed6..898df9719afb 100644
--- a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
- boot_cpu_data.x86 != 16)
+ boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
return;
wd_ops = &k7_wd_ops;
break;
diff --git a/trunk/arch/x86/kernel/cpu/transmeta.c b/trunk/arch/x86/kernel/cpu/transmeta.c
index bb62b3e5caad..28000743bbb0 100644
--- a/trunk/arch/x86/kernel/cpu/transmeta.c
+++ b/trunk/arch/x86/kernel/cpu/transmeta.c
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
early_init_transmeta(c);
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
/* Print CMS and CPU revision */
max = cpuid_eax(0x80860000);
diff --git a/trunk/arch/x86/kernel/cpuid.c b/trunk/arch/x86/kernel/cpuid.c
index 6a52d4b36a30..7ef24a796992 100644
--- a/trunk/arch/x86/kernel/cpuid.c
+++ b/trunk/arch/x86/kernel/cpuid.c
@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file)
{
unsigned int cpu;
struct cpuinfo_x86 *c;
- int ret = 0;
-
- lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- ret = -ENXIO; /* No such CPU */
- goto out;
- }
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO; /* No such CPU */
+
c = &cpu_data(cpu);
if (c->cpuid_level < 0)
- ret = -EIO; /* CPUID not supported */
-out:
- unlock_kernel();
- return ret;
+ return -EIO; /* CPUID not supported */
+
+ return 0;
}
/*
diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c
index 5e409dc298a4..a4849c10a77e 100644
--- a/trunk/arch/x86/kernel/crash.c
+++ b/trunk/arch/x86/kernel/crash.c
@@ -27,8 +27,7 @@
#include
#include
#include
-#include
-
+#include
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif
#ifdef CONFIG_X86_64
- pci_iommu_shutdown();
+ x86_platform.iommu_shutdown();
#endif
crash_save_cpu(regs, safe_smp_processor_id());
diff --git a/trunk/arch/x86/kernel/dumpstack.c b/trunk/arch/x86/kernel/dumpstack.c
index 2d8a371d4339..b8ce165dde5d 100644
--- a/trunk/arch/x86/kernel/dumpstack.c
+++ b/trunk/arch/x86/kernel/dumpstack.c
@@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
show_registers(regs);
#ifdef CONFIG_X86_32
- sp = (unsigned long) (®s->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
+ if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
+ } else {
+ sp = kernel_stack_pointer(regs);
+ savesegment(ss, ss);
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
diff --git a/trunk/arch/x86/kernel/dumpstack_32.c b/trunk/arch/x86/kernel/dumpstack_32.c
index f7dd2a7c3bf4..e0ed4c7abb62 100644
--- a/trunk/arch/x86/kernel/dumpstack_32.c
+++ b/trunk/arch/x86/kernel/dumpstack_32.c
@@ -10,9 +10,9 @@
#include
#include
#include
+#include
#include
#include
-#include
#include
@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
+
stack = &dummy;
if (task && task != current)
stack = (unsigned long *)task->thread.sp;
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- bp = print_context_stack(context, stack, bp, ops,
- data, NULL, &graph);
+ bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-
diff --git a/trunk/arch/x86/kernel/dumpstack_64.c b/trunk/arch/x86/kernel/dumpstack_64.c
index a071e6be177e..8e740934bd1f 100644
--- a/trunk/arch/x86/kernel/dumpstack_64.c
+++ b/trunk/arch/x86/kernel/dumpstack_64.c
@@ -10,26 +10,28 @@
#include
#include
#include
+#include
#include
#include
-#include
#include
#include "dumpstack.h"
+#define N_EXCEPTION_STACKS_END \
+ (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
static char x86_stack_ids[][8] = {
- [DEBUG_STACK - 1] = "#DB",
- [NMI_STACK - 1] = "NMI",
- [DOUBLEFAULT_STACK - 1] = "#DF",
- [STACKFAULT_STACK - 1] = "#SS",
- [MCE_STACK - 1] = "#MC",
+ [ DEBUG_STACK-1 ] = "#DB",
+ [ NMI_STACK-1 ] = "NMI",
+ [ DOUBLEFAULT_STACK-1 ] = "#DF",
+ [ STACKFAULT_STACK-1 ] = "#SS",
+ [ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ...
- N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ [ N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
- };
+};
int x86_is_stack_id(int id, char *name)
{
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name)
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
+ unsigned *usedp, char **idp)
{
unsigned k;
@@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
+ unsigned long *irq_stack_end;
+ unsigned long *irq_stack;
unsigned long *stack;
+ int cpu;
int i;
- const int cpu = smp_processor_id();
- unsigned long *irq_stack_end =
- (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
- unsigned long *irq_stack =
- (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+ irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
- * debugging aid: "show_stack(NULL, NULL);" prints the
- * back trace for this cpu.
+ * Debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu:
*/
-
if (sp == NULL) {
if (task)
sp = (unsigned long *)task->thread.sp;
@@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
+ preempt_enable();
+
printk("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-
diff --git a/trunk/arch/x86/kernel/entry_32.S b/trunk/arch/x86/kernel/entry_32.S
index c097e7d607c6..50b9c220e121 100644
--- a/trunk/arch/x86/kernel/entry_32.S
+++ b/trunk/arch/x86/kernel/entry_32.S
@@ -333,6 +333,10 @@ ENTRY(ret_from_fork)
CFI_ENDPROC
END(ret_from_fork)
+/*
+ * Interrupt exit functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
/*
* Return to user mode is not as complex as all this looks,
* but we want the default path for a system call return to
@@ -383,6 +387,10 @@ need_resched:
END(resume_kernel)
#endif
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -513,6 +521,10 @@ sysexit_audit:
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
jmp resume_userspace
END(syscall_badsys)
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
+/*
+ * Irq entries should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
+/*
+ * End of kprobes section
+ */
+ .popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
@@ -1185,17 +1209,14 @@ END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
- pushl $0
pushl %eax
- pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
- movl %eax, 0xc(%esp)
+ movl %eax, %ecx
popl %edx
- popl %ecx
popl %eax
- ret
+ jmp *%ecx
#endif
.section .rodata,"a"
diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S
index b5c061f8f358..4deb8fc849dd 100644
--- a/trunk/arch/x86/kernel/entry_64.S
+++ b/trunk/arch/x86/kernel/entry_64.S
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
- movq %rax, 16(%rsp)
+ movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
- addq $16, %rsp
- retq
+ addq $24, %rsp
+ jmp *%rdi
#endif
@@ -803,6 +803,10 @@ END(interrupt)
call \func
.endm
+/*
+ * Interrupt entry/exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* APIC interrupts.
@@ -1491,12 +1499,17 @@ error_kernelspace:
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP+8(%rsp)
- je error_swapgs
+ movl %ecx,%eax /* zero extend */
+ cmpq %rax,RIP+8(%rsp)
+ je bstep_iret
cmpq $gs_change,RIP+8(%rsp)
je error_swapgs
jmp error_sti
+
+bstep_iret:
+ /* Fix truncated RIP */
+ movq %rcx,RIP+8(%rsp)
+ jmp error_swapgs
END(error_entry)
diff --git a/trunk/arch/x86/kernel/ftrace.c b/trunk/arch/x86/kernel/ftrace.c
index 9dbb527e1652..5a1b9758fd62 100644
--- a/trunk/arch/x86/kernel/ftrace.c
+++ b/trunk/arch/x86/kernel/ftrace.c
@@ -9,6 +9,8 @@
* the dangers of modifying code on the run.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include
#include
#include
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data)
switch (faulted) {
case 0:
- pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+ pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
- pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+ pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
- pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+ pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
extern unsigned long *sys_call_table;
-static struct syscall_metadata **syscalls_metadata;
-
-static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
-{
- struct syscall_metadata *start;
- struct syscall_metadata *stop;
- char str[KSYM_SYMBOL_LEN];
-
-
- start = (struct syscall_metadata *)__start_syscalls_metadata;
- stop = (struct syscall_metadata *)__stop_syscalls_metadata;
- kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
-
- for ( ; start < stop; start++) {
- if (start->name && !strcmp(start->name, str))
- return start;
- }
- return NULL;
-}
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
- if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
- return NULL;
-
- return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
+unsigned long __init arch_syscall_addr(int nr)
{
- int i;
-
- if (!syscalls_metadata)
- return -1;
-
- for (i = 0; i < NR_syscalls; i++) {
- if (syscalls_metadata[i]) {
- if (!strcmp(syscalls_metadata[i]->name, name))
- return i;
- }
- }
- return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
- syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
- syscalls_metadata[num]->exit_id = id;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
- int i;
- struct syscall_metadata *meta;
- unsigned long **psys_syscall_table = &sys_call_table;
-
- syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
- NR_syscalls, GFP_KERNEL);
- if (!syscalls_metadata) {
- WARN_ON(1);
- return -ENOMEM;
- }
-
- for (i = 0; i < NR_syscalls; i++) {
- meta = find_syscall_meta(psys_syscall_table[i]);
- syscalls_metadata[i] = meta;
- }
- return 0;
+ return (unsigned long)(&sys_call_table)[nr];
}
-arch_initcall(arch_init_ftrace_syscalls);
#endif
diff --git a/trunk/arch/x86/kernel/head_64.S b/trunk/arch/x86/kernel/head_64.S
index 780cd928fcd5..22db86a37643 100644
--- a/trunk/arch/x86/kernel/head_64.S
+++ b/trunk/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
diff --git a/trunk/arch/x86/kernel/hw_breakpoint.c b/trunk/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..d42f65ac4927
--- /dev/null
+++ b/trunk/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,555 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker
+ *
+ * Authors: Alan Stern
+ * K.Prasad
+ * Frederic Weisbecker
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, cpu_dr7);
+EXPORT_PER_CPU_SYMBOL(cpu_dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+static inline unsigned long
+__encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+ unsigned long bp_info;
+
+ bp_info = (len | type) & 0xf;
+ bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+ bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
+
+ return bp_info;
+}
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+ return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+ int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (bp_info & 0xc) | 0x40;
+ *type = (bp_info & 0x3) | 0x80;
+
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return -EBUSY;
+
+ set_debugreg(info->address, i);
+ __get_cpu_var(cpu_debugreg[i]) = info->address;
+
+ dr7 = &__get_cpu_var(cpu_dr7);
+ *dr7 |= encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return;
+
+ dr7 = &__get_cpu_var(cpu_dr7);
+ *dr7 &= ~__encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+ unsigned int len_in_bytes = 0;
+
+ switch (hbp_len) {
+ case X86_BREAKPOINT_LEN_1:
+ len_in_bytes = 1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ len_in_bytes = 2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ len_in_bytes = 4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ len_in_bytes = 8;
+ break;
+#endif
+ }
+ return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ /*
+ * For kernel-addresses, either the address or symbol name can be
+ * specified.
+ */
+ if (info->name)
+ info->address = (unsigned long)
+ kallsyms_lookup_name(info->name);
+ if (info->address)
+ return 0;
+
+ return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+ int *gen_len, int *gen_type)
+{
+ /* Len */
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ *gen_len = HW_BREAKPOINT_LEN_1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ *gen_len = HW_BREAKPOINT_LEN_2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ *gen_len = HW_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ *gen_len = HW_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (x86_type) {
+ case X86_BREAKPOINT_EXECUTE:
+ *gen_type = HW_BREAKPOINT_X;
+ break;
+ case X86_BREAKPOINT_WRITE:
+ *gen_type = HW_BREAKPOINT_W;
+ break;
+ case X86_BREAKPOINT_RW:
+ *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ info->address = bp->attr.bp_addr;
+
+ /* Len */
+ switch (bp->attr.bp_len) {
+ case HW_BREAKPOINT_LEN_1:
+ info->len = X86_BREAKPOINT_LEN_1;
+ break;
+ case HW_BREAKPOINT_LEN_2:
+ info->len = X86_BREAKPOINT_LEN_2;
+ break;
+ case HW_BREAKPOINT_LEN_4:
+ info->len = X86_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case HW_BREAKPOINT_LEN_8:
+ info->len = X86_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+ struct task_struct *tsk)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned int align;
+ int ret;
+
+
+ ret = arch_build_bp_info(bp);
+ if (ret)
+ return ret;
+
+ ret = -EINVAL;
+
+ if (info->type == X86_BREAKPOINT_EXECUTE)
+ /*
+ * Ptrace-refactoring code
+ * For now, we'll allow instruction breakpoint only for user-space
+ * addresses
+ */
+ if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+ info->len != X86_BREAKPOINT_EXECUTE)
+ return ret;
+
+ switch (info->len) {
+ case X86_BREAKPOINT_LEN_1:
+ align = 0;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ align = 1;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ align = 3;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ align = 7;
+ break;
+#endif
+ default:
+ return ret;
+ }
+
+ if (bp->callback)
+ ret = arch_store_info(bp);
+
+ if (ret < 0)
+ return ret;
+ /*
+ * Check that the low-order bits of the address are appropriate
+ * for the alignment implied by len.
+ */
+ if (info->address & align)
+ return -EINVAL;
+
+ /* Check that the virtual address is in the proper range */
+ if (tsk) {
+ if (!arch_check_va_in_userspace(info->address, info->len))
+ return -EFAULT;
+ } else {
+ if (!arch_check_va_in_kernelspace(info->address, info->len))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per cpu values because it
+ * may contain cpu wide breakpoint, something that
+ * doesn't belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+ int i;
+ int dr7 = 0;
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
+ struct thread_struct *thread = ¤t->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ bp = thread->ptrace_bps[i];
+
+ if (bp && !bp->attr.disabled) {
+ dump->u_debugreg[i] = bp->attr.bp_addr;
+ info = counter_arch_bp(bp);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ } else {
+ dump->u_debugreg[i] = 0;
+ }
+ }
+
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+
+ dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *t = &tsk->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
+}
+
+void hw_breakpoint_restore(void)
+{
+ set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+ set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+ set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+ set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+ set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(__get_cpu_var(cpu_dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When there are more bits than trap set in DR6 register (such
+ * as BD, BS or BT) indicating that more than one debug condition is
+ * met and requires some more action in do_debug().
+ *
+ * NOTIFY_STOP returned for all other cases
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int i, cpu, rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ unsigned long dr7, dr6;
+ unsigned long *dr6_p;
+
+ /* The DR6 value is pointed by args->err */
+ dr6_p = (unsigned long *)ERR_PTR(args->err);
+ dr6 = *dr6_p;
+
+ /* Do an early return if no trap bits are set in DR6 */
+ if ((dr6 & DR_TRAP_BITS) == 0)
+ return NOTIFY_DONE;
+
+ get_debugreg(dr7, 7);
+ /* Disable breakpoints during exception handling */
+ set_debugreg(0UL, 7);
+ /*
+ * Assert that local interrupts are disabled
+ * Reset the DRn bits in the virtualized register value.
+ * The ptrace trigger routine will add in whatever is needed.
+ */
+ current->thread.debugreg6 &= ~DR_TRAP_BITS;
+ cpu = get_cpu();
+
+ /* Handle all the breakpoints that were triggered */
+ for (i = 0; i < HBP_NUM; ++i) {
+ if (likely(!(dr6 & (DR_TRAP0 << i))))
+ continue;
+
+ /*
+ * The counter may be concurrently released but that can only
+ * occur from a call_rcu() path. We can then safely fetch
+ * the breakpoint, use its callback, touch its counter
+ * while we are in an rcu_read_lock() path.
+ */
+ rcu_read_lock();
+
+ bp = per_cpu(bp_per_reg[i], cpu);
+ if (bp)
+ rc = NOTIFY_DONE;
+ /*
+ * Reset the 'i'th TRAP bit in dr6 to denote completion of
+ * exception handling
+ */
+ (*dr6_p) &= ~(DR_TRAP0 << i);
+ /*
+ * bp can be NULL due to lazy debug register switching
+ * or due to concurrent perf counter removing.
+ */
+ if (!bp) {
+ rcu_read_unlock();
+ break;
+ }
+
+ (bp->callback)(bp, args->regs);
+
+ rcu_read_unlock();
+ }
+ if (dr6 & (~DR_TRAP_BITS))
+ rc = NOTIFY_DONE;
+
+ set_debugreg(dr7, 7);
+ put_cpu();
+
+ return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ if (val != DIE_DEBUG)
+ return NOTIFY_DONE;
+
+ return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+ /* TODO */
+}
diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c
index 04bbd5278568..fee6cc2b2079 100644
--- a/trunk/arch/x86/kernel/irq.c
+++ b/trunk/arch/x86/kernel/irq.c
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
-# endif
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_call_count;
sum += irq_stats(cpu)->irq_tlb_count;
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
-# endif
#endif
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs)
}
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
+{
+ unsigned int irq, vector;
+ static int warned;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(irq, desc) {
+ int break_affinity = 0;
+ int set_affinity = 1;
+ const struct cpumask *affinity;
+
+ if (!desc)
+ continue;
+ if (irq == 2)
+ continue;
+
+ /* interrupt's are disabled at this point */
+ spin_lock(&desc->lock);
+
+ affinity = desc->affinity;
+ if (!irq_has_action(irq) ||
+ cpumask_equal(affinity, cpu_online_mask)) {
+ spin_unlock(&desc->lock);
+ continue;
+ }
+
+ /*
+ * Complete the irq move. This cpu is going down and for
+ * non intr-remapping case, we can't wait till this interrupt
+ * arrives at this cpu before completing the irq move.
+ */
+ irq_force_complete_move(irq);
+
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ break_affinity = 1;
+ affinity = cpu_all_mask;
+ }
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
+ desc->chip->mask(irq);
+
+ if (desc->chip->set_affinity)
+ desc->chip->set_affinity(irq, affinity);
+ else if (!(warned++))
+ set_affinity = 0;
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
+ desc->chip->unmask(irq);
+
+ spin_unlock(&desc->lock);
+
+ if (break_affinity && set_affinity)
+ printk("Broke affinity for irq %i\n", irq);
+ else if (!set_affinity)
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ /*
+ * We can remove mdelay() and then send spuriuous interrupts to
+ * new cpu targets for all the irqs that were handled previously by
+ * this cpu. While it works, I have seen spurious interrupt messages
+ * (nothing wrong but still...).
+ *
+ * So for now, retain mdelay(1) and check the IRR and then send those
+ * interrupts to new targets as this cpu is already offlined...
+ */
+ mdelay(1);
+
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irr;
+
+ if (__get_cpu_var(vector_irq)[vector] < 0)
+ continue;
+
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ if (irr & (1 << (vector % 32))) {
+ irq = __get_cpu_var(vector_irq)[vector];
+
+ desc = irq_to_desc(irq);
+ spin_lock(&desc->lock);
+ if (desc->chip->retrigger)
+ desc->chip->retrigger(irq);
+ spin_unlock(&desc->lock);
+ }
+ }
+}
+#endif
diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c
index 7d35d0fe2329..10709f29d166 100644
--- a/trunk/arch/x86/kernel/irq_32.c
+++ b/trunk/arch/x86/kernel/irq_32.c
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
-void fixup_irqs(void)
-{
- unsigned int irq;
- struct irq_desc *desc;
-
- for_each_irq_desc(irq, desc) {
- const struct cpumask *affinity;
-
- if (!desc)
- continue;
- if (irq == 2)
- continue;
-
- affinity = desc->affinity;
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- printk("Breaking affinity for irq %i\n", irq);
- affinity = cpu_all_mask;
- }
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (desc->action)
- printk_once("Cannot set affinity for irq %i\n", irq);
- }
-
-#if 0
- barrier();
- /* Ingo Molnar says: "after the IO-APIC masks have been redirected
- [note the nop - the interrupt-enable boundary on x86 is two
- instructions from sti] - to flush out pending hardirqs and
- IPIs. After this point nothing is supposed to reach this CPU." */
- __asm__ __volatile__("sti; nop; cli");
- barrier();
-#else
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-#endif
-}
-#endif
-
diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c
index 977d8b43a0dd..acf8fbf8fbda 100644
--- a/trunk/arch/x86/kernel/irq_64.c
+++ b/trunk/arch/x86/kernel/irq_64.c
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-#ifdef CONFIG_HOTPLUG_CPU
-/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
-void fixup_irqs(void)
-{
- unsigned int irq;
- static int warned;
- struct irq_desc *desc;
-
- for_each_irq_desc(irq, desc) {
- int break_affinity = 0;
- int set_affinity = 1;
- const struct cpumask *affinity;
-
- if (!desc)
- continue;
- if (irq == 2)
- continue;
-
- /* interrupt's are disabled at this point */
- spin_lock(&desc->lock);
-
- affinity = desc->affinity;
- if (!irq_has_action(irq) ||
- cpumask_equal(affinity, cpu_online_mask)) {
- spin_unlock(&desc->lock);
- continue;
- }
-
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- break_affinity = 1;
- affinity = cpu_all_mask;
- }
-
- if (desc->chip->mask)
- desc->chip->mask(irq);
-
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (!(warned++))
- set_affinity = 0;
-
- if (desc->chip->unmask)
- desc->chip->unmask(irq);
-
- spin_unlock(&desc->lock);
-
- if (break_affinity && set_affinity)
- printk("Broke affinity for irq %i\n", irq);
- else if (!set_affinity)
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
extern void call_softirq(void);
diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..20a5b3689463 100644
--- a/trunk/arch/x86/kernel/kgdb.c
+++ b/trunk/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
#include
#include
+#include
#include
#include
@@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
- gdb_regs[GDB_SP] = (int)®s->sp;
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
- gdb_regs[GDB_SP] = regs->sp;
#endif
+ gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
}
/**
@@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
diff --git a/trunk/arch/x86/kernel/kprobes.c b/trunk/arch/x86/kernel/kprobes.c
index 7b5169d2b000..1f3186ce213c 100644
--- a/trunk/arch/x86/kernel/kprobes.c
+++ b/trunk/arch/x86/kernel/kprobes.c
@@ -48,31 +48,22 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
+#include
+#include
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#ifdef CONFIG_X86_64
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-#else
-/*
- * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
- * don't save the ss and esp registers if the CPU is already in kernel
- * mode when it traps. So for kprobes, regs->sp and regs->ss are not
- * the [nonexistent] saved stack pointer and ss register, but rather
- * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to
- * point to the top of the pre-int3 stack.
- */
-#define stack_addr(regs) ((unsigned long *)®s->sp)
-#endif
+#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
-static const u32 onebyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
- W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
- W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
- W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
- W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
- W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
- W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
- W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
- W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
- W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
- W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
- W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
- W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
- W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
- W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
- W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
- W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
- W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
- W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
- W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
- W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
- W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
- W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
- W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
- W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
- W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
- W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
- W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
- W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
#undef W
struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +191,75 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
}
}
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+ struct kprobe *kp;
+ kp = get_kprobe((void *)addr);
+ if (!kp)
+ return -EINVAL;
+
+ /*
+ * Basically, kp->ainsn.insn has an original instruction.
+ * However, RIP-relative instruction can not do single-stepping
+ * at different place, fix_riprel() tweaks the displacement of
+ * that instruction. In that case, we can't recover the instruction
+ * from the kp->ainsn.insn.
+ *
+ * On the other hand, kp->opcode has a copy of the first byte of
+ * the probed instruction, which is overwritten by int3. And
+ * the instruction at kp->addr is not modified by kprobes except
+ * for the first byte, we can recover the original instruction
+ * from it and kp->opcode.
+ */
+ memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ buf[0] = kp->opcode;
+ return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+ int ret;
+ unsigned long addr, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+ if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr) {
+ kernel_insn_init(&insn, (void *)addr);
+ insn_get_opcode(&insn);
+
+ /*
+ * Check if the instruction has been modified by another
+ * kprobe, in which case we replace the breakpoint by the
+ * original instruction in our buffer.
+ */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf, addr);
+ if (ret)
+ /*
+ * Another debugging subsystem might insert
+ * this breakpoint. In that case, we can't
+ * recover it.
+ */
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ insn_get_length(&insn);
+ addr += insn.length;
+ }
+
+ return (addr == paddr);
+}
+
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
static void __kprobes fix_riprel(struct kprobe *p)
{
#ifdef CONFIG_X86_64
- u8 *insn = p->ainsn.insn;
- s64 disp;
- int need_modrm;
-
- /* Skip legacy instruction prefixes. */
- while (1) {
- switch (*insn) {
- case 0x66:
- case 0x67:
- case 0x2e:
- case 0x3e:
- case 0x26:
- case 0x64:
- case 0x65:
- case 0x36:
- case 0xf0:
- case 0xf3:
- case 0xf2:
- ++insn;
- continue;
- }
- break;
- }
+ struct insn insn;
+ kernel_insn_init(&insn, p->ainsn.insn);
- /* Skip REX instruction prefix. */
- if (is_REX_prefix(insn))
- ++insn;
-
- if (*insn == 0x0f) {
- /* Two-byte opcode. */
- ++insn;
- need_modrm = test_bit(*insn,
- (unsigned long *)twobyte_has_modrm);
- } else
- /* One-byte opcode. */
- need_modrm = test_bit(*insn,
- (unsigned long *)onebyte_has_modrm);
-
- if (need_modrm) {
- u8 modrm = *++insn;
- if ((modrm & 0xc7) == 0x05) {
- /* %rip+disp32 addressing mode */
- /* Displacement follows ModRM byte. */
- ++insn;
- /*
- * The copied instruction uses the %rip-relative
- * addressing mode. Adjust the displacement for the
- * difference between the original location of this
- * instruction and the location of the copy that will
- * actually be run. The tricky bit here is making sure
- * that the sign extension happens correctly in this
- * calculation, since we need a signed 32-bit result to
- * be sign-extended to 64 bits when it's added to the
- * %rip value and yield the same 64-bit result that the
- * sign-extension of the original signed 32-bit
- * displacement would have given.
- */
- disp = (u8 *) p->addr + *((s32 *) insn) -
- (u8 *) p->ainsn.insn;
- BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
- *(s32 *)insn = (s32) disp;
- }
+ if (insn_rip_relative(&insn)) {
+ s64 newdisp;
+ u8 *disp;
+ insn_get_displacement(&insn);
+ /*
+ * The copied instruction uses the %rip-relative addressing
+ * mode. Adjust the displacement for the difference between
+ * the original location of this instruction and the location
+ * of the copy that will actually be run. The tricky bit here
+ * is making sure that the sign extension happens correctly in
+ * this calculation, since we need a signed 32-bit result to
+ * be sign-extended to 64 bits when it's added to the %rip
+ * value and yield the same 64-bit result that the sign-
+ * extension of the original signed 32-bit displacement would
+ * have given.
+ */
+ newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
+ (u8 *) p->ainsn.insn;
+ BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
+ disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+ *(s32 *) disp = (s32) newdisp;
}
#endif
}
@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
+ if (!can_probe((unsigned long)p->addr))
+ return -EILSEQ;
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
- /* TODO: Provide re-entrancy from post_kprobes_handler() and
- * avoid exception stack corruption while single-stepping on
- * the instruction of the new probe.
- */
- arch_disarm_kprobe(p);
- regs->ip = (unsigned long)p->addr;
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
-#endif
case KPROBE_HIT_ACTIVE:
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_status = KPROBE_REENTER;
break;
case KPROBE_HIT_SS:
- if (p == kprobe_running()) {
- regs->flags &= ~X86_EFLAGS_TF;
- regs->flags |= kcb->kprobe_saved_flags;
- return 0;
- } else {
- /* A probe has been hit in the codepath leading up
- * to, or just after, single-stepping of a probed
- * instruction. This entire codepath should strictly
- * reside in .kprobes.text section. Raise a warning
- * to highlight this peculiar case.
- */
- }
+ /* A probe has been hit in the codepath leading up to, or just
+ * after, single-stepping of a probed instruction. This entire
+ * codepath should strictly reside in .kprobes.text section.
+ * Raise a BUG or we'll continue in an endless reentering loop
+ * and eventually a stack overflow.
+ */
+ printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+ p->addr);
+ dump_kprobe(p);
+ BUG();
default:
/* impossible cases */
WARN_ON(1);
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(args->regs)) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
+ }
break;
case DIE_GPF:
/*
diff --git a/trunk/arch/x86/kernel/machine_kexec_32.c b/trunk/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/trunk/arch/x86/kernel/machine_kexec_32.c
+++ b/trunk/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include
static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/trunk/arch/x86/kernel/machine_kexec_64.c b/trunk/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/trunk/arch/x86/kernel/machine_kexec_64.c
+++ b/trunk/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/trunk/arch/x86/kernel/microcode_core.c b/trunk/arch/x86/kernel/microcode_core.c
index 378e9a8f1bf8..2bcad3926edb 100644
--- a/trunk/arch/x86/kernel/microcode_core.c
+++ b/trunk/arch/x86/kernel/microcode_core.c
@@ -73,7 +73,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size)
static int microcode_open(struct inode *unused1, struct file *unused2)
{
- cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
diff --git a/trunk/arch/x86/kernel/msr.c b/trunk/arch/x86/kernel/msr.c
index 6a3cefc7dda1..553449951b84 100644
--- a/trunk/arch/x86/kernel/msr.c
+++ b/trunk/arch/x86/kernel/msr.c
@@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
struct cpuinfo_x86 *c = &cpu_data(cpu);
- int ret = 0;
- lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- ret = -ENXIO; /* No such CPU */
- goto out;
- }
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO; /* No such CPU */
+
c = &cpu_data(cpu);
if (!cpu_has(c, X86_FEATURE_MSR))
- ret = -EIO; /* MSR not supported */
-out:
- unlock_kernel();
- return ret;
+ return -EIO; /* MSR not supported */
+
+ return 0;
}
/*
diff --git a/trunk/arch/x86/kernel/pci-calgary_64.c b/trunk/arch/x86/kernel/pci-calgary_64.c
index 971a3bec47a8..c563e4c8ff39 100644
--- a/trunk/arch/x86/kernel/pci-calgary_64.c
+++ b/trunk/arch/x86/kernel/pci-calgary_64.c
@@ -46,6 +46,7 @@
#include
#include
#include
+#include
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
else
- return bad_dma_address;
+ return DMA_ERROR_CODE;
}
}
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
void *vaddr, unsigned int npages, int direction)
{
unsigned long entry;
- dma_addr_t ret = bad_dma_address;
+ dma_addr_t ret;
entry = iommu_range_alloc(dev, tbl, npages);
- if (unlikely(entry == bad_dma_address))
- goto error;
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
+ "iommu %p\n", npages, tbl);
+ return DMA_ERROR_CODE;
+ }
/* set the return dma address */
ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
/* put the TCEs in the HW table */
tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
direction);
-
return ret;
-
-error:
- printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
- "iommu %p\n", npages, tbl);
- return bad_dma_address;
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned long flags;
/* were we called with bad_dma_address? */
- badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
- if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
+ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
+ if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
return;
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
pdev = to_pci_dev(dev);
+ /* search up the device tree for an iommu */
pbus = pdev->bus;
-
- /* is the device behind a bridge? Look for the root bus */
- while (pbus->parent)
+ do {
+ tbl = pci_iommu(pbus);
+ if (tbl && tbl->it_busno == pbus->number)
+ break;
+ tbl = NULL;
pbus = pbus->parent;
-
- tbl = pci_iommu(pbus);
+ } while (pbus);
BUG_ON(tbl && (tbl->it_busno != pbus->number));
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
entry = iommu_range_alloc(dev, tbl, npages);
- if (entry == bad_dma_address) {
+ if (entry == DMA_ERROR_CODE) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
goto error;
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
error:
calgary_unmap_sg(dev, sg, nelems, dir, NULL);
for_each_sg(sg, s, nelems, i) {
- sg->dma_address = bad_dma_address;
+ sg->dma_address = DMA_ERROR_CODE;
sg->dma_length = 0;
}
return 0;
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
/* set up tces to cover the allocated range */
mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == bad_dma_address)
+ if (mapping == DMA_ERROR_CODE)
goto free;
*dma_handle = mapping;
return ret;
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
struct iommu_table *tbl = pci_iommu(dev->bus);
/* reserve EMERGENCY_PAGES from bad_dma_address and up */
- iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
+ iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);
/* avoid the BIOS/VGA first 640KB-1MB region */
/* for CalIOC2 - avoid the entire first MB */
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void)
return;
}
+static int __init calgary_iommu_init(void)
+{
+ int ret;
+
+ /* ok, we're trying to use Calgary - let's roll */
+ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
+
+ ret = calgary_init();
+ if (ret) {
+ printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
+ "falling back to no_iommu\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
void __init detect_calgary(void)
{
int bus;
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void)
* if the user specified iommu=off or iommu=soft or we found
* another HW IOMMU already, bail out.
*/
- if (swiotlb || no_iommu || iommu_detected)
+ if (no_iommu || iommu_detected)
return;
if (!use_calgary)
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void)
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
specified_table_size);
- /* swiotlb for devices that aren't behind the Calgary. */
- if (max_pfn > MAX_DMA32_PFN)
- swiotlb = 1;
+ x86_init.iommu.iommu_init = calgary_iommu_init;
}
return;
@@ -1457,35 +1472,6 @@ void __init detect_calgary(void)
}
}
-int __init calgary_iommu_init(void)
-{
- int ret;
-
- if (no_iommu || (swiotlb && !calgary_detected))
- return -ENODEV;
-
- if (!calgary_detected)
- return -ENODEV;
-
- /* ok, we're trying to use Calgary - let's roll */
- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
-
- ret = calgary_init();
- if (ret) {
- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
- "falling back to no_iommu\n", ret);
- return ret;
- }
-
- force_iommu = 1;
- bad_dma_address = 0x0;
- /* dma_ops is set to swiotlb or nommu */
- if (!dma_ops)
- dma_ops = &nommu_dma_ops;
-
- return 0;
-}
-
static int __init calgary_parse_options(char *p)
{
unsigned int bridge;
diff --git a/trunk/arch/x86/kernel/pci-dma.c b/trunk/arch/x86/kernel/pci-dma.c
index a6e804d16c35..afcc58b69c7c 100644
--- a/trunk/arch/x86/kernel/pci-dma.c
+++ b/trunk/arch/x86/kernel/pci-dma.c
@@ -11,10 +11,11 @@
#include
#include