diff --git a/.mailmap b/.mailmap index 17dd8eb2630e6..ae0adc499f4ac 100644 --- a/.mailmap +++ b/.mailmap @@ -142,13 +142,17 @@ Boris Brezillon Boris Brezillon Brendan Higgins Brian Avery +Brian Cain +Brian Cain Brian King Brian Silverman Bryan Tan Cai Huoqing Can Guo Carl Huang -Carlos Bilbao +Carlos Bilbao +Carlos Bilbao +Carlos Bilbao Changbin Du Changbin Du Chao Yu @@ -165,6 +169,7 @@ Christian Brauner Christian Brauner Christian Marangi Christophe Ricard +Christopher Obbard Christoph Hellwig Chuck Lever Chuck Lever @@ -261,6 +266,7 @@ Guo Ren Guru Das Srinagesh Gustavo Padovan Gustavo Padovan +Hamza Mahfooz Hanjun Guo Hans Verkuil Hans Verkuil @@ -761,6 +767,7 @@ Wolfram Sang Yakir Yang Yanteng Si Ying Huang +Yosry Ahmed Yusuke Goda Zack Rusin Zhu Yanjun diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 955fbcd19ce90..f273ea15a8e83 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -293,3 +293,13 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are not supported at all and will generate a misaligned address fault. + +* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the + thead vendor extensions that are compatible with the + :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior. + + * T-HEAD + + * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor + extension is supported in the T-Head ISA extensions spec starting from + commit a18c801634 ("Add T-Head VECTOR vendor extension. "). diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 3713175548637..0db2cbd7891ff 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -1091,6 +1091,7 @@ properties: - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC - emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit - fsl,imx8mp-evk # i.MX8MP EVK Board + - fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board - gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board @@ -1271,6 +1272,7 @@ properties: items: - enum: - fsl,imx8qm-mek # i.MX8QM MEK Board + - fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board - toradex,apalis-imx8 # Apalis iMX8 Modules - toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules - const: fsl,imx8qm @@ -1299,6 +1301,7 @@ properties: - enum: - einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board - fsl,imx8qxp-mek # i.MX8QXP MEK Board + - fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board - const: fsl,imx8qxp - description: i.MX8DXL based Boards diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml index b2adc71741770..dca16e202da99 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml @@ -14,9 +14,8 @@ allOf: description: | The Microchip LAN966x outband interrupt controller (OIC) maps the internal - interrupt sources of the LAN966x device to an external interrupt. - When the LAN966x device is used as a PCI device, the external interrupt is - routed to the PCI interrupt. + interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x + device is used as a PCI device. properties: compatible: diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index acb5b9ba6f049..2c72f148a74b0 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -26,6 +26,18 @@ description: | allOf: - $ref: /schemas/cpu.yaml# - $ref: extensions.yaml + - if: + not: + properties: + compatible: + contains: + enum: + - thead,c906 + - thead,c910 + - thead,c920 + then: + properties: + thead,vlenb: false properties: compatible: @@ -96,6 +108,13 @@ properties: description: The blocksize in bytes for the Zicboz cache operations. + thead,vlenb: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + VLEN/8, the vector register length in bytes. This property is required on + thead systems where the vector register length is not identical on all harts, or + the vlenb CSR is not available. + # RISC-V has multiple properties for cache op block sizes as the sizes # differ between individual CBO extensions cache-op-block-size: false diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 9c7dd7e75e0ca..a63b994e07636 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -621,6 +621,10 @@ properties: latency, as ratified in commit 56ed795 ("Update riscv-crypto-spec-vector.adoc") of riscv-crypto. + # vendor extensions, each extension sorted alphanumerically under the + # vendor they belong to. Vendors are sorted alphanumerically as well. + + # Andes - const: xandespmu description: The Andes Technology performance monitor extension for counter overflow @@ -628,6 +632,12 @@ properties: Registers in the AX45MP datasheet. https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf + # T-HEAD + - const: xtheadvector + description: + The T-HEAD specific 0.7.1 vector implementation as written in + https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc. + allOf: # Zcb depends on Zca - if: diff --git a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml index a14b52178c4b0..2599b847f406c 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml @@ -14,9 +14,13 @@ maintainers: properties: compatible: - enum: - - fsl,imx1-rtc - - fsl,imx21-rtc + oneOf: + - const: fsl,imx1-rtc + - const: fsl,imx21-rtc + - items: + - enum: + - fsl,imx31-rtc + - const: fsl,imx21-rtc reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml index 5aa00617291c9..1f0e6787a7464 100644 --- a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml +++ b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ti,pcm1681.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments PCM1681 8-channel PWM Processor +title: Texas Instruments PCM1681 8-channel Digital-to-Analog Converter maintainers: - Shenghao Ding diff --git a/Documentation/kbuild/gendwarfksyms.rst b/Documentation/kbuild/gendwarfksyms.rst new file mode 100644 index 0000000000000..e4beaae7e456c --- /dev/null +++ b/Documentation/kbuild/gendwarfksyms.rst @@ -0,0 +1,308 @@ +======================= +DWARF module versioning +======================= + +1. Introduction +=============== + +When CONFIG_MODVERSIONS is enabled, symbol versions for modules +are typically calculated from preprocessed source code using the +**genksyms** tool. However, this is incompatible with languages such +as Rust, where the source code has insufficient information about +the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO) +selected, **gendwarfksyms** is used instead to calculate symbol versions +from the DWARF debugging information, which contains the necessary +details about the final module ABI. + +1.1. Usage +========== + +gendwarfksyms accepts a list of object files on the command line, and a +list of symbol names (one per line) in standard input:: + + Usage: gendwarfksyms [options] elf-object-file ... < symbol-list + + Options: + -d, --debug Print debugging information + --dump-dies Dump DWARF DIE contents + --dump-die-map Print debugging information about die_map changes + --dump-types Dump type strings + --dump-versions Dump expanded type strings used for symbol versions + -s, --stable Support kABI stability features + -T, --symtypes file Write a symtypes file + -h, --help Print this message + + +2. Type information availability +================================ + +While symbols are typically exported in the same translation unit (TU) +where they're defined, it's also perfectly fine for a TU to export +external symbols. For example, this is done when calculating symbol +versions for exports in stand-alone assembly code. + +To ensure the compiler emits the necessary DWARF type information in the +TU where symbols are actually exported, gendwarfksyms adds a pointer +to exported symbols in the `EXPORT_SYMBOL()` macro using the following +macro:: + + #define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + + +When a symbol pointer is found in DWARF, gendwarfksyms can use its +type for calculating symbol versions even if the symbol is defined +elsewhere. The name of the symbol pointer is expected to start with +`__gendwarfksyms_ptr_`, followed by the name of the exported symbol. + +3. Symtypes output format +========================= + +Similarly to genksyms, gendwarfksyms supports writing a symtypes +file for each processed object that contain types for exported +symbols and each referenced type that was used in calculating symbol +versions. These files can be useful when trying to determine what +exactly caused symbol versions to change between builds. To generate +symtypes files during a kernel build, set `KBUILD_SYMTYPES=1`. + +Matching the existing format, the first column of each line contains +either a type reference or a symbol name. Type references have a +one-letter prefix followed by "#" and the name of the type. Four +reference types are supported:: + + e# = enum + s# = struct + t# = typedef + u# = union + +Type names with spaces in them are wrapped in single quotes, e.g.:: + + s#'core::result::Result' + +The rest of the line contains a type string. Unlike with genksyms that +produces C-style type strings, gendwarfksyms uses the same simple parsed +DWARF format produced by **--dump-dies**, but with type references +instead of fully expanded strings. + +4. Maintaining a stable kABI +============================ + +Distribution maintainers often need the ability to make ABI compatible +changes to kernel data structures due to LTS updates or backports. Using +the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol +versioning won't work when processing object files. To support this +use case, gendwarfksyms provides kABI stability features designed to +hide changes that won't affect the ABI when calculating versions. These +features are all gated behind the **--stable** command line flag and are +not used in the mainline kernel. To use stable features during a kernel +build, set `KBUILD_GENDWARFKSYMS_STABLE=1`. + +Examples for using these features are provided in the +**scripts/gendwarfksyms/examples** directory, including helper macros +for source code annotation. Note that as these features are only used to +transform the inputs for symbol versioning, the user is responsible for +ensuring that their changes actually won't break the ABI. + +4.1. kABI rules +=============== + +kABI rules allow distributions to fine-tune certain parts +of gendwarfksyms output and thus control how symbol +versions are calculated. These rules are defined in the +`.discard.gendwarfksyms.kabi_rules` section of the object file and +consist of simple null-terminated strings with the following structure:: + + version\0type\0target\0value\0 + +This string sequence is repeated as many times as needed to express all +the rules. The fields are as follows: + +- `version`: Ensures backward compatibility for future changes to the + structure. Currently expected to be "1". +- `type`: Indicates the type of rule being applied. +- `target`: Specifies the target of the rule, typically the fully + qualified name of the DWARF Debugging Information Entry (DIE). +- `value`: Provides rule-specific data. + +The following helper macro, for example, can be used to specify rules +in the source code:: + + #define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + + +Currently, only the rules discussed in this section are supported, but +the format is extensible enough to allow further rules to be added as +need arises. + +4.1.1. Managing definition visibility +===================================== + +A declaration can change into a full definition when additional includes +are pulled into the translation unit. This changes the versions of any +symbol that references the type even if the ABI remains unchanged. As +it may not be possible to drop includes without breaking the build, the +`declonly` rule can be used to specify a type as declaration-only, even +if the debugging information contains the full definition. + +The rule fields are expected to be as follows: + +- `type`: "declonly" +- `target`: The fully qualified name of the target data structure + (as shown in **--dump-dies** output). +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +Example usage:: + + struct s { + /* definition */ + }; + + KABI_DECLONLY(s); + +4.1.2. Adding enumerators +========================= + +For enums, all enumerators and their values are included in calculating +symbol versions, which becomes a problem if we later need to add more +enumerators without changing symbol versions. The `enumerator_ignore` +rule allows us to hide named enumerators from the input. + +The rule fields are expected to be as follows: + +- `type`: "enumerator_ignore" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +Example usage:: + + enum e { + A, B, C, D, + }; + + KABI_ENUMERATOR_IGNORE(e, B); + KABI_ENUMERATOR_IGNORE(e, C); + +If the enum additionally includes an end marker and new values must +be added in the middle, we may need to use the old value for the last +enumerator when calculating versions. The `enumerator_value` rule allows +us to override the value of an enumerator for version calculation: + +- `type`: "enumerator_value" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: Integer value used for the field. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +Example usage:: + + enum e { + A, B, C, LAST, + }; + + KABI_ENUMERATOR_IGNORE(e, C); + KABI_ENUMERATOR_VALUE(e, LAST, 2); + +4.3. Adding structure members +============================= + +Perhaps the most common ABI compatible change is adding a member to a +kernel data structure. When changes to a structure are anticipated, +distribution maintainers can pre-emptively reserve space in the +structure and take it into use later without breaking the ABI. If +changes are needed to data structures without reserved space, existing +alignment holes can potentially be used instead. While kABI rules could +be added for these type of changes, using unions is typically a more +natural method. This section describes gendwarfksyms support for using +reserved space in data structures and hiding members that don't change +the ABI when calculating symbol versions. + +4.3.1. Reserving space and replacing members +============================================ + +Space is typically reserved for later use by appending integer types, or +arrays, to the end of the data structure, but any type can be used. Each +reserved member needs a unique name, but as the actual purpose is usually +not known at the time the space is reserved, for convenience, names that +start with `__kabi_` are left out when calculating symbol versions:: + + struct s { + long a; + long __kabi_reserved_0; /* reserved for future use */ + }; + +The reserved space can be taken into use by wrapping the member in a +union, which includes the original type and the replacement member:: + + struct s { + long a; + union { + long __kabi_reserved_0; /* original type */ + struct b b; /* replaced field */ + }; + }; + +If the `__kabi_` naming scheme was used when reserving space, the name +of the first member of the union must start with `__kabi_reserved`. This +ensures the original type is used when calculating versions, but the name +is again left out. The rest of the union is ignored. + +If we're replacing a member that doesn't follow this naming convention, +we also need to preserve the original name to avoid changing versions, +which we can do by changing the first union member's name to start with +`__kabi_renamed` followed by the original name. + +The examples include `KABI_(RESERVE|USE|REPLACE)*` macros that help +simplify the process and also ensure the replacement member is correctly +aligned and its size won't exceed the reserved space. + +4.3.2. Hiding members +===================== + +Predicting which structures will require changes during the support +timeframe isn't always possible, in which case one might have to resort +to placing new members into existing alignment holes:: + + struct s { + int a; + /* a 4-byte alignment hole */ + unsigned long b; + }; + + +While this won't change the size of the data structure, one needs to +be able to hide the added members from symbol versioning. Similarly +to reserved fields, this can be accomplished by wrapping the added +member to a union where one of the fields has a name starting with +`__kabi_ignored`:: + + struct s { + int a; + union { + char __kabi_ignored_0; + int n; + }; + unsigned long b; + }; + +With **--stable**, both versions produce the same symbol version. diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index cee2f99f734b5..e82af05cd652c 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -21,6 +21,7 @@ Kernel Build System reproducible-builds gcc-plugins llvm + gendwarfksyms .. only:: subproject and html diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 101de236cd0c9..a42f00d8cb90f 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -423,6 +423,26 @@ Symbols From the Kernel (vmlinux + modules) 1) It lists all exported symbols from vmlinux and all modules. 2) It lists the CRC if CONFIG_MODVERSIONS is enabled. +Version Information Formats +--------------------------- + + Exported symbols have information stored in __ksymtab or __ksymtab_gpl + sections. Symbol names and namespaces are stored in __ksymtab_strings, + using a format similar to the string table used for ELF. If + CONFIG_MODVERSIONS is enabled, the CRCs corresponding to exported + symbols will be added to the __kcrctab or __kcrctab_gpl. + + If CONFIG_BASIC_MODVERSIONS is enabled (default with + CONFIG_MODVERSIONS), imported symbols will have their symbol name and + CRC stored in the __versions section of the importing module. This + mode only supports symbols of length up to 64 bytes. + + If CONFIG_EXTENDED_MODVERSIONS is enabled (required to enable both + CONFIG_MODVERSIONS and CONFIG_RUST at the same time), imported symbols + will have their symbol name recorded in the __version_ext_names + section as a series of concatenated, null-terminated strings. CRCs for + these symbols will be recorded in the __version_ext_crcs section. + Symbols and External Modules ---------------------------- diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 82b5e378eebff..a0beca805362d 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -59,7 +59,6 @@ iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version Sphinx\ [#f1]_ 2.4.4 sphinx-build --version -cpio any cpio --version GNU tar 1.28 tar --version gtags (optional) 6.6.5 gtags --version mkimage (optional) 2017.01 mkimage --version @@ -536,11 +535,6 @@ mcelog - -cpio ----- - -- - Networking ********** diff --git a/Documentation/translations/sp_SP/index.rst b/Documentation/translations/sp_SP/index.rst index aae7018b0d1a2..2b50283e16089 100644 --- a/Documentation/translations/sp_SP/index.rst +++ b/Documentation/translations/sp_SP/index.rst @@ -7,7 +7,7 @@ Traducción al español \kerneldocCJKoff -:maintainer: Carlos Bilbao +:maintainer: Carlos Bilbao .. _sp_disclaimer: diff --git a/MAINTAINERS b/MAINTAINERS index d1086e53a3176..873aa2cce4d7f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1090,7 +1090,7 @@ F: drivers/video/fbdev/geode/ AMD HSMP DRIVER M: Naveen Krishna Chatradhi -R: Carlos Bilbao +R: Carlos Bilbao L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/arch/x86/amd_hsmp.rst @@ -5857,7 +5857,7 @@ F: drivers/usb/atm/cxacru.c CONFIDENTIAL COMPUTING THREAT MODEL FOR X86 VIRTUALIZATION (SNP/TDX) M: Elena Reshetova -M: Carlos Bilbao +M: Carlos Bilbao S: Maintained F: Documentation/security/snp-tdx-threat-model.rst @@ -9641,6 +9641,13 @@ W: https://linuxtv.org T: git git://linuxtv.org/media.git F: drivers/media/radio/radio-gemtek* +GENDWARFKSYMS +M: Sami Tolvanen +L: linux-modules@vger.kernel.org +L: linux-kbuild@vger.kernel.org +S: Maintained +F: scripts/gendwarfksyms/ + GENERIC ARCHITECTURE TOPOLOGY M: Sudeep Holla L: linux-kernel@vger.kernel.org @@ -11324,7 +11331,7 @@ S: Orphan F: drivers/video/fbdev/imsttfb.c INDEX OF FURTHER KERNEL DOCUMENTATION -M: Carlos Bilbao +M: Carlos Bilbao S: Maintained F: Documentation/process/kernel-docs.rst @@ -16455,6 +16462,22 @@ F: include/net/dsa.h F: net/dsa/ F: tools/testing/selftests/drivers/net/dsa/ +NETWORKING [ETHTOOL] +M: Andrew Lunn +M: Jakub Kicinski +F: Documentation/netlink/specs/ethtool.yaml +F: Documentation/networking/ethtool-netlink.rst +F: include/linux/ethtool* +F: include/uapi/linux/ethtool* +F: net/ethtool/ +F: tools/testing/selftests/drivers/net/*/ethtool* + +NETWORKING [ETHTOOL CABLE TEST] +M: Andrew Lunn +F: net/ethtool/cabletest.c +F: tools/testing/selftests/drivers/net/*/ethtool* +K: cable_test + NETWORKING [GENERAL] M: "David S. Miller" M: Eric Dumazet @@ -16614,6 +16637,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet M: Neal Cardwell +R: Kuniyuki Iwashima L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -16641,6 +16665,31 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* +NETWORKING [SOCKETS] +M: Eric Dumazet +M: Kuniyuki Iwashima +M: Paolo Abeni +M: Willem de Bruijn +S: Maintained +F: include/linux/sock_diag.h +F: include/linux/socket.h +F: include/linux/sockptr.h +F: include/net/sock.h +F: include/net/sock_reuseport.h +F: include/uapi/linux/socket.h +F: net/core/*sock* +F: net/core/scm.c +F: net/socket.c + +NETWORKING [UNIX SOCKETS] +M: Kuniyuki Iwashima +S: Maintained +F: include/net/af_unix.h +F: include/net/netns/unix.h +F: include/uapi/linux/unix_diag.h +F: net/unix/ +F: tools/testing/selftests/net/af_unix/ + NETXEN (1/10) GbE SUPPORT M: Manish Chopra M: Rahul Verma @@ -17706,6 +17755,7 @@ L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained W: http://openvswitch.org +F: Documentation/networking/openvswitch.rst F: include/uapi/linux/openvswitch.h F: net/openvswitch/ F: tools/testing/selftests/net/openvswitch/ @@ -19446,7 +19496,7 @@ F: drivers/misc/fastrpc.c F: include/uapi/misc/fastrpc.h QUALCOMM HEXAGON ARCHITECTURE -M: Brian Cain +M: Brian Cain L: linux-hexagon@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/bcain/linux.git @@ -22208,7 +22258,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/sp2* SPANISH DOCUMENTATION -M: Carlos Bilbao +M: Carlos Bilbao R: Avadhut Naik S: Maintained F: Documentation/translations/sp_SP/ @@ -25732,11 +25782,13 @@ F: arch/x86/entry/vdso/ XARRAY M: Matthew Wilcox L: linux-fsdevel@vger.kernel.org +L: linux-mm@kvack.org S: Supported F: Documentation/core-api/xarray.rst F: include/linux/idr.h F: include/linux/xarray.h F: lib/idr.c +F: lib/test_xarray.c F: lib/xarray.c F: tools/testing/radix-tree @@ -26216,7 +26268,7 @@ K: zstd ZSWAP COMPRESSED SWAP CACHING M: Johannes Weiner -M: Yosry Ahmed +M: Yosry Ahmed M: Nhat Pham R: Chengming Zhou L: linux-mm@kvack.org diff --git a/Makefile b/Makefile index 4117cc79748bf..9e0d63d9d94b9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 13 +PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 62da5827f471b..f27e6b90428e4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -18,6 +18,7 @@ config ARC select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T select BUILDTIME_TABLE_SORT + select GENERIC_BUILTIN_DTB select CLONE_BACKWARDS select COMMON_CLK select DMA_DIRECT_REMAP @@ -550,11 +551,11 @@ config ARC_DBG_JUMP_LABEL part of static keys (jump labels) related code. endif -config ARC_BUILTIN_DTB_NAME +config BUILTIN_DTB_NAME string "Built in DTB" + default "nsim_700" help - Set the name of the DTB to embed in the vmlinux binary - Leaving it blank selects the "nsim_700" dtb. + Set the name of the DTB to embed in the vmlinux binary. endmenu # "ARC Architecture Configuration" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fb98478ed1ab0..0c5e6e6314f29 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -82,9 +82,6 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_AFLAGS += $(KBUILD_CFLAGS) KBUILD_LDFLAGS += $(ldflags-y) -# w/o this dtb won't embed into kernel binary -core-y += arch/arc/boot/dts/ - core-y += arch/arc/plat-sim/ core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile index 48704dfdf75cb..ee5664f0640d5 100644 --- a/arch/arc/boot/dts/Makefile +++ b/arch/arc/boot/dts/Makefile @@ -1,13 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -# Built-in dtb -builtindtb-y := nsim_700 -ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),) - builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME) -endif - -obj-y += $(builtindtb-y).dtb.o -dtb-y := $(builtindtb-y).dtb +dtb-y := $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME)) # for CONFIG_OF_ALL_DTBS test dtb- := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 319bbe2703223..a7cd526dd7ca3 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -23,7 +23,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS101=y CONFIG_ARC_CACHE_LINE_SHIFT=5 -CONFIG_ARC_BUILTIN_DTB_NAME="axs101" +CONFIG_BUILTIN_DTB_NAME="axs101" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 8c1f1a111a175..afa6a348f4445 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -22,7 +22,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103" +CONFIG_BUILTIN_DTB_NAME="axs103" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 75cab9f25b5bb..2bfa6371953cc 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -22,7 +22,7 @@ CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu" +CONFIG_BUILTIN_DTB_NAME="axs103_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 8c3ed5d6e6c35..3a15771120782 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -14,7 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs" +CONFIG_BUILTIN_DTB_NAME="haps_hs" CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 6fc98c1b9b368..a3cf940b1f5b4 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu" +CONFIG_BUILTIN_DTB_NAME="haps_hs_idu" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9e79154b5535a..1558e8e87767e 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -20,7 +20,7 @@ CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_LINUX_LINK_BASE=0x90000000 CONFIG_LINUX_RAM_BASE=0x80000000 -CONFIG_ARC_BUILTIN_DTB_NAME="hsdk" +CONFIG_BUILTIN_DTB_NAME="hsdk" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 51092c39e3607..f8b3235d9a65e 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -17,7 +17,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_ISA_ARCOMPACT=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" +CONFIG_BUILTIN_DTB_NAME="nsim_700" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 70c17bca49397..ee45dc0877fbc 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -19,7 +19,7 @@ CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" +CONFIG_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 59a3b6642fe71..e0a309970c20b 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -19,7 +19,7 @@ CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 1419fc946a083..88325b8b49cf4 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_MODULES=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 5aba3d850fa2f..865fbc19ef031 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -26,7 +26,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARC_PLAT_TB10X=y CONFIG_ARC_CACHE_LINE_SHIFT=5 CONFIG_HZ=250 -CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" +CONFIG_BUILTIN_DTB_NAME="abilis_tb100_dvk" CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 50c3439138257..03d9ac20baa98 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -13,7 +13,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38" CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 6d9e1d9f71d21..c09488992f131 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -15,7 +15,7 @@ CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38_smp" CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index bf6cf5579cf45..9c58fb81f7fd6 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -56,7 +56,7 @@ __arch_xchg(unsigned long x, volatile void *ptr, int size) __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __oldval = 0; \ + __typeof__(*(ptr)) __oldval = (__typeof__(*(ptr))) 0; \ \ asm volatile( \ "1: %0 = memw_locked(%1);\n" \ diff --git a/arch/hexagon/include/asm/setup.h b/arch/hexagon/include/asm/setup.h new file mode 100644 index 0000000000000..9f2749cd4052d --- /dev/null +++ b/arch/hexagon/include/asm/setup.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + */ + +#ifndef _ASM_HEXAGON_SETUP_H +#define _ASM_HEXAGON_SETUP_H + +#include +#include + +extern char external_cmdline_buffer; + +void __init setup_arch_memory(void); + +#endif diff --git a/arch/hexagon/include/uapi/asm/setup.h b/arch/hexagon/include/uapi/asm/setup.h index 8ce9428b15832..598f74f671f65 100644 --- a/arch/hexagon/include/uapi/asm/setup.h +++ b/arch/hexagon/include/uapi/asm/setup.h @@ -17,19 +17,9 @@ * 02110-1301, USA. */ -#ifndef _ASM_SETUP_H -#define _ASM_SETUP_H - -#ifdef __KERNEL__ -#include -#else -#define __init -#endif +#ifndef _UAPI_ASM_HEXAGON_SETUP_H +#define _UAPI_ASM_HEXAGON_SETUP_H #include -extern char external_cmdline_buffer; - -void __init setup_arch_memory(void); - #endif diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c index f0f207e2a6947..6f851e1cd4ee0 100644 --- a/arch/hexagon/kernel/time.c +++ b/arch/hexagon/kernel/time.c @@ -170,8 +170,7 @@ static void __init time_init_deferred(void) ce_dev->cpumask = cpu_all_mask; - if (!resource) - resource = rtos_timer_device.resource; + resource = rtos_timer_device.resource; /* ioremap here means this has to run later, after paging init */ rtos_timer = ioremap(resource->start, resource_size(resource)); diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 75e062722d285..e732aa01c2ff0 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -135,7 +135,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } /* Attempt to continue past exception. */ - if (0 == newfp) { + if (!newfp) { struct pt_regs *regs = (struct pt_regs *) (((void *)fp) + 8); @@ -195,8 +195,10 @@ int die(const char *str, struct pt_regs *regs, long err) printk(KERN_EMERG "Oops: %s[#%d]:\n", str, ++die.counter); if (notify_die(DIE_OOPS, str, regs, err, pt_cause(regs), SIGSEGV) == - NOTIFY_STOP) + NOTIFY_STOP) { + spin_unlock_irq(&die.lock); return 1; + } print_modules(); show_regs(regs); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 8acfa66e10954..dbf2ea561c855 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -626,6 +626,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 35e9a08723048..b0fd199cc0a4e 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -583,6 +583,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 32891ddd3cc59..bb5b2d3b6c103 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -603,6 +603,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index ca276f0db3dd1..8315a13bab73b 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e83f14fe1a4f8..350370657e5fe 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -585,6 +585,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 6b58be24da793..f942b47557026 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -602,6 +602,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 0e8d24f825656..b1eaad02efab9 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -689,6 +689,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 24a7608c13ac7..6309a4442bb3f 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index c415f75821f39..3feb0731f8142 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -576,6 +576,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 2c715a8ff551e..ea04b1b0da7d4 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -592,6 +592,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 15ff37fcccbfb..f52d9af92153d 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -572,6 +572,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 40a44bf9f48d1..f348447824da9 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -573,6 +573,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index e8a57c2067580..349b8aad1159f 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -382,15 +382,15 @@ 368 o32 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents # room for arch specific calls 393 o32 semget sys_semget -394 o32 semctl sys_old_semctl compat_sys_old_semctl +394 o32 semctl sys_semctl compat_sys_semctl 395 o32 shmget sys_shmget -396 o32 shmctl sys_old_shmctl compat_sys_old_shmctl +396 o32 shmctl sys_shmctl compat_sys_shmctl 397 o32 shmat sys_shmat compat_sys_shmat 398 o32 shmdt sys_shmdt 399 o32 msgget sys_msgget 400 o32 msgsnd sys_msgsnd compat_sys_msgsnd 401 o32 msgrcv sys_msgrcv compat_sys_msgrcv -402 o32 msgctl sys_old_msgctl compat_sys_old_msgctl +402 o32 msgctl sys_msgctl compat_sys_msgctl 403 o32 clock_gettime64 sys_clock_gettime sys_clock_gettime 404 o32 clock_settime64 sys_clock_settime sys_clock_settime 405 o32 clock_adjtime64 sys_clock_adjtime sys_clock_adjtime diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index e9c46b59ebbcb..465eb96c755e0 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -448,6 +448,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 45dac7b46aa3c..34a5aec4908fb 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -369,6 +369,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. */ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). @@ -438,10 +456,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 2acc7d876e1fb..e318119d570de 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -119,4 +119,15 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_GHOSTWRITE + bool "Apply T-Head Ghostwrite errata" + depends on ERRATA_THEAD && RISCV_ISA_XTHEADVECTOR + default y + help + The T-Head C9xx cores have a vulnerability in the xtheadvector + instruction set. When this errata is enabled, the CPUs will be probed + to determine if they are vulnerable and disable xtheadvector. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index 6f1cdd32ed29a..b096548fe0ffd 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,4 +16,30 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "T-Head" +config RISCV_ISA_VENDOR_EXT_THEAD + bool "T-Head vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here to disable detection of and support for all T-Head vendor + extensions. Without this option enabled, T-Head vendor extensions will + not be detected at boot and their presence not reported to userspace. + + If you don't know what to do here, say Y. + +config RISCV_ISA_XTHEADVECTOR + bool "xtheadvector extension support" + depends on RISCV_ISA_VENDOR_EXT_THEAD + depends on RISCV_ISA_V + depends on FPU + default y + help + Say N here if you want to disable all xtheadvector related procedures + in the kernel. This will disable vector for any T-Head board that + contains xtheadvector rather than the standard vector. + + If you don't know what to do here, say Y. +endmenu + endmenu diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index 829b9abc91f61..6b0580949b6a2 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -10,6 +10,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib quiet_cmd_relocs_check = CHKREL $@ cmd_relocs_check = \ @@ -19,11 +20,6 @@ ifdef CONFIG_RELOCATABLE quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs -quiet_cmd_relocs_strip = STRIPREL $@ -cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \ - --remove-section='.rel__*' \ - --remove-section='.rela.*' \ - --remove-section='.rela__*' $@ endif # `@true` prevents complaint when there is nothing to be done @@ -33,7 +29,7 @@ vmlinux: FORCE ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) $(call if_changed,cp_vmlinux_relocs) - $(call if_changed,relocs_strip) + $(call if_changed,strip_relocs) endif clean: diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 64c3c2e6cbe02..6367112e614a1 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -27,7 +27,8 @@ riscv,isa = "rv64imafdc"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", - "zifencei", "zihpm"; + "zifencei", "zihpm", "xtheadvector"; + thead,vlenb = <128>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index a924ef116d5e3..0f7dcbe3c45bb 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -10,7 +10,6 @@ CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index e24770a779323..0b942183f708f 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -142,6 +143,31 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_ghostwrite(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_GHOSTWRITE)) + return false; + + /* + * target-c9xx cores report arch_id and impid as 0 + * + * While ghostwrite may not affect all c9xx cores that implement + * xtheadvector, there is no futher granularity than c9xx. Assume + * vulnerable for this entire class of processors when xtheadvector is + * enabled. + */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + ghostwrite_set_vulnerable(); + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -155,6 +181,8 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + errata_probe_ghostwrite(stage, archid, impid); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/bugs.h b/arch/riscv/include/asm/bugs.h new file mode 100644 index 0000000000000..17ca0a9477307 --- /dev/null +++ b/arch/riscv/include/asm/bugs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for riscv vulnerabilities. + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef __ASM_BUGS_H +#define __ASM_BUGS_H + +/* Watch out, ordering is important here. */ +enum mitigation_state { + UNAFFECTED, + MITIGATED, + VULNERABLE, +}; + +void ghostwrite_set_vulnerable(void); +bool ghostwrite_enable_mitigation(void); +enum mitigation_state ghostwrite_get_state(void); + +#endif /* __ASM_BUGS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 4bd054c54c21a..569140d6e6399 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -34,6 +34,8 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; +extern u32 thead_vlenb_of; + void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 37bdea65bbd8a..6fed42e377059 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -30,6 +30,12 @@ #define SR_VS_CLEAN _AC(0x00000400, UL) #define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */ +#define SR_VS_OFF_THEAD _AC(0x00000000, UL) +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) @@ -315,6 +321,15 @@ #define CSR_STIMECMP 0x14D #define CSR_STIMECMPH 0x15D +/* xtheadvector symbolic CSR names */ +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa + +/* xtheadvector CSR masks */ +#define CSR_VXRM_MASK 3 +#define CSR_VXRM_SHIFT 1 +#define CSR_VXSAT_MASK 1 + /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ #define CSR_SISELECT 0x150 #define CSR_SIREG 0x151 diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 7c8a71a526a30..6e426ed7919a4 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -25,7 +25,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_GHOSTWRITE 2 +#define ERRATA_THEAD_NUMBER 3 #endif #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index fc8130f995c1e..72be100afa236 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -85,7 +85,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __enable_user_access(); __asm__ __volatile__ ( - "1: lr.w.aqrl %[v],%[u] \n" + "1: lr.w %[v],%[u] \n" " bne %[v],%z[ov],3f \n" "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1ce1df6d0ff3c..dd624523981c8 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _ASM_HWPROBE_H @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 10 +#define RISCV_HWPROBE_MAX_KEY 11 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { @@ -21,6 +21,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: return true; } diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 94e33216b2d94..0e71eb82f920c 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -117,7 +117,7 @@ do { \ __set_prev_cpu(__prev->thread); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ - if (has_vector()) \ + if (has_vector() || has_xtheadvector()) \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c7c023afbacd7..e8a83f55be2ba 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,27 @@ #include #include #include +#include +#include +#include + +#define __riscv_v_vstate_or(_val, TYPE) ({ \ + typeof(_val) _res = _val; \ + if (has_xtheadvector()) \ + _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \ + else \ + _res = (_res & ~SR_VS) | SR_VS_##TYPE; \ + _res; \ +}) + +#define __riscv_v_vstate_check(_val, TYPE) ({ \ + bool _res; \ + if (has_xtheadvector()) \ + _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \ + else \ + _res = ((_val) & SR_VS) == SR_VS_##TYPE; \ + _res; \ +}) extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); @@ -41,39 +62,62 @@ static __always_inline bool has_vector(void) return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X); } +static __always_inline bool has_xtheadvector_no_alternatives(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR); + else + return false; +} + +static __always_inline bool has_xtheadvector(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XTHEADVECTOR); + else + return false; +} + static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; + regs->status = __riscv_v_vstate_or(regs->status, CLEAN); } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; + regs->status = __riscv_v_vstate_or(regs->status, DIRTY); } static inline void riscv_v_vstate_off(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; + regs->status = __riscv_v_vstate_or(regs->status, OFF); } static inline void riscv_v_vstate_on(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; + regs->status = __riscv_v_vstate_or(regs->status, INITIAL); } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { - return (regs->status & SR_VS) != 0; + return !__riscv_v_vstate_check(regs->status, OFF); } static __always_inline void riscv_v_enable(void) { - csr_set(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_set(CSR_SSTATUS, SR_VS_THEAD); + else + csr_set(CSR_SSTATUS, SR_VS); } static __always_inline void riscv_v_disable(void) { - csr_clear(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_clear(CSR_SSTATUS, SR_VS_THEAD); + else + csr_clear(CSR_SSTATUS, SR_VS); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) @@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" - "csrr %3, " __stringify(CSR_VCSR) "\n\t" - "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr) : :); + + if (has_xtheadvector()) { + unsigned long status; + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. + * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT; + + dest->vlenb = riscv_v_vsize / 32; + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + dest->vcsr = csr_read(CSR_VCSR); + dest->vlenb = csr_read(CSR_VLENB); + } } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) @@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" - "csrw " __stringify(CSR_VCSR) ", %3\n\t" - : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl)); + + if (has_xtheadvector()) { + unsigned long status = csr_read(CSR_SSTATUS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + + csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK); + csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK); + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + csr_write(CSR_VCSR, src->vcsr); + } } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } riscv_v_disable(); } @@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vle8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vle8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } __vstate_csr_restore(restore_from); riscv_v_disable(); } @@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void) unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); riscv_v_enable(); + if (has_xtheadvector()) + asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4"); + else + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + ".option pop\n\t": "=&r" (vl)); + asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vmv.v.i v0, -1\n\t" "vmv.v.i v8, -1\n\t" "vmv.v.i v16, -1\n\t" "vmv.v.i v24, -1\n\t" "vsetvl %0, x0, %1\n\t" ".option pop\n\t" - : "=&r" (vl) : "r" (vtype_inval) : "memory"); + : "=&r" (vl) : "r" (vtype_inval)); + riscv_v_disable(); } static inline void riscv_v_vstate_discard(struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_OFF) - return; - - __riscv_v_vstate_discard(); - __riscv_v_vstate_dirty(regs); + if (riscv_v_vstate_query(regs)) { + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); + } } static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(regs->status, DIRTY)) { __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_set_restore(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); riscv_v_vstate_on(regs); } @@ -270,6 +392,8 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } +static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; } +static __always_inline bool has_xtheadvector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h new file mode 100644 index 0000000000000..e85c75b3b3408 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H + +#include + +#include + +/* + * Extension keys must be strictly less than RISCV_ISA_VENDOR_EXT_MAX. + */ +#define RISCV_ISA_VENDOR_EXT_XTHEADVECTOR 0 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead; + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void disable_xtheadvector(void); +#else +static inline void disable_xtheadvector(void) { } +#endif + +/* Extension specific helpers */ + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. + */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h new file mode 100644 index 0000000000000..65a9c5612466d --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H + +#include + +#include + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h new file mode 100644 index 0000000000000..6b9293e984a92 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_RISCV_SYS_HWPROBE_H +#define _ASM_RISCV_SYS_HWPROBE_H + +#include + +#define VENDOR_EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_VENDOR_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + } while (false) + +/* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + * + * _extension_checks is an arbitrary C block to set the values of pair->value + * and missing. It should be filled with VENDOR_EXT_KEY expressions. + */ +#define VENDOR_EXTENSION_SUPPORTED(pair, cpus, per_hart_vendor_bitmap, _extension_checks) \ + do { \ + int cpu; \ + u64 missing = 0; \ + for_each_cpu(cpu, (cpus)) { \ + struct riscv_isavendorinfo *isainfo = &(per_hart_vendor_bitmap)[cpu]; \ + _extension_checks \ + } \ + (pair)->value &= ~missing; \ + } while (false) \ + +#endif /* _ASM_RISCV_SYS_HWPROBE_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f778..c3c1cc951cb94 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _UAPI_ASM_HWPROBE_H @@ -94,6 +94,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/vendor/thead.h b/arch/riscv/include/uapi/asm/vendor/thead.h new file mode 100644 index 0000000000000..43790ebe5faf3 --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/thead.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR (1 << 0) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 063d1faf5a53d..8d186bfced451 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -123,3 +123,5 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 0000000000000..3655fe7d678cd --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include +#include +#include + +#include +#include + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c20..c6ba750536c32 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -39,6 +41,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; +u32 thead_vlenb_of; + /** * riscv_isa_extension_base() - Get base extension word * @@ -791,9 +795,50 @@ static void __init riscv_fill_vendor_ext_list(int cpu) } } +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; @@ -844,6 +889,17 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) riscv_fill_vendor_ext_list(cpu); } + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); + } + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) return -ENOENT; @@ -896,7 +952,8 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { /* * This cannot fail when called on the boot hart */ diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03ab..99972a48e86bc 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7891294abf493..7c244de771800 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -190,7 +190,7 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -240,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = 0; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index dcd2824194561..94e905eea1dee 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -189,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -211,7 +211,7 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } @@ -284,7 +284,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc486..bcd3b816306c2 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -286,6 +287,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index d022b028ac3ff..184f780c932d4 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. + */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -173,7 +183,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return false; /* Do not handle if V is not supported, or disabled */ @@ -216,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -238,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -249,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -299,7 +309,7 @@ static const struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -309,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a8126d1183412..a31ff84740ebc 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif }; const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); @@ -41,6 +45,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif default: return false; } diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 6a61aed944f17..866414c81a9f5 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 0000000000000..519dbf70710af --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include +#include + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 0000000000000..2eba340117869 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include + +#include +#include + +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a9f2b4af8f3f1..0194324a0c506 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -22,6 +22,57 @@ #include "../kernel/head.h" +static void show_pte(unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + struct mm_struct *mm = current->mm; + + if (!mm) + mm = &init_mm; + + pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", + current->comm, PAGE_SIZE / SZ_1K, VA_BITS, + mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); + + pgdp = pgd_offset(mm, addr); + pgd = pgdp_get(pgdp); + pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); + if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) + goto out; + + p4dp = p4d_offset(pgdp, addr); + p4d = p4dp_get(p4dp); + pr_cont(", p4d=%016lx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) + goto out; + + pudp = pud_offset(p4dp, addr); + pud = pudp_get(pudp); + pr_cont(", pud=%016lx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) + goto out; + + pmdp = pmd_offset(pudp, addr); + pmd = pmdp_get(pmdp); + pr_cont(", pmd=%016lx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) + goto out; + + ptep = pte_offset_map(pmdp, addr); + if (!ptep) + goto out; + + pte = ptep_get(ptep); + pr_cont(", pte=%016lx", pte_val(pte)); + pte_unmap(ptep); +out: + pr_cont("\n"); +} + static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { @@ -31,6 +82,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, addr); bust_spinlocks(0); + show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 722178ae34883..15b2eda4c364b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -268,8 +268,12 @@ static void __init setup_bootmem(void) */ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) { max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; - memblock_cap_memory_range(phys_ram_base, - max_mapped_addr - phys_ram_base); + if (memblock_end_of_DRAM() > max_mapped_addr) { + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + pr_warn("Physical memory overflows the linear mapping size: region above %pa removed", + &max_mapped_addr); + } } /* diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index df82f54107693..1ae5478cd6aca 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS=arch/s390/tools/relocs OUT_RELOCS = arch/s390/boot @@ -19,11 +20,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - vmlinux: FORCE $(call cmd,relocs) $(call cmd,strip_relocs) diff --git a/arch/sh/Kbuild b/arch/sh/Kbuild index 056efec72c2a0..0da6c6d6821ad 100644 --- a/arch/sh/Kbuild +++ b/arch/sh/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ boards/ obj-$(CONFIG_SH_FPU_EMU) += math-emu/ -obj-$(CONFIG_USE_BUILTIN_DTB) += boot/dts/ obj-$(CONFIG_HD6446X_SERIES) += cchips/hd6446x/ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 04ff5fb9242ed..89185af7bcc98 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -648,10 +648,11 @@ endmenu menu "Boot options" -config USE_BUILTIN_DTB +config BUILTIN_DTB bool "Use builtin DTB" default n depends on SH_DEVICE_TREE + select GENERIC_BUILTIN_DTB help Link a device tree blob for particular hardware into the kernel, suppressing use of the DTB pointer provided by the bootloader. @@ -659,10 +660,10 @@ config USE_BUILTIN_DTB not capable of providing a DTB to the kernel, or for experimental hardware without stable device tree bindings. -config BUILTIN_DTB_SOURCE +config BUILTIN_DTB_NAME string "Source file for builtin DTB" default "" - depends on USE_BUILTIN_DTB + depends on BUILTIN_DTB help Base name (without suffix, relative to arch/sh/boot/dts) for the a DTS file that will be used to produce the DTB linked into the diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index 109bec4dad94a..1af93be61b1ff 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -80,8 +80,8 @@ config SH_7724_SOLUTION_ENGINE select SOLUTION_ENGINE depends on CPU_SUBTYPE_SH7724 select GPIOLIB - select SND_SOC_AK4642 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR + imply SND_SOC_AK4642 if SND_SIMPLE_CARD help Select 7724 SolutionEngine if configuring for a Hitachi SH7724 evaluation board. @@ -259,8 +259,8 @@ config SH_ECOVEC bool "EcoVec" depends on CPU_SUBTYPE_SH7724 select GPIOLIB - select SND_SOC_DA7210 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR + imply SND_SOC_DA7210 if SND_SIMPLE_CARD help Renesas "R0P7724LC0011/21RL (EcoVec)" support. diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile index 4a6dec9714a9e..d109978a5eb9c 100644 --- a/arch/sh/boot/dts/Makefile +++ b/arch/sh/boot/dts/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_USE_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE)) +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME)) diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 4e6835de54cf8..9022d8af9d686 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -43,9 +43,9 @@ int arch_show_interrupts(struct seq_file *p, int prec) { int j; - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat.__nmi_count, j)); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat.__nmi_count, j), 10); seq_printf(p, " Non-maskable interrupts\n"); seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index f2b6f16a46b85..039a51291002b 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -249,7 +249,7 @@ void __ref sh_fdt_init(phys_addr_t dt_phys) /* Avoid calling an __init function on secondary cpus. */ if (done) return; -#ifdef CONFIG_USE_BUILTIN_DTB +#ifdef CONFIG_BUILTIN_DTB dt_virt = __dtb_start; #else dt_virt = phys_to_virt(dt_phys); @@ -323,7 +323,7 @@ void __init setup_arch(char **cmdline_p) sh_early_platform_driver_probe("earlyprintk", 1, 1); #ifdef CONFIG_OF_EARLY_FLATTREE -#ifdef CONFIG_USE_BUILTIN_DTB +#ifdef CONFIG_BUILTIN_DTB unflatten_and_copy_device_tree(); #else unflatten_device_tree(); diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 134a58f93c859..9158c936c1281 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -51,6 +51,7 @@ static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { + struct timespec64 ts; unsigned long long secs; if (!enable && !uml_rtc_alarm_enabled) @@ -58,7 +59,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) uml_rtc_alarm_enabled = enable; - secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + read_persistent_clock64(&ts); + secs = uml_rtc_alarm_time - ts.tv_sec; if (time_travel_mode == TT_MODE_OFF) { if (!enable) { @@ -73,7 +75,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) if (enable) time_travel_add_event_rel(¨_rtc_alarm_event, - secs * NSEC_PER_SEC); + secs * NSEC_PER_SEC - + ts.tv_nsec); } return 0; diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h deleted file mode 100644 index 2efac58271880..0000000000000 --- a/arch/um/include/asm/fixmap.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_FIXMAP_H -#define __UM_FIXMAP_H - -#include -#include -#include -#include - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -/* - * on UP currently we will have no trace of the fixmap mechanizm, - * no page table allocations, etc. This might change in the - * future, say framebuffers for the console driver(s) could be - * fix-mapped? - */ -enum fixed_addresses { - __end_of_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ - -#define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - -#include - -#endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 0bd60afcc37d5..5601ca98e8a6a 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -8,7 +8,8 @@ #ifndef __UM_PGTABLE_H #define __UM_PGTABLE_H -#include +#include +#include #define _PAGE_PRESENT 0x001 #define _PAGE_NEEDSYNC 0x002 @@ -48,11 +49,9 @@ extern unsigned long end_iomem; #define VMALLOC_OFFSET (__va_space) #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -#define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#define VMALLOC_END (TASK_SIZE-2*PAGE_SIZE) #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index d988129074937..befed230aac28 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -74,6 +73,7 @@ void __init mem_init(void) kmalloc_ok = 1; } +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) /* * Create a page table and place a pointer to it in a middle page * directory entry. @@ -152,7 +152,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end, static void __init fixaddr_user_init( void) { -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pte_t *pte; phys_t p; @@ -174,13 +173,12 @@ static void __init fixaddr_user_init( void) pte = virt_to_kpte(vaddr); pte_set_val(*pte, p, PAGE_READONLY); } -#endif } +#endif void __init paging_init(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; - unsigned long vaddr; empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); @@ -191,14 +189,9 @@ void __init paging_init(void) max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; free_area_init(max_zone_pfn); - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); - +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) fixaddr_user_init(); +#endif } /* diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 30bdc0a87dc85..e5a2d4d897e0c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -213,14 +213,6 @@ int __uml_cant_sleep(void) { /* Is in_interrupt() really needed? */ } -int user_context(unsigned long sp) -{ - unsigned long stack; - - stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); - return stack != (unsigned long) current_thread_info(); -} - extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; void do_uml_exitcalls(void) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8037a967225d8..79ea97d4797ec 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -264,7 +264,7 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) -static void parse_host_cpu_flags(char *line) +static void __init parse_host_cpu_flags(char *line) { int i; for (i = 0; i < 32*NCAPINTS; i++) { @@ -272,7 +272,8 @@ static void parse_host_cpu_flags(char *line) set_cpu_cap(&boot_cpu_data, i); } } -static void parse_cache_line(char *line) + +static void __init parse_cache_line(char *line) { long res; char *to_parse = strstr(line, ":"); @@ -288,7 +289,7 @@ static void parse_cache_line(char *line) } } -static unsigned long get_top_address(char **envp) +static unsigned long __init get_top_address(char **envp) { unsigned long top_addr = (unsigned long) &top_addr; int i; @@ -376,9 +377,8 @@ int __init linux_main(int argc, char **argv, char **envp) iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - - if (physmem_size + iomem_size > max_physmem) { - physmem_size = max_physmem - iomem_size; + if (physmem_size > max_physmem) { + physmem_size = max_physmem; os_info("Physical memory size shrunk to %llu bytes\n", physmem_size); } diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 0afcdeb8995b7..3c63ce19e3bf5 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -19,13 +19,11 @@ #include #include "internal.h" -#define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) -#define THREAD_NAME_LEN (256) long elf_aux_hwcap; -static void set_stklim(void) +static void __init set_stklim(void) { struct rlimit lim; @@ -48,7 +46,7 @@ static void last_ditch_exit(int sig) exit(1); } -static void install_fatal_handler(int sig) +static void __init install_fatal_handler(int sig) { struct sigaction action; @@ -73,7 +71,7 @@ static void install_fatal_handler(int sig) #define UML_LIB_PATH ":" OS_LIB_PATH "/uml" -static void setup_env_path(void) +static void __init setup_env_path(void) { char *new_path = NULL; char *old_path = NULL; diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink index fef2e977cc7dc..8b8a68162c940 100644 --- a/arch/x86/Makefile.postlink +++ b/arch/x86/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS = arch/x86/tools/relocs OUT_RELOCS = arch/x86/boot/compressed @@ -20,11 +21,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ $(CMD_RELOCS) --abs-relocs $@ -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index ab9f3dd87c805..ab0c78855ecb2 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec); extern int hpet_set_periodic_freq(unsigned long freq); -extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); extern int hpet_register_irq_handler(rtc_irq_handler handler); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index ce4677b8b7356..3b496cdcb74b3 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -37,6 +37,8 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 2886cb668d7fa..795fdd53bd0a6 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -151,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 69e79fff41b80..02fc2aa06e9e0 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -222,6 +222,7 @@ struct flush_tlb_info { unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; + u8 trim_cpumask; }; void flush_tlb_local(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7c15d6e83c377..dae6a73be40e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -226,6 +226,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) return 0; } +static int __init +acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + /* Ignore invalid ID */ + if (processor->id == 0xff) + return 0; + + /* Ignore processors that can not be onlined */ + if (!acpi_is_processor_usable(processor->lapic_flags)) + return 0; + + has_lapic_cpus = true; + return 0; +} + static int __init acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { @@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); - has_lapic_cpus = true; return 0; } @@ -1026,6 +1047,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { int count, x2count = 0; + struct acpi_subtable_proc madt_proc[2]; + int ret; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1034,10 +1057,27 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); + /* Check if there are valid LAPIC entries */ + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC); + + /* + * Enumerate the APIC IDs in the order that they appear in the + * MADT, no matter LAPIC entry or x2APIC entry is used. + */ + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; + madt_proc[1].handler = acpi_parse_x2apic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); + if (ret < 0) { + pr_err("Error parsing LAPIC/X2APIC entries\n"); + return ret; + } + count = madt_proc[0].count; + x2count = madt_proc[1].count; } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 243843e44e89d..c71b575bf2292 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1854,11 +1854,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) return temp_state; } +__ro_after_init struct mm_struct *poking_mm; +__ro_after_init unsigned long poking_addr; + static inline void unuse_temporary_mm(temp_mm_state_t prev_state) { lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev_state.mm, current); + /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */ + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm)); + /* * Restore the breakpoints if they were disabled before the temporary mm * was loaded. @@ -1867,9 +1874,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) hw_breakpoint_restore(); } -__ro_after_init struct mm_struct *poking_mm; -__ro_after_init unsigned long poking_addr; - static void text_poke_memcpy(void *dst, const void *src, size_t len) { memcpy(dst, src, len); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9182303a50b0e..7f4b2966e15cb 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1382,12 +1382,6 @@ int hpet_set_periodic_freq(unsigned long freq) } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} -EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); - static void hpet_rtc_timer_reinit(void) { unsigned int delta; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ac52255fab01f..296d294142c8d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,7 +7,6 @@ #include /* test_thread_flag(), ... */ #include /* task_stack_*(), ... */ #include /* oops_begin/end, ... */ -#include /* search_exception_tables */ #include /* max_low_pfn */ #include /* kfence_handle_page_fault */ #include /* NOKPROBE_SYMBOL, ... */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a2becb85bea79..6cf881a942bbe 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -607,18 +607,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Stop remote flushes for the previous mm. - * Skip kernel threads; we never send init_mm TLB flushing IPIs, - * but the bitmap manipulation can cause cache line contention. + * Leave this CPU in prev's mm_cpumask. Atomic writes to + * mm_cpumask can be expensive under contention. The CPU + * will be removed lazily at TLB flush time. */ - if (prev != &init_mm) { - VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, - mm_cpumask(prev))); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - } + VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, + mm_cpumask(prev))); /* Start receiving IPIs and then read tlb_gen (and LAM below) */ - if (next != &init_mm) + if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); @@ -760,10 +757,13 @@ static void flush_tlb_func(void *info) if (!local) { inc_irq_stat(irq_tlb_count); count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + } - /* Can only happen on remote CPUs */ - if (f->mm && f->mm != loaded_mm) - return; + /* The CPU was left in the mm_cpumask of the target mm. Clear it. */ + if (f->mm && f->mm != loaded_mm) { + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); + trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); + return; } if (unlikely(loaded_mm == &init_mm)) @@ -893,9 +893,36 @@ static void flush_tlb_func(void *info) nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) +{ + struct flush_tlb_info *info = data; + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -929,7 +956,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -980,6 +1007,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -1022,6 +1050,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. */ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h deleted file mode 100644 index c17cf68dda0f1..0000000000000 --- a/arch/x86/um/asm/archparam.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Copyright 2003 PathScale, Inc. - * Licensed under the GPL - */ - -#ifndef __UM_ARCHPARAM_H -#define __UM_ARCHPARAM_H - -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -#endif - -#endif diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 2dd4ca6713f8b..8f7476ff6e95d 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -74,8 +74,6 @@ struct uml_pt_regs { #define UPT_FAULTINFO(r) (&(r)->faultinfo) #define UPT_IS_USER(r) ((r)->is_user) -extern int user_context(unsigned long sp); - extern int arch_init_registers(int pid); #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f1cf7f2909f3a..9ed93d91d754a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1546,6 +1546,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; + unsigned int memflags; int ret; if (blkcg_policy_enabled(q, pol)) @@ -1560,7 +1561,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) return -EINVAL; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); retry: spin_lock_irq(&q->queue_lock); @@ -1624,7 +1625,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) spin_unlock_irq(&q->queue_lock); out: if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (pinned_blkg) blkg_put(pinned_blkg); if (pd_prealloc) @@ -1668,12 +1669,13 @@ void blkcg_deactivate_policy(struct gendisk *disk, { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; + unsigned int memflags; if (!blkcg_policy_enabled(q, pol)) return; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); @@ -1697,7 +1699,7 @@ void blkcg_deactivate_policy(struct gendisk *disk, mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 32fb28a6372cd..d6c4fa3943b5c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -430,7 +430,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) refcount_set(&q->refs, 1); mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); - mutex_init(&q->sysfs_dir_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); spin_lock_init(&q->queue_lock); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index c9eb4241e0483..d479f5481b66a 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -111,7 +111,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk) struct request_queue *q = disk->queue; int i, ret; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -155,7 +154,6 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk) struct blk_independent_access_ranges *iars = disk->ia_ranges; int i; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -289,7 +287,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk, { struct request_queue *q = disk->queue; - mutex_lock(&q->sysfs_dir_lock); mutex_lock(&q->sysfs_lock); if (iars && !disk_check_ia_ranges(disk, iars)) { kfree(iars); @@ -313,6 +310,5 @@ void disk_set_independent_access_ranges(struct gendisk *disk, disk_register_independent_access_ranges(disk); unlock: mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); } EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5894ec9696e7..65a1d4427ccf4 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, u32 qos[NR_QOS_PARAMS]; bool enable, user; char *body, *p; + unsigned int memflags; int ret; blkg_conf_init(&ctx, input); @@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(disk->queue); } - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); spin_lock_irq(&ioc->lock); @@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, wbt_enable_default(disk); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3355,7 +3356,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); ret = -EINVAL; err: @@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, { struct blkg_conf_ctx ctx; struct request_queue *q; + unsigned int memflags; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; @@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(q); } - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&ioc->lock); @@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3502,7 +3504,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); ret = -EINVAL; err: diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ebb522788d978..42c1e0b9a68f2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work) */ enabled = atomic_read(&blkiolat->enable_cnt); if (enabled != blkiolat->enabled) { - blk_mq_freeze_queue(blkiolat->rqos.disk->queue); + unsigned int memflags; + + memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue); blkiolat->enabled = enabled; - blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue); + blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags); } } diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index ad8d6a363f24a..444798c5374f4 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -87,7 +87,6 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, return; fallback: - WARN_ON_ONCE(qmap->nr_queues > 1); - blk_mq_clear_mq_map(qmap); + blk_mq_map_queues(qmap); } EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 156e9bb07abf1..3feeeccf8a992 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -223,30 +223,27 @@ int blk_mq_sysfs_register(struct gendisk *disk) unsigned long i, j; int ret; - lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) - goto out; + return ret; kobject_uevent(q->mq_kobj, KOBJ_ADD); + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) - goto unreg; + goto out_unreg; } + mutex_unlock(&q->tag_set->tag_list_lock); + return 0; - q->mq_sysfs_init_done = true; - -out: - return ret; - -unreg: +out_unreg: queue_for_each_hw_ctx(q, hctx, j) { if (j < i) blk_mq_unregister_hctx(hctx); } + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); @@ -259,15 +256,13 @@ void blk_mq_sysfs_unregister(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; unsigned long i; - lockdep_assert_held(&q->sysfs_dir_lock); - + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - - q->mq_sysfs_init_done = false; } void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) @@ -275,15 +270,11 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - -unlock: - mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -292,9 +283,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + goto out; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -302,8 +292,6 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } -unlock: - mutex_unlock(&q->sysfs_dir_lock); - +out: return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index da39a1cac7022..40490ac880457 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); -void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue_nomemsave(struct request_queue *q) { blk_freeze_queue_start(q); blk_mq_freeze_queue_wait(q); } -EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave); bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) { @@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) return unfreeze; } -void blk_mq_unfreeze_queue(struct request_queue *q) +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q) { if (__blk_mq_unfreeze_queue(q, false)) blk_unfreeze_release_lock(q); } -EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); +EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore); /* * non_owner variant of blk_freeze_queue_start @@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, bool shared) { struct request_queue *q; + unsigned int memflags; lockdep_assert_held(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); queue_set_hctx_shared(q, shared); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } } @@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, struct request_queue *q; LIST_HEAD(head); int prev_nr_hw_queues = set->nr_hw_queues; + unsigned int memflags; int i; lockdep_assert_held(&set->tag_list_lock); @@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; + memflags = memalloc_noio_save(); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue(q); + blk_mq_freeze_queue_nomemsave(q); + /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5052,7 +5056,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_elv_switch_back(&head, q); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) diff --git a/block/blk-pm.c b/block/blk-pm.c index 42e8420747153..8d3e052f91da1 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q) if (percpu_ref_is_zero(&q->q_usage_counter)) ret = 0; /* Switch q_usage_counter back to per-cpu mode. */ - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); if (ret < 0) { spin_lock_irq(&q->queue_lock); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index eb9618cd68adf..d4d4f4dc0e23f 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); @@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. */ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); @@ -327,7 +328,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, return 0; ebusy: - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return -EBUSY; } @@ -335,17 +336,18 @@ void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); diff --git a/block/blk-settings.c b/block/blk-settings.c index db12396ff5c79..c44dadc35e1ec 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update); int queue_limits_commit_update_frozen(struct request_queue *q, struct queue_limits *lim) { + unsigned int memflags; int ret; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); ret = queue_limits_commit_update(q, lim); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e09b455874bfd..6f548a4376aa4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; - unsigned int noio_flag; + unsigned int memflags; ssize_t res; if (!entry->store_limit && !entry->store) @@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } mutex_lock(&q->sysfs_lock); - blk_mq_freeze_queue(q); - noio_flag = memalloc_noio_save(); + memflags = blk_mq_freeze_queue(q); res = entry->store(disk, page, length); - memalloc_noio_restore(noio_flag); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_unlock(&q->sysfs_lock); return res; } @@ -764,7 +762,6 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - mutex_lock(&q->sysfs_dir_lock); kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) @@ -805,7 +802,6 @@ int blk_register_queue(struct gendisk *disk) if (q->elevator) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); /* * SCSI probing may synchronously create and destroy a lot of @@ -830,7 +826,6 @@ int blk_register_queue(struct gendisk *disk) mutex_unlock(&q->sysfs_lock); out_put_queue_kobj: kobject_put(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); return ret; } @@ -861,7 +856,6 @@ void blk_unregister_queue(struct gendisk *disk) blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); mutex_unlock(&q->sysfs_lock); - mutex_lock(&q->sysfs_dir_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. @@ -878,7 +872,6 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); kobject_del(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); blk_debugfs_remove(disk); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 82dbaefcfa3bf..8d149aff9fd0b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct throtl_data *td; + unsigned int memflags; int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); @@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk) * Freeze queue before activating policy, to synchronize with IO path, * which is protected by 'q_usage_counter'. */ - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); q->td = td; @@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk) out: blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); return ret; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 9d08a54c201ee..761ea662ddc34 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk) else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); if (ret) { - blk_mq_freeze_queue(q); + unsigned int memflags = blk_mq_freeze_queue(q); + disk_free_zone_resources(disk); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } return ret; diff --git a/block/elevator.c b/block/elevator.c index b81216c48b6bc..cd2ce49216010 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -570,6 +570,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; + unsigned int memflags; int err; WARN_ON_ONCE(blk_queue_registered(q)); @@ -590,13 +591,13 @@ void elevator_init_mq(struct request_queue *q) * * Disk isn't added yet, so verifying queue lock only manually. */ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (err) { pr_warn("\"%s\" elevator initialization failed, " @@ -614,11 +615,12 @@ void elevator_init_mq(struct request_queue *q) */ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { + unsigned int memflags; int ret; lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); if (q->elevator) { @@ -639,7 +641,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) out_unfreeze: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (ret) { pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n", @@ -651,9 +653,11 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) void elevator_disable(struct request_queue *q) { + unsigned int memflags; + lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); elv_unregister_queue(q); @@ -664,7 +668,7 @@ void elevator_disable(struct request_queue *q) blk_add_trace_msg(q, "elv switch: none"); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } /* diff --git a/block/fops.c b/block/fops.c index 6d5c4fc5a2168..be9f1dbea9ce0 100644 --- a/block/fops.c +++ b/block/fops.c @@ -783,11 +783,12 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) file_accessed(iocb->ki_filp); ret = blkdev_direct_IO(iocb, to); - if (ret >= 0) { + if (ret > 0) { iocb->ki_pos += ret; count -= ret; } - iov_iter_revert(to, count - iov_iter_count(to)); + if (ret != -EIOCBQUEUED) + iov_iter_revert(to, count - iov_iter_count(to)); if (ret < 0 || !count) goto reexpand; } diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 59fffe34c9d04..00ac0d7bb8c9f 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -95,9 +95,13 @@ int __init fix_pxm_node_maps(int max_nid) int i, j, index = -1, count = 0; nodemask_t nodes_to_enable; - if (numa_off || srat_disabled()) + if (numa_off) return -1; + /* no or incomplete node/PXM mapping set, nothing to do */ + if (srat_disabled()) + return 0; + /* find fake nodes PXM mapping */ for (i = 0; i < MAX_NUMNODES; i++) { if (node_to_pxm_map[i] != PXM_INVAL) { @@ -117,6 +121,11 @@ int __init fix_pxm_node_maps(int max_nid) } } } + if (index == -1) { + pr_debug("No node/PXM mapping has been set\n"); + /* nothing more to be done */ + return 0; + } if (WARN(index != max_nid, "%d max nid when expected %d\n", index, max_nid)) return -1; diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index cb45ef5240dab..068c1612660bc 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -407,6 +407,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), }, + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c085dd81ebe7f..63ec2f2184319 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,6 +4143,10 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, + { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | + ATA_QUIRK_ZERO_AFTER_TRIM | + ATA_QUIRK_NO_NCQ_ON_ATI | + ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 67f277e1c3bf3..5a46c066abc36 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -601,7 +601,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct page *page; - unsigned int offset; + unsigned int offset, count; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -617,25 +617,27 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) page = nth_page(page, (offset >> PAGE_SHIFT)); offset %= PAGE_SIZE; - trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size); + /* don't overrun current sg */ + count = min(qc->cursg->length - qc->cursg_ofs, qc->sect_size); + + trace_ata_sff_pio_transfer_data(qc, offset, count); /* * Split the transfer when it splits a page boundary. Note that the * split still has to be dword aligned like all ATA data transfers. */ WARN_ON_ONCE(offset % 4); - if (offset + qc->sect_size > PAGE_SIZE) { + if (offset + count > PAGE_SIZE) { unsigned int split_len = PAGE_SIZE - offset; ata_pio_xfer(qc, page, offset, split_len); - ata_pio_xfer(qc, nth_page(page, 1), 0, - qc->sect_size - split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, count - split_len); } else { - ata_pio_xfer(qc, page, offset, qc->sect_size); + ata_pio_xfer(qc, page, offset, count); } - qc->curbytes += qc->sect_size; - qc->cursg_ofs += qc->sect_size; + qc->curbytes += count; + qc->cursg_ofs += count; if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fdaa24bb641a0..a7e5118498758 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -599,6 +599,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(ghostwrite); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -614,6 +615,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -630,6 +632,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index cbc9a7a75def7..d497d448e4b2a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -656,13 +656,15 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set - * to avoid confusing drivers that don't use it. + * status to "active" unless its power.set_active flag is clear, in + * which case it is not necessary to update its PM-runtime status. */ - if (skip_resume) + if (skip_resume) { pm_runtime_set_suspended(dev); - else if (dev_pm_skip_suspend(dev)) + } else if (dev->power.set_active) { pm_runtime_set_active(dev); + dev->power.set_active = false; + } if (dev->pm_domain) { info = "noirq power domain "; @@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev) +static void dpm_superior_set_must_resume(struct device *dev, bool set_active) { struct device_link *link; int idx; - if (dev->parent) + if (dev->parent) { dev->parent->power.must_resume = true; + if (set_active) + dev->parent->power.set_active = true; + } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { link->supplier->power.must_resume = true; + if (set_active) + link->supplier->power.set_active = true; + } device_links_read_unlock(idx); } @@ -1278,8 +1286,11 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy dev->power.may_skip_resume)) dev->power.must_resume = true; - if (dev->power.must_resume) - dpm_superior_set_must_resume(dev); + if (dev->power.must_resume) { + dev->power.set_active = dev->power.set_active || + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + dpm_superior_set_must_resume(dev, dev->power.set_active); + } Complete: complete_all(&dev->power.completion); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 3523dd82d7a00..4db7f6ce8ade0 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -226,10 +226,11 @@ aoedev_downdev(struct aoedev *d) /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to insure all are errored */ - blk_mq_freeze_queue(d->blkq); + unsigned int memflags = blk_mq_freeze_queue(d->blkq); + blk_mq_quiesce_queue(d->blkq); blk_mq_unquiesce_queue(d->blkq); - blk_mq_unfreeze_queue(d->blkq); + blk_mq_unfreeze_queue(d->blkq, memflags); } if (d->gd) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 110f9aca2667d..a81ade622a01d 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -746,6 +746,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) unsigned char *p; int sect, nsect; unsigned long flags; + unsigned int memflags; int ret; if (type) { @@ -758,7 +759,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) } q = unit[drive].disk[type]->queue; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); local_irq_save(flags); @@ -817,7 +818,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) ret = FormatError ? -EIO : 0; out: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1ec7417c7f005..c05fe27a96b64 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -586,6 +586,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, { struct file *file = fget(arg); struct file *old_file; + unsigned int memflags; int error; bool partscan; bool is_loop; @@ -623,14 +624,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, /* and ... switch */ disk_force_media_change(lo->lo_disk); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); mapping_set_gfp_mask(file->f_mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); loop_update_dio(lo); - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; loop_global_unlock(lo, is_loop); @@ -1255,6 +1256,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; bool partscan = false; bool size_changed = false; + unsigned int memflags; err = mutex_lock_killable(&lo->lo_mutex); if (err) @@ -1272,7 +1274,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } /* I/O needs to be drained before changing lo_offset or lo_sizelimit */ - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = loop_set_status_from_info(lo, info); if (err) @@ -1281,8 +1283,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) && (info->lo_flags & LO_FLAGS_PARTSCAN); - lo->lo_flags &= ~(LOOP_SET_STATUS_SETTABLE_FLAGS | - LOOP_SET_STATUS_CLEARABLE_FLAGS); + lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); if (size_changed) { @@ -1295,7 +1296,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) loop_update_dio(lo); out_unfreeze: - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); out_unlock: @@ -1447,6 +1448,7 @@ static int loop_set_capacity(struct loop_device *lo) static int loop_set_dio(struct loop_device *lo, unsigned long arg) { bool use_dio = !!arg; + unsigned int memflags; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1460,18 +1462,19 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) vfs_fsync(lo->lo_backing_file, 0); } - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); if (use_dio) lo->lo_flags |= LO_FLAGS_DIRECT_IO; else lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return 0; } static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { struct queue_limits lim; + unsigned int memflags; int err = 0; if (lo->lo_state != Lo_bound) @@ -1486,10 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, arg); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = queue_limits_commit_update(lo->lo_queue, &lim); loop_update_dio(lo); - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b63a0f29a54ab..7bdc7eb808ea9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1234,6 +1234,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + unsigned int memflags; int err; /* Arg will be cast to int, check it to avoid overflow */ @@ -1247,7 +1248,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, * We need to make sure we don't get any errant requests while we're * reallocating the ->socks array. */ - blk_mq_freeze_queue(nbd->disk->queue); + memflags = blk_mq_freeze_queue(nbd->disk->queue); if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) @@ -1288,12 +1289,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, INIT_WORK(&nsock->work, nbd_pending_cmd_work); socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); return 0; put_socket: - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); sockfd_put(sock); return err; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5b393e4a1ddfc..faafd7ff43d6e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7281,9 +7281,10 @@ static ssize_t do_rbd_remove(const char *buf, size_t count) * Prevent new IO from being queued and wait for existing * IO to complete/fail. */ - blk_mq_freeze_queue(rbd_dev->disk->queue); + unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue); + blk_mark_disk_dead(rbd_dev->disk); - blk_mq_unfreeze_queue(rbd_dev->disk->queue); + blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags); } del_gendisk(rbd_dev->disk); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index e4d1e7284daee..33b3bc99d532a 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1113,6 +1113,7 @@ static void vdc_requeue_inflight(struct vdc_port *port) static void vdc_queue_drain(struct vdc_port *port) { struct request_queue *q = port->disk->queue; + unsigned int memflags; /* * Mark the queue as draining, then freeze/quiesce to ensure @@ -1121,12 +1122,12 @@ static void vdc_queue_drain(struct vdc_port *port) port->drain = 1; spin_unlock_irq(&port->vio.lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&port->vio.lock); port->drain = 0; - blk_mq_unquiesce_queue(q); + blk_mq_unquiesce_queue(q, memflags); blk_mq_unfreeze_queue(q); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 9914153b365b6..3aedcb5add61f 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -840,6 +840,7 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, static void release_drive(struct floppy_state *fs) { struct request_queue *q = disks[fs->index]->queue; + unsigned int memflags; unsigned long flags; swim3_dbg("%s", "-> release drive\n"); @@ -848,10 +849,10 @@ static void release_drive(struct floppy_state *fs) fs->state = idle; spin_unlock_irqrestore(&swim3_lock, flags); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static int fd_eject(struct floppy_state *fs) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index bfbe391c20fee..6a61ec35f4260 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1583,11 +1583,12 @@ static int virtblk_freeze_priv(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; struct request_queue *q = vblk->disk->queue; + unsigned int memflags; /* Ensure no requests in virtqueues before deleting vqs. */ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue_nowait(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 704e84d006390..0ee5c691fb36b 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on ARCH_AIROHA || COMPILE_TEST + depends on (ARCH_AIROHA && OF) || COMPILE_TEST select PM_OPP default ARCH_AIROHA help diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 302df42d68875..463b69a2dff52 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) { - set_boost(policy, acpi_cpufreq_driver.boost_enabled); - policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; - } - return result; err_unreg: diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2486a6c5256ae..8f512448382f4 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); - policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); - policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->cpuinfo.max_freq = policy->max; policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1076e37a18ad0..e0048856eceee 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) - policy->boost_enabled = true; - /* * The initialization has succeeded and the policy is online. * If there is a problem with its frequency table, take it @@ -1476,6 +1472,10 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + } else { + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { @@ -1570,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu) if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + if (policy->boost_enabled != cpufreq_boost_enabled()) { + policy->boost_enabled = cpufreq_boost_enabled(); + ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + if (ret) { + /* If the set_boost fails, the online operation is not affected */ + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, + policy->boost_enabled ? "enable" : "disable"); + policy->boost_enabled = !policy->boost_enabled; + } + } + pr_debug("initialization complete\n"); return 0; diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index c6bdfc308e990..9cef715280762 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -24,6 +24,7 @@ struct s3c64xx_dvfs { unsigned int vddarm_max; }; +#ifdef CONFIG_REGULATOR static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [0] = { 1000000, 1150000 }, [1] = { 1050000, 1150000 }, @@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [3] = { 1200000, 1350000 }, [4] = { 1300000, 1350000 }, }; +#endif static struct cpufreq_frequency_table s3c64xx_freq_table[] = { { 0, 0, 66000 }, @@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = { static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { - struct s3c64xx_dvfs *dvfs; - unsigned int old_freq, new_freq; + unsigned int new_freq = s3c64xx_freq_table[index].frequency; int ret; +#ifdef CONFIG_REGULATOR + struct s3c64xx_dvfs *dvfs; + unsigned int old_freq; + old_freq = clk_get_rate(policy->clk) / 1000; - new_freq = s3c64xx_freq_table[index].frequency; dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data]; -#ifdef CONFIG_REGULATOR if (vddarm && new_freq > old_freq) { ret = regulator_set_voltage(vddarm, dvfs->vddarm_min, diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 173ddcac540ad..8fe5e1b47ef90 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -41,11 +41,7 @@ * idle state 2, the third bin spans from the target residency of idle state 2 * up to, but not including, the target residency of idle state 3 and so on. * The last bin spans from the target residency of the deepest idle state - * supplied by the driver to the scheduler tick period length or to infinity if - * the tick period length is less than the target residency of that state. In - * the latter case, the governor also counts events with the measured idle - * duration between the tick period length and the target residency of the - * deepest idle state. + * supplied by the driver to infinity. * * Two metrics called "hits" and "intercepts" are associated with each bin. * They are updated every time before selecting an idle state for the given CPU @@ -60,6 +56,10 @@ * into by the sleep length (these events are also referred to as "intercepts" * below). * + * The governor also counts "intercepts" with the measured idle duration below + * the tick period length and uses this information when deciding whether or not + * to stop the scheduler tick. + * * In order to select an idle state for a CPU, the governor takes the following * steps (modulo the possible latency constraint that must be taken into account * too): @@ -105,6 +105,12 @@ #include "gov.h" +/* + * Idle state exit latency threshold used for deciding whether or not to check + * the time till the closest expected timer event. + */ +#define LATENCY_THRESHOLD_NS (RESIDENCY_THRESHOLD_NS / 2) + /* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. @@ -124,18 +130,20 @@ struct teo_bin { /** * struct teo_cpu - CPU data used by the TEO cpuidle governor. - * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @state_bins: Idle state data bins for this CPU. * @total: Grand total of the "intercepts" and "hits" metrics for all bins. - * @tick_hits: Number of "hits" after TICK_NSEC. + * @tick_intercepts: "Intercepts" before TICK_NSEC. + * @short_idles: Wakeups after short idle periods. + * @artificial_wakeup: Set if the wakeup has been triggered by a safety net. */ struct teo_cpu { - s64 time_span_ns; s64 sleep_length_ns; struct teo_bin state_bins[CPUIDLE_STATE_MAX]; unsigned int total; - unsigned int tick_hits; + unsigned int tick_intercepts; + unsigned int short_idles; + bool artificial_wakeup; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -152,23 +160,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) s64 target_residency_ns; u64 measured_ns; - if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { + cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT; + + if (cpu_data->artificial_wakeup) { /* - * One of the safety nets has triggered or the wakeup was close - * enough to the closest timer event expected at the idle state - * selection time to be discarded. + * If one of the safety nets has triggered, assume that this + * might have been a long sleep. */ measured_ns = U64_MAX; } else { u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; - /* - * The computations below are to determine whether or not the - * (saved) time till the next timer event and the measured idle - * duration fall into the same "bin", so use last_residency_ns - * for that instead of time_span_ns which includes the cpuidle - * overhead. - */ measured_ns = dev->last_residency_ns; /* * The delay between the wakeup and the first instruction @@ -176,14 +178,16 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * time, so take 1/2 of the exit latency as a very rough * approximation of the average of it. */ - if (measured_ns >= lat_ns) + if (measured_ns >= lat_ns) { measured_ns -= lat_ns / 2; - else + if (measured_ns < RESIDENCY_THRESHOLD_NS) + cpu_data->short_idles += PULSE; + } else { measured_ns /= 2; + cpu_data->short_idles += PULSE; + } } - cpu_data->total = 0; - /* * Decay the "hits" and "intercepts" metrics for all of the bins and * find the bins that the sleep length and the measured idle duration @@ -195,8 +199,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) bin->hits -= bin->hits >> DECAY_SHIFT; bin->intercepts -= bin->intercepts >> DECAY_SHIFT; - cpu_data->total += bin->hits + bin->intercepts; - target_residency_ns = drv->states[i].target_residency_ns; if (target_residency_ns <= cpu_data->sleep_length_ns) { @@ -206,38 +208,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } - /* - * If the deepest state's target residency is below the tick length, - * make a record of it to help teo_select() decide whether or not - * to stop the tick. This effectively adds an extra hits-only bin - * beyond the last state-related one. - */ - if (target_residency_ns < TICK_NSEC) { - cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; - - cpu_data->total += cpu_data->tick_hits; - - if (TICK_NSEC <= cpu_data->sleep_length_ns) { - idx_timer = drv->state_count; - if (TICK_NSEC <= measured_ns) { - cpu_data->tick_hits += PULSE; - goto end; - } - } - } - + cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT; /* * If the measured idle duration falls into the same bin as the sleep * length, this is a "hit", so update the "hits" metric for that bin. * Otherwise, update the "intercepts" metric for the bin fallen into by * the measured idle duration. */ - if (idx_timer == idx_duration) + if (idx_timer == idx_duration) { cpu_data->state_bins[idx_timer].hits += PULSE; - else + } else { cpu_data->state_bins[idx_duration].intercepts += PULSE; + if (TICK_NSEC <= measured_ns) + cpu_data->tick_intercepts += PULSE; + } -end: + cpu_data->total -= cpu_data->total >> DECAY_SHIFT; cpu_data->total += PULSE; } @@ -285,14 +271,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); ktime_t delta_tick = TICK_NSEC / 2; - unsigned int tick_intercept_sum = 0; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_hit_sum = 0; unsigned int hit_sum = 0; int constraint_idx = 0; int idx0 = 0, idx = -1; - int prev_intercept_idx; s64 duration_ns; int i; @@ -301,10 +285,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, dev->last_state_idx = -1; } - cpu_data->time_span_ns = local_clock(); /* - * Set the expected sleep length to infinity in case of an early - * return. + * Set the sleep length to infinity in case the invocation of + * tick_nohz_get_sleep_length() below is skipped, in which case it won't + * be known whether or not the subsequent wakeup is caused by a timer. + * It is generally fine to count the wakeup as an intercept then, except + * for the cases when the CPU is mostly woken up by timers and there may + * be opportunities to ask for a deeper idle state when no imminent + * timers are scheduled which may be missed. */ cpu_data->sleep_length_ns = KTIME_MAX; @@ -360,17 +348,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } - tick_intercept_sum = intercept_sum + - cpu_data->state_bins[drv->state_count-1].intercepts; - /* * If the sum of the intercepts metric for all of the idle states * shallower than the current candidate one (idx) is greater than the * sum of the intercepts and hits metrics for the candidate state and - * all of the deeper states a shallower idle state is likely to be a + * all of the deeper states, a shallower idle state is likely to be a * better choice. */ - prev_intercept_idx = idx; if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { int first_suitable_idx = idx; @@ -396,41 +380,38 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * first enabled state that is deep enough. */ if (teo_state_ok(i, drv) && - !dev->states_usage[i].disable) + !dev->states_usage[i].disable) { idx = i; - else - idx = first_suitable_idx; - + break; + } + idx = first_suitable_idx; break; } if (dev->states_usage[i].disable) continue; - if (!teo_state_ok(i, drv)) { + if (teo_state_ok(i, drv)) { /* - * The current state is too shallow, but if an - * alternative candidate state has been found, - * it may still turn out to be a better choice. + * The current state is deep enough, but still + * there may be a better one. */ - if (first_suitable_idx != idx) - continue; - - break; + first_suitable_idx = i; + continue; } - first_suitable_idx = i; + /* + * The current state is too shallow, so if no suitable + * states other than the initial candidate have been + * found, give up (the remaining states to check are + * shallower still), but otherwise the first suitable + * state other than the initial candidate may turn out + * to be preferable. + */ + if (first_suitable_idx == idx) + break; } } - if (!idx && prev_intercept_idx) { - /* - * We have to query the sleep length here otherwise we don't - * know after wakeup if our guess was correct. - */ - duration_ns = tick_nohz_get_sleep_length(&delta_tick); - cpu_data->sleep_length_ns = duration_ns; - goto out_tick; - } /* * If there is a latency constraint, it may be necessary to select an @@ -440,24 +421,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = constraint_idx; /* - * Skip the timers check if state 0 is the current candidate one, - * because an immediate non-timer wakeup is expected in that case. + * If either the candidate state is state 0 or its target residency is + * low enough, there is basically nothing more to do, but if the sleep + * length is not updated, the subsequent wakeup will be counted as an + * "intercept" which may be problematic in the cases when timer wakeups + * are dominant. Namely, it may effectively prevent deeper idle states + * from being selected at one point even if no imminent timers are + * scheduled. + * + * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one + * CPU are unlikely (user space has a default 50 us slack value for + * hrtimers and there are relatively few timers with a lower deadline + * value in the kernel), and even if they did happen, the potential + * benefit from using a deep idle state in that case would be + * questionable anyway for latency reasons. Thus if the measured idle + * duration falls into that range in the majority of cases, assume + * non-timer wakeups to be dominant and skip updating the sleep length + * to reduce latency. + * + * Also, if the latency constraint is sufficiently low, it will force + * shallow idle states regardless of the wakeup type, so the sleep + * length need not be known in that case. */ - if (!idx) - goto out_tick; - - /* - * If state 0 is a polling one, check if the target residency of - * the current candidate state is low enough and skip the timers - * check in that case too. - */ - if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) + if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && + (2 * cpu_data->short_idles >= cpu_data->total || + latency_req < LATENCY_THRESHOLD_NS)) goto out_tick; duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; + if (!idx) + goto out_tick; + /* * If the closest expected timer is before the target residency of the * candidate state, a shallower one needs to be found. @@ -474,7 +470,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * total wakeup events, do not stop the tick. */ if (drv->states[idx].target_residency_ns < TICK_NSEC && - tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) + cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8) duration_ns = TICK_NSEC / 2; end: @@ -511,17 +507,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state) struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); dev->last_state_idx = state; - /* - * If the wakeup was not "natural", but triggered by one of the safety - * nets, assume that the CPU might have been idle for the entire sleep - * length time. - */ if (dev->poll_time_limit || (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) { + /* + * The wakeup was not "genuine", but triggered by one of the + * safety nets. + */ dev->poll_time_limit = false; - cpu_data->time_span_ns = cpu_data->sleep_length_ns; + cpu_data->artificial_wakeup = true; } else { - cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns; + cpu_data->artificial_wakeup = false; } } diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index ee09269c63b51..0a883091259a2 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,15 +6,13 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT - select REGMAP + depends on KUNIT && REGMAP select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && REGMAP default KUNIT_ALL_TESTS - select REGMAP select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index c1772f44b1d74..2523221a2519d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4021,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1405e8affd484..d4593374e7a1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { + while (halt_if_hws_hang) + schedule(); if (reset_queues_on_hws_hang(dqm)) { - while (halt_if_hws_hang) - schedule(); dqm->is_hws_hang = true; kfd_hws_hang(dqm); retval = -ETIME; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9df56f8e09f91..bcddd989c7f39 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. + */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - if (dev->kfd->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b0e66c05d8111..ac3fd81fecef2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12326,10 +12326,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { - amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; - amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { + amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; + } + parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); if (vsdb_info.replay_mode) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1f974ea3b0c65..1648226586e22 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -89,7 +89,7 @@ #define mmCLK1_CLK4_ALLOW_DS 0x16EA8 #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 -#define mmCLK5_spll_field_8 0x1B04B +#define mmCLK5_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f6b0293543275..83163d7c7f001 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index eec95c724b25b..fc438f4457713 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -756,6 +756,7 @@ config I2C_IMX config I2C_IMX_LPI2C tristate "IMX Low Power I2C interface" depends on ARCH_MXC || COMPILE_TEST + select I2C_SLAVE help Say Y here if you want to use the Low Power IIC bus controller on the Freescale i.MX processors. diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index e2c2a2ef1c12c..5546184df05f9 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -583,6 +583,9 @@ static int i2c_device_probe(struct device *dev) goto err_detach_pm_domain; } + client->debugfs = debugfs_create_dir(dev_name(&client->dev), + client->adapter->debugfs); + if (driver->probe) status = driver->probe(client); else @@ -602,6 +605,7 @@ static int i2c_device_probe(struct device *dev) return 0; err_release_driver_resources: + debugfs_remove_recursive(client->debugfs); devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: dev_pm_domain_detach(&client->dev, do_power_on); @@ -627,6 +631,8 @@ static void i2c_device_remove(struct device *dev) driver->remove(client); } + debugfs_remove_recursive(client->debugfs); + devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, true); @@ -1015,8 +1021,6 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf if (status) goto out_remove_swnode; - client->debugfs = debugfs_create_dir(dev_name(&client->dev), adap->debugfs); - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", client->name, dev_name(&client->dev)); @@ -1061,7 +1065,6 @@ void i2c_unregister_device(struct i2c_client *client) if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); - debugfs_remove_recursive(client->debugfs); device_remove_software_node(&client->dev); device_unregister(&client->dev); } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4b..c11b9965c4ad9 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -169,6 +169,7 @@ config IXP4XX_IRQ config LAN966X_OIC tristate "Microchip LAN966x OIC Support" + depends on MCHP_LAN966X_PCI || COMPILE_TEST select GENERIC_IRQ_CHIP select IRQ_DOMAIN help diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index da5250f0155cf..2b1684c60e3ca 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { int irq; if (cpumask_test_cpu(smp_processor_id(), &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index b337f6c05f184..4eebed39880a5 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; - struct mvebu_icu_msi_data *msi_data = d->host_data; + struct msi_domain_info *info = d->host_data; + struct mvebu_icu_msi_data *msi_data = info->chip_data; struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f5..275df50057057 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu) { struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu); - writel_relaxed(IMSIC_IPI_ID, local->msi_va); + writel(IMSIC_IPI_ID, local->msi_va); } static void imsic_ipi_starting_cpu(void) diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c index b0e366ade4271..8ff6e7a1363bd 100644 --- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); static void thead_aclint_sswi_ipi_send(unsigned int cpu) { - writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); + writel(0x1, per_cpu(sswi_cpu_regs, cpu)); } static void thead_aclint_sswi_ipi_clear(void) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index ec4ecd96e6b14..23c09d22fcdbc 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2355,7 +2355,10 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - + if (bitmap->mddev->bitmap_info.external) + return -ENOENT; + if (!bitmap->storage.sb_page) /* no superblock */ + return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); kunmap_local(sb); diff --git a/drivers/md/md.c b/drivers/md/md.c index 22f7bd3b94d58..30b3dbbce2d2d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8376,6 +8376,10 @@ static int md_seq_show(struct seq_file *seq, void *v) return 0; spin_unlock(&all_mddevs_lock); + + /* prevent bitmap to be freed after checking */ + mutex_lock(&mddev->bitmap_info.mutex); + spin_lock(&mddev->lock); if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { seq_printf(seq, "%s : ", mdname(mddev)); @@ -8451,6 +8455,7 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } spin_unlock(&mddev->lock); + mutex_unlock(&mddev->bitmap_info.mutex); spin_lock(&all_mddevs_lock); if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs)) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index a10d4f4d9f95f..deadbcea5e227 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -790,27 +790,14 @@ static const u8 uvc_media_transport_input_guid[16] = UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT; static const u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING; -static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, - u16 id, unsigned int num_pads, - unsigned int extra_size) +static struct uvc_entity *uvc_alloc_entity(u16 type, u16 id, + unsigned int num_pads, unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int size; unsigned int i; - /* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */ - if (id == 0) { - dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n"); - return ERR_PTR(-EINVAL); - } - - /* Per UVC 1.1+ spec 3.7.2, the ID is unique. */ - if (uvc_entity_by_id(dev, id)) { - dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id); - return ERR_PTR(-EINVAL); - } - extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; @@ -820,7 +807,7 @@ static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) - return ERR_PTR(-ENOMEM); + return NULL; entity->id = id; entity->type = type; @@ -932,10 +919,10 @@ static int uvc_parse_vendor_control(struct uvc_device *dev, break; } - unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT, - buffer[3], p + 1, 2 * n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3], + p + 1, 2*n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1044,10 +1031,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT, - buffer[3], 1, n + p); - if (IS_ERR(term)) - return PTR_ERR(term); + term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3], + 1, n + p); + if (term == NULL) + return -ENOMEM; if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; @@ -1103,10 +1090,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return 0; } - term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT, - buffer[3], 1, 0); - if (IS_ERR(term)) - return PTR_ERR(term); + term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3], + 1, 0); + if (term == NULL) + return -ENOMEM; memcpy(term->baSourceID, &buffer[7], 1); @@ -1125,10 +1112,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], - p + 1, 0); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0); + if (unit == NULL) + return -ENOMEM; memcpy(unit->baSourceID, &buffer[5], p); @@ -1148,9 +1134,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = @@ -1177,10 +1163,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], - p + 1, n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1320,10 +1305,9 @@ static int uvc_gpio_parse(struct uvc_device *dev) return dev_err_probe(&dev->intf->dev, irq, "No IRQ for privacy GPIO\n"); - unit = uvc_alloc_new_entity(dev, UVC_EXT_GPIO_UNIT, - UVC_EXT_GPIO_UNIT_ID, 0, 1); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1); + if (!unit) + return -ENOMEM; unit->gpio.gpio_privacy = gpio_privacy; unit->gpio.irq = irq; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index ee7e1d9089861..847c11542f024 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -404,6 +404,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) { unsigned long flags; + unsigned int memflags; lockdep_assert_held(&mtd_table_mutex); @@ -420,10 +421,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) spin_unlock_irqrestore(&old->queue_lock, flags); /* freeze+quiesce queue to ensure all requests are flushed */ - blk_mq_freeze_queue(old->rq); + memflags = blk_mq_freeze_queue(old->rq); blk_mq_quiesce_queue(old->rq); blk_mq_unquiesce_queue(old->rq); - blk_mq_unfreeze_queue(old->rq); + blk_mq_unfreeze_queue(old->rq, memflags); /* If the device is currently open, tell trans driver to close it, then put mtd device, and don't touch it again */ diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 30be4ed68fad2..ef6a22f372f95 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1537,7 +1537,7 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) if (token) { int err = kstrtoint(token, 10, &p->ubi_num); - if (err) { + if (err || p->ubi_num < UBI_DEV_NUM_AUTO) { pr_err("UBI error: bad value for ubi_num parameter: %s\n", token); return -EINVAL; diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 6bb80d7714bc8..b700a0efaa931 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -828,6 +828,70 @@ static int rename_volumes(struct ubi_device *ubi, return err; } +static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user *ureq) +{ + struct ubi_ecinfo_req req; + struct ubi_wl_entry *wl; + int read_cnt; + int peb; + int end_peb; + + /* Copy the input arguments */ + if (copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + /* Check input arguments */ + if (req.length <= 0 || req.start < 0 || req.start >= ubi->peb_count) + return -EINVAL; + + if (check_add_overflow(req.start, req.length, &end_peb)) + return -EINVAL; + + if (end_peb > ubi->peb_count) + end_peb = ubi->peb_count; + + /* Check access rights before filling erase_counters array */ + if (!access_ok((void __user *)ureq->erase_counters, + (end_peb-req.start) * sizeof(int32_t))) + return -EFAULT; + + /* Fill erase counter array */ + read_cnt = 0; + for (peb = req.start; peb < end_peb; read_cnt++, peb++) { + int ec; + + if (ubi_io_is_bad(ubi, peb)) { + if (__put_user(UBI_UNKNOWN, ureq->erase_counters+read_cnt)) + return -EFAULT; + + continue; + } + + spin_lock(&ubi->wl_lock); + + wl = ubi->lookuptbl[peb]; + if (wl) + ec = wl->ec; + else + ec = UBI_UNKNOWN; + + spin_unlock(&ubi->wl_lock); + + if (__put_user(ec, ureq->erase_counters+read_cnt)) + return -EFAULT; + + } + + /* Return actual read length */ + req.read_length = read_cnt; + + /* Copy everything except erase counter array */ + if (copy_to_user(ureq, &req, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + return 0; +} + static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -991,6 +1055,12 @@ static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, break; } + case UBI_IOCECNFO: + { + err = ubi_get_ec_info(ubi, argp); + break; + } + default: err = -ENOTTY; break; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 26cc53ad34ec7..c792b9bcab9bc 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -549,7 +549,6 @@ struct ubi_debug_info { * @peb_buf: a buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf * @ckvol_mutex: serializes static volume checking when opening - * @wl_reboot_notifier: close all wear-leveling work before reboot * * @dbg: debugging information for this UBI device */ @@ -652,7 +651,6 @@ struct ubi_device { void *peb_buf; struct mutex buf_mutex; struct mutex ckvol_mutex; - struct notifier_block wl_reboot_notifier; struct ubi_debug_info dbg; }; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 4f6f339d8fb8a..fbd399cf65033 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -89,7 +89,6 @@ #include #include #include -#include #include "ubi.h" #include "wl.h" @@ -128,8 +127,6 @@ static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root); static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e); -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd); /** * wl_tree_add - add a wear-leveling entry to a WL RB-tree. @@ -1953,13 +1950,6 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (!ubi->ro_mode && !ubi->fm_disabled) ubi_ensure_anchor_pebs(ubi); #endif - - if (!ubi->wl_reboot_notifier.notifier_call) { - ubi->wl_reboot_notifier.notifier_call = ubi_wl_reboot_notifier; - ubi->wl_reboot_notifier.priority = 1; /* Higher than MTD */ - register_reboot_notifier(&ubi->wl_reboot_notifier); - } - return 0; out_free: @@ -2005,17 +1995,6 @@ void ubi_wl_close(struct ubi_device *ubi) kfree(ubi->lookuptbl); } -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd) -{ - struct ubi_device *ubi; - - ubi = container_of(n, struct ubi_device, wl_reboot_notifier); - ubi_wl_close(ubi); - - return NOTIFY_DONE; -} - /** * self_check_ec - make sure that the erase counter of a PEB is correct. * @ubi: UBI device description object diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index fe0e3e2a81171..71e50fc65c147 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) aq_ptp_ring_free(self); aq_ptp_free(self); - if (likely(self->aq_fw_ops->deinit) && link_down) { + /* May be invoked during hot unplug. */ + if (pci_device_is_present(self->pdev) && + likely(self->aq_fw_ops->deinit) && link_down) { mutex_lock(&self->fwreq_mutex); self->aq_fw_ops->deinit(self->aq_hw); mutex_unlock(&self->fwreq_mutex); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 0715ea5bf13ed..3b082114f2e53 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; - if (dev->phydev) + if (dev->phydev) { phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } /* MAC is not wake-up capable, return what the PHY does */ if (!device_can_wakeup(kdev)) @@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Overlay MAC capabilities with that of the PHY queried before */ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; - wol->wolopts = priv->wolopts; - memset(wol->sopass, 0, sizeof(wol->sopass)); + wol->wolopts |= priv->wolopts; + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; + + /* Otherwise the MAC one */ + memset(wol->sopass, 0, sizeof(wol->sopass)); if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } @@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Try Wake-on-LAN from the PHY first */ if (dev->phydev) { ret = phy_ethtool_set_wol(dev->phydev, wol); - if (ret != -EOPNOTSUPP) + if (ret != -EOPNOTSUPP && wol->wolopts) return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1c94bf1db7186..d9d675f1ebfe9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -18212,6 +18213,50 @@ static int tg3_resume(struct device *device) static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); +/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal + * PCIe AER event on the tg3 device if the tg3 device is not, or cannot + * be, powered down. + */ +static const struct dmi_system_id tg3_restart_aer_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"), + }, + }, + {} +}; + static void tg3_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) tg3_power_down(tp); + else if (system_state == SYSTEM_RESTART && + dmi_first_match(tg3_restart_aer_quirk_table) && + pdev->current_state != PCI_D3cold && + pdev->current_state != PCI_UNKNOWN) { + /* Disable PCIe AER on the tg3 to avoid a fatal + * error during this system restart. + */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + } rtnl_unlock(); diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d116e2b10bcea..dbdb83567364c 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d2d7736fd5f1..9c9ea4c1b93b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -924,7 +921,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -940,6 +936,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, return rx_buf; } +/** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + /** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on @@ -1051,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1103,6 +1124,65 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->page = NULL; } +/** + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * @rx_ring: Rx ring with all the auxiliary data + * @xdp: XDP buffer carrying linear + frags part + * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage + * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution + * + * Walk through gathered fragments and satisfy internal page + * recycle mechanism; we take here an action related to verdict + * returned by XDP program; + */ +static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + u32 *xdp_xmit, u32 ntc, u32 verdict) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; + struct ice_rx_buf *buf; + int i; + + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (verdict == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + + if (++idx == cnt) + idx = 0; + } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } + + xdp->data = NULL; + rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; +} + /** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on @@ -1120,15 +1200,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; - u32 first; xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { @@ -1190,6 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1199,15 +1278,15 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + ice_get_pgcnts(rx_ring); + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1217,18 +1296,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; - break; + xdp_verdict = ICE_XDP_CONSUMED; } - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + + if (!skb) + break; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1257,23 +1330,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_pkts++; } - first = rx_ring->first_desc; - while (cached_ntc != first) { - struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; - - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { - buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } - - ice_put_rx_buf(rx_ring, buf); - if (++cached_ntc >= cnt) - cached_ntc = 0; - } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cb347c852ba9e..806bce701df34 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 79f960c6680d1..6cf32b4041275 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -5,49 +5,6 @@ #define _ICE_TXRX_LIB_H_ #include "ice.h" -/** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - /** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d04543e5697b0..b34ebb916b898 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,6 +2424,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2892,6 +2897,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5868,6 +5878,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7219,29 +7232,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; } - if (!priv->plat->rx_fifo_size) { - if (priv->dma_cap.rx_fifo_size) { - priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Rx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.rx_fifo_size && - priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { dev_warn(priv->device, "Rx FIFO size (%u) exceeds dma capability\n", priv->plat->rx_fifo_size); priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; } - if (!priv->plat->tx_fifo_size) { - if (priv->dma_cap.tx_fifo_size) { - priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Tx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.tx_fifo_size && - priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { dev_warn(priv->device, "Tx FIFO size (%u) exceeds dma capability\n", priv->plat->tx_fifo_size); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8d..acf96f2624887 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 1341374a4588a..616ecc38d1726 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) if (likely(cpu < tq_number)) tq = &adapter->tx_queue[cpu]; else - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; + tq = &adapter->tx_queue[cpu % tq_number]; return tq; } @@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, u32 buf_size; u32 dw2; + spin_lock_irq(&tq->tx_lock); dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; dw2 |= xdpf->len; ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; @@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { tq->stats.tx_ring_full++; + spin_unlock_irq(&tq->tx_lock); return -ENOSPC; } @@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, xdpf->data, buf_size, DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { + spin_unlock_irq(&tq->tx_lock); return -EFAULT; + } tbi->map_type |= VMXNET3_MAP_SINGLE; } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); @@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, dma_wmb(); gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); + spin_unlock_irq(&tq->tx_lock); /* No need to handle the case when tx_num_deferred doesn't reach * threshold. Backend driver at hypervisor side will poll and reset @@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, { struct vmxnet3_adapter *adapter = netdev_priv(dev); struct vmxnet3_tx_queue *tq; + struct netdev_queue *nq; int i; if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) @@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev, if (tq->stopped) return -ENETDOWN; + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); + + __netif_tx_lock(nq, smp_processor_id()); for (i = 0; i < n; i++) { if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { tq->stats.xdp_xmit_err++; @@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, } } tq->stats.xdp_xmit += i; + __netif_tx_unlock(nq); return i; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 76b615d4d5b91..40046770f1bf0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2132,15 +2132,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct nvme_ns_info *info) { struct queue_limits lim; + unsigned int memflags; int ret; lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ret = queue_limits_commit_update(ns->disk->queue, &lim); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); /* Hide the block-interface for these devices */ if (!ret) @@ -2155,6 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_id_ns_nvm *nvm = NULL; struct nvme_zone_info zi = {}; struct nvme_id_ns *id; + unsigned int memflags; sector_t capacity; unsigned lbaf; int ret; @@ -2186,7 +2188,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); @@ -2219,7 +2221,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); goto out; } @@ -2235,7 +2237,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); if (blk_queue_is_zoned(ns->queue)) { ret = blk_revalidate_disk_zones(ns->disk); @@ -2291,9 +2293,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (!ret && nvme_ns_head_multipath(ns->head)) { struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; + unsigned int memflags; lim = queue_limits_start_update(ns->head->disk->queue); - blk_mq_freeze_queue(ns->head->disk->queue); + memflags = blk_mq_freeze_queue(ns->head->disk->queue); /* * queue_limits mixes values that are the hardware limitations * for bio splitting with what is the device configuration. @@ -2325,7 +2328,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); - blk_mq_unfreeze_queue(ns->head->disk->queue); + blk_mq_unfreeze_queue(ns->head->disk->queue, memflags); } return ret; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a85d190942bdf..2a76355650830 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -60,7 +60,7 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) if (h->disk) - blk_mq_unfreeze_queue(h->disk->queue); + blk_mq_unfreeze_queue_nomemrestore(h->disk->queue); } void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index d1d97a4bb36d3..3431a7df3e0d9 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data) pci_intx(pdev, res->orig_intx); } -static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) +static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res) { - struct pcim_intx_devres *res; - - res = devres_find(dev, pcim_intx_restore, NULL, NULL); - if (res) - return res; + u16 pci_command; - res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); - if (res) - devres_add(dev, res); - - return res; + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE); } /** @@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) int pcim_intx(struct pci_dev *pdev, int enable) { struct pcim_intx_devres *res; + struct device *dev = &pdev->dev; - res = get_or_create_intx_devres(&pdev->dev); - if (!res) - return -ENOMEM; + /* + * pcim_intx() must only restore the INTx value that existed before the + * driver was loaded, i.e., before it called pcim_intx() for the + * first time. + */ + res = devres_find(dev, pcim_intx_restore, NULL, NULL); + if (!res) { + res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + save_orig_intx(pdev, res); + devres_add(dev, res); + } - res->orig_intx = !enable; pci_intx(pdev, enable); return 0; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dfb5d4b8c0465..30bd366d7b58a 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) /* Create platform_profile structure and register */ priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev, - "ideapad-laptop", &priv->dytc, + "ideapad-laptop", priv->dytc, &dytc_profile_ops); if (IS_ERR(priv->dytc->ppdev)) { err = PTR_ERR(priv->dytc->ppdev); diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 5c3c0dfa1bf83..f369fb0d3d82f 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -23,12 +23,14 @@ * IFS Image * --------- * - * Intel provides a firmware file containing the scan tests via - * github [#f1]_. Similar to microcode there is a separate file for each + * Intel provides firmware files containing the scan tests via the webpage [#f1]_. + * Look under "In-Field Scan Test Images Download" section towards the + * end of the page. Similar to microcode, there are separate files for each * family-model-stepping. IFS Images are not applicable for some test types. * Wherever applicable the sysfs directory would provide a "current_batch" file * (see below) for loading the image. * + * .. [#f1] https://intel.com/InFieldScan * * IFS Image Loading * ----------------- @@ -125,9 +127,6 @@ * 2) Hardware allows for some number of cores to be tested in parallel. * The driver does not make use of this, it only tests one core at a time. * - * .. [#f1] https://github.com/intel/TBD - * - * * Structural Based Functional Test at Field (SBAF): * ------------------------------------------------- * diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 10f04b9441174..1ee0fb5f8250b 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val) static int pmc_core_ltr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; - u64 decoded_snoop_ltr, decoded_non_snoop_ltr; - u32 ltr_raw_data, scale, val; + u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val; + u32 ltr_raw_data, scale; u16 snoop_ltr, nonsnoop_ltr; unsigned int i, index, ltr_index = 0; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a60bcc791a480..0bbbf778ecfa3 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1316,7 +1316,7 @@ config RTC_DRV_SC27XX config RTC_DRV_SPEAR tristate "SPEAR ST RTC" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help If you say Y here you will get support for the RTC found on spear diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 5c39cf252392d..a3e52a5a708ff 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -308,7 +308,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev) /* remember whether this power up is caused by PMIC RTC or not */ info->rtc_dev->dev.platform_data = &pdata->rtc_wakeup; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; out_rtc: diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 814230d618427..964cd048fcdba 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -326,7 +326,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL); #endif /* VRTC_CALIBRATION */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; } diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c index 2278b4c98a711..09d78c2cc6918 100644 --- a/drivers/rtc/rtc-amlogic-a4.c +++ b/drivers/rtc/rtc-amlogic-a4.c @@ -361,7 +361,7 @@ static int aml_rtc_probe(struct platform_device *pdev) "failed to get_enable rtc sys clk\n"); aml_rtc_init(rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc_dev = devm_rtc_allocate_device(dev); @@ -391,7 +391,7 @@ static int aml_rtc_probe(struct platform_device *pdev) return 0; err_clk: clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); return ret; } @@ -426,7 +426,7 @@ static void aml_rtc_remove(struct platform_device *pdev) struct aml_rtc_data *rtc = dev_get_drvdata(&pdev->dev); clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static const struct aml_rtc_config a5_rtc_config = { diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 569c1054d6b0b..713fa0d077cde 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -527,7 +527,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); if (rtc->irq != -1) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, rtc->rtc_dev->features); diff --git a/drivers/rtc/rtc-as3722.c b/drivers/rtc/rtc-as3722.c index 0f21af27f4cfe..9682d6457b7fd 100644 --- a/drivers/rtc/rtc-as3722.c +++ b/drivers/rtc/rtc-as3722.c @@ -187,7 +187,7 @@ static int as3722_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc", &as3722_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 9b3898b8de7cf..f6b0102a843ad 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -528,7 +528,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) * being wake-capable; if it didn't, do that here. */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); if (at91_rtc_config->has_correction) rtc->ops = &sama5d4_rtc_ops; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 15b21da2788f6..38991cca59308 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -353,7 +353,7 @@ static int at91_rtc_probe(struct platform_device *pdev) /* platform setup code should have handled this; sigh */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); diff --git a/drivers/rtc/rtc-cadence.c b/drivers/rtc/rtc-cadence.c index bf2a9a1fdea74..8634eea799ab0 100644 --- a/drivers/rtc/rtc-cadence.c +++ b/drivers/rtc/rtc-cadence.c @@ -359,7 +359,7 @@ static void cdns_rtc_remove(struct platform_device *pdev) struct cdns_rtc *crtc = platform_get_drvdata(pdev); cdns_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); clk_disable_unprepare(crtc->pclk); clk_disable_unprepare(crtc->ref_clk); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 78f2ce12c75a7..8172869bd3d79 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -151,11 +151,6 @@ static inline int hpet_set_periodic_freq(unsigned long freq) return 0; } -static inline int hpet_rtc_dropped_irq(void) -{ - return 0; -} - static inline int hpet_rtc_timer_init(void) { return 0; @@ -864,7 +859,7 @@ static void acpi_cmos_wake_setup(struct device *dev) dev_info(dev, "RTC can wake from S4\n"); /* RTC always wakes from S1/S2/S3, and often S4/STD */ - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); } static void cmos_check_acpi_rtc_status(struct device *dev, diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index afc8fcba8f888..568a89e79c114 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -295,7 +295,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) } disable_irq(rtc->update_irq); - err = device_init_wakeup(dev, 1); + err = device_init_wakeup(dev, true); if (err) { dev_err(dev, "wakeup initialization failed (%d)\n", err); /* ignore error and continue without wakeup support */ diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 60a48c3ba3ca5..865c2e82c7a5b 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -337,7 +337,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev) return ret; } - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { dev_err(&pdev->dev, "failed to initialize wakeup\n"); return ret; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 844168fcae1e2..05adec6b77bff 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -288,7 +288,7 @@ static int da9055_rtc_probe(struct platform_device *pdev) if (ret & DA9055_RTC_ALM_EN) rtc->alarm_enable = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9055_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index dd37b055693c0..19c09c4187462 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -508,7 +508,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq, return ret; if (ds3232->irq > 0) - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ds3232_hwmon_register(dev, name); diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index 7b82e4a14b7a2..f71a6bb77b2a1 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -830,7 +830,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq) isl1208_driver.driver.name, client); if (!rc) { - device_init_wakeup(&client->dev, 1); + device_init_wakeup(&client->dev, true); enable_irq_wake(irq); } else { dev_err(&client->dev, diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index bafa7d1b9b883..44bba356268ca 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -367,7 +367,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ret = dev_pm_set_wake_irq(dev, irq); if (ret) diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c index 8d713e563d7c0..97e5625c064ce 100644 --- a/drivers/rtc/rtc-loongson.c +++ b/drivers/rtc/rtc-loongson.c @@ -114,6 +114,13 @@ static irqreturn_t loongson_rtc_isr(int irq, void *id) struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id; rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot be cleared. + */ + regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return IRQ_HANDLED; } @@ -131,11 +138,7 @@ static u32 loongson_rtc_handler(void *id) writel(RTC_STS, priv->pm_base + PM1_STS_REG); spin_unlock(&priv->lock); - /* - * The TOY_MATCH0_REG should be cleared 0 here, - * otherwise the interrupt cannot be cleared. - */ - return regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return ACPI_INTERRUPT_HANDLED; } static int loongson_rtc_set_enabled(struct device *dev) @@ -329,7 +332,7 @@ static int loongson_rtc_probe(struct platform_device *pdev) alarm_irq); priv->pm_base = regs - priv->config->pm_offset; - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); if (has_acpi_companion(dev)) acpi_install_fixed_event_handler(ACPI_EVENT_RTC, @@ -360,7 +363,7 @@ static void loongson_rtc_remove(struct platform_device *pdev) acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, loongson_rtc_handler); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); loongson_rtc_alarm_irq_enable(dev, 0); } diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index c0b8fbce10827..0793d70507f7c 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -293,7 +293,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev) rtc->alarm = lp->pdata ? lp->pdata->alarm_sel : DEFAULT_ALARM_SEL; platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc", &lp8788_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 76ad7031a13dd..74280bffe1b07 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -257,7 +257,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "Can't request interrupt.\n"); rtc->irq = -1; } else { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } } diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index a8f4b645c09d2..7bb044d2ac25a 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -770,7 +770,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) goto err_rtc; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, id->name, &max77686_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 64bb8ac6ef62d..6ce8afbeac680 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -270,7 +270,7 @@ static int max8925_rtc_probe(struct platform_device *pdev) /* XXX - isn't this redundant? */ platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8925-rtc", &max8925_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index 20e50d9fdf882..e7618d715bd89 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -473,7 +473,7 @@ static int max8997_rtc_probe(struct platform_device *pdev) max8997_rtc_enable_wtsr(info, true); max8997_rtc_enable_smpl(info, true); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc", &max8997_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 648fa362ec447..5849729f7d01d 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -74,7 +74,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) if (IS_ERR(vrtc->io_alarm)) return PTR_ERR(vrtc->io_alarm); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, vrtc); diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 6003281316031..b90f8337a7e6d 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -303,7 +303,7 @@ static int mpc5121_rtc_probe(struct platform_device *op) return PTR_ERR(rtc->regs); } - device_init_wakeup(&op->dev, 1); + device_init_wakeup(&op->dev, true); platform_set_drvdata(op, rtc); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 152699219a2b9..6979d225a78e4 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -286,7 +286,7 @@ static int mtk_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc_dev->ops = &mtk_rtc_ops; rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index 51029c5362441..c27ad626d09fc 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -264,7 +264,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 0) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, pdata->rtc->features); @@ -287,7 +287,7 @@ static void __exit mv_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); if (pdata->irq >= 0) - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(pdata->clk)) clk_disable_unprepare(pdata->clk); diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index dbb935dbbd8ab..608db97d450c5 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -377,7 +377,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 0) { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c index 13c041bb79f16..570f27af4732e 100644 --- a/drivers/rtc/rtc-mxc_v2.c +++ b/drivers/rtc/rtc-mxc_v2.c @@ -302,7 +302,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) if (pdata->irq < 0) return pdata->irq; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index c123778e2d9bc..0f90065e352cb 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -920,7 +920,7 @@ static void omap_rtc_remove(struct platform_device *pdev) omap_rtc_power_off_rtc = NULL; } - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(rtc->clk)) clk_disable_unprepare(rtc->clk); diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 7256a88b490c9..aecada6bcf8b5 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -287,7 +287,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) palmas_rtc->irq = platform_get_irq(pdev, 0); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &palmas_rtc_ops, THIS_MODULE); if (IS_ERR(palmas_rtc->rtc)) { diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9c04c4e1a49c3..31c7dca8f4692 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #define PCF2127_BIT_CTRL3_BLF BIT(2) #define PCF2127_BIT_CTRL3_BF BIT(3) #define PCF2127_BIT_CTRL3_BTSE BIT(4) +#define PCF2127_CTRL3_PM GENMASK(7, 5) /* Time and date registers */ #define PCF2127_REG_TIME_BASE 0x03 #define PCF2127_BIT_SC_OSF BIT(7) @@ -331,6 +333,84 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm) return 0; } +static int pcf2127_param_get(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value < 0x3) + param->uvalue = RTC_BSM_LEVEL; + else if (value < 0x6) + param->uvalue = RTC_BSM_DIRECT; + else + param->uvalue = RTC_BSM_DISABLED; + + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int pcf2127_param_set(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u8 mode = 0; + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value > 5) + value -= 5; + else if (value > 2) + value -= 3; + + switch (param->uvalue) { + case RTC_BSM_LEVEL: + break; + case RTC_BSM_DIRECT: + mode = 3; + break; + case RTC_BSM_DISABLED: + if (value == 0) + value = 1; + mode = 5; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL3, + PCF2127_CTRL3_PM, + FIELD_PREP(PCF2127_CTRL3_PM, mode + value)); + + break; + + default: + return -EINVAL; + } + + return 0; +} + static int pcf2127_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { @@ -741,6 +821,8 @@ static const struct rtc_class_ops pcf2127_rtc_ops = { .read_alarm = pcf2127_rtc_read_alarm, .set_alarm = pcf2127_rtc_set_alarm, .alarm_irq_enable = pcf2127_rtc_alarm_irq_enable, + .param_get = pcf2127_param_get, + .param_set = pcf2127_param_set, }; /* sysfs interface */ diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index fdbc07f14036a..905986c616559 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -322,7 +322,16 @@ static const struct rtc_class_ops pcf85063_rtc_ops = { static int pcf85063_nvmem_read(void *priv, unsigned int offset, void *val, size_t bytes) { - return regmap_read(priv, PCF85063_REG_RAM, val); + unsigned int tmp; + int ret; + + ret = regmap_read(priv, PCF85063_REG_RAM, &tmp); + if (ret < 0) + return ret; + + *(u8 *)val = tmp; + + return 0; } static int pcf85063_nvmem_write(void *priv, unsigned int offset, diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index bed3c27e665f3..2812da2c50c51 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -330,7 +330,7 @@ static int pic32_rtc_probe(struct platform_device *pdev) pic32_rtc_enable(pdata, 1); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); pdata->rtc->ops = &pic32_rtcops; pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 2f32187ecc8d3..b2518aea4218f 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -503,7 +503,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc_dd->rtc)) diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index 34d8545c8e155..62ee6b8f9bcd6 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -360,7 +360,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); return 0; } diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index eecb49bab56ad..8ba9cda74acf1 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -245,7 +245,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "IRQ is not free.\n"); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &rc5t583_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-rc5t619.c b/drivers/rtc/rtc-rc5t619.c index 711f62eecd798..74d1691020741 100644 --- a/drivers/rtc/rtc-rc5t619.c +++ b/drivers/rtc/rtc-rc5t619.c @@ -414,7 +414,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev) } else { /* enable wake */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); enable_irq_wake(rtc->irq); } } else { diff --git a/drivers/rtc/rtc-renesas-rtca3.c b/drivers/rtc/rtc-renesas-rtca3.c index d127933bfc8ad..a056291d38876 100644 --- a/drivers/rtc/rtc-renesas-rtca3.c +++ b/drivers/rtc/rtc-renesas-rtca3.c @@ -768,7 +768,7 @@ static int rtca3_probe(struct platform_device *pdev) if (ret) return ret; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); priv->rtc_dev = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(priv->rtc_dev)) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index 2d9bcb3ce1e3b..59b8e9a30fe67 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -418,7 +418,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rk808_rtc->rtc)) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index c0ac3bdb2f427..58c957eb753d8 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -456,7 +456,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n", readw(info->base + S3C2410_RTCCON)); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(info->rtc)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index dad294a0ce2aa..36acca5b2639e 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -729,7 +729,7 @@ static int s5m_rtc_probe(struct platform_device *pdev) info->irq, ret); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } return devm_rtc_register_device(info->rtc_dev); diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 13799b1abca1a..1ad93648d69c0 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -292,7 +292,7 @@ static int sa1100_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return sa1100_rtc_init(pdev, info); } diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index ce7a2ddbbc16b..2b83561d4d280 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -613,14 +613,14 @@ static int sprd_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc->ops = &sprd_rtc_ops; rtc->rtc->range_min = 0; rtc->rtc->range_max = 5662310399LL; ret = devm_rtc_register_device(rtc->rtc); if (ret) { - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); return ret; } diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index a5df521876ba0..9ea40f40188f3 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -611,7 +611,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) if (ret) goto err_unmap; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; err_unmap: diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index 26eed927f8b31..959acff8faff0 100644 --- a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -395,7 +395,7 @@ static int spear_rtc_probe(struct platform_device *pdev) goto err_disable_clock; if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; @@ -411,7 +411,7 @@ static void spear_rtc_remove(struct platform_device *pdev) spear_rtc_disable_interrupt(config); clk_disable_unprepare(config->clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c index 9f1a019ec8afa..a0564d4435690 100644 --- a/drivers/rtc/rtc-stm32.c +++ b/drivers/rtc/rtc-stm32.c @@ -1074,26 +1074,18 @@ static int stm32_rtc_probe(struct platform_device *pdev) regs = &rtc->data->regs; if (rtc->data->need_dbp) { - rtc->dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "st,syscfg"); + unsigned int args[2]; + + rtc->dbp = syscon_regmap_lookup_by_phandle_args(pdev->dev.of_node, + "st,syscfg", + 2, args); if (IS_ERR(rtc->dbp)) { dev_err(&pdev->dev, "no st,syscfg\n"); return PTR_ERR(rtc->dbp); } - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 1, &rtc->dbp_reg); - if (ret) { - dev_err(&pdev->dev, "can't read DBP register offset\n"); - return ret; - } - - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 2, &rtc->dbp_mask); - if (ret) { - dev_err(&pdev->dev, "can't read DBP register mask\n"); - return ret; - } + rtc->dbp_reg = args[0]; + rtc->dbp_mask = args[1]; } if (!rtc->data->has_pclk) { diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index e681c1745866e..e5e6013d080e6 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -826,7 +826,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) clk_prepare_enable(chip->losc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); chip->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(chip->rtc)) diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c index 9b1ce0e8ba27e..519a06e728d6c 100644 --- a/drivers/rtc/rtc-sunplus.c +++ b/drivers/rtc/rtc-sunplus.c @@ -269,7 +269,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev) if (ret) goto free_reset_assert; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); dev_set_drvdata(&plat_dev->dev, sp_rtc); sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev); @@ -307,7 +307,7 @@ static void sp_rtc_remove(struct platform_device *plat_dev) { struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev); - device_init_wakeup(&plat_dev->dev, 0); + device_init_wakeup(&plat_dev->dev, false); reset_control_assert(sp_rtc->rstc); clk_disable_unprepare(sp_rtc->rtcclk); } diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 79a3102c83549..46788db899533 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -319,7 +319,7 @@ static int tegra_rtc_probe(struct platform_device *pdev) writel(0xffffffff, info->base + TEGRA_RTC_REG_INTR_STATUS); writel(0, info->base + TEGRA_RTC_REG_INTR_MASK); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = devm_request_irq(&pdev->dev, info->irq, tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH, dev_name(&pdev->dev), diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 7e0d8fb264655..a68b8c8841023 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -132,7 +132,7 @@ static int test_probe(struct platform_device *plat_dev) break; default: rtd->rtc->ops = &test_rtc_ops; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); } timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0); diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index e796729fc817c..54c8429b16bfc 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -241,7 +241,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc = devm_rtc_allocate_device(&pdev->dev); diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 2ea1bbfbbc2aa..284aa2f0392b3 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -418,7 +418,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->irq = irq; if (irq != -1) { if (device_property_present(tps65910->dev, "wakeup-source")) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else device_set_wakeup_capable(&pdev->dev, 1); } else { diff --git a/drivers/rtc/rtc-tps6594.c b/drivers/rtc/rtc-tps6594.c index e696676341378..7c6246e3f0292 100644 --- a/drivers/rtc/rtc-tps6594.c +++ b/drivers/rtc/rtc-tps6594.c @@ -37,7 +37,7 @@ #define MAX_OFFSET (277774) // Number of ticks per hour -#define TICKS_PER_HOUR (32768 * 3600) +#define TICKS_PER_HOUR (32768 * 3600LL) // Multiplier for ppb conversions #define PPB_MULT NANO diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 794429182b348..e6106e67e1f40 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -572,7 +572,7 @@ static int twl_rtc_probe(struct platform_device *pdev) return ret; platform_set_drvdata(pdev, twl_rtc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &twl_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 640833e210575..218316be942ae 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -420,7 +420,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev) if (ret & WM831X_RTC_ALM_ENA) wm831x_rtc->alarm_enabled = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm831x_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(wm831x_rtc->rtc)) diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 6797eb4d2e493..3bd60d067a5ee 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -420,7 +420,7 @@ static int wm8350_rtc_probe(struct platform_device *pdev) } } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350", &wm8350_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c index 0813ea1a03c27..6660b664e8dd3 100644 --- a/drivers/rtc/rtc-xgene.c +++ b/drivers/rtc/rtc-xgene.c @@ -174,7 +174,7 @@ static int xgene_rtc_probe(struct platform_device *pdev) /* Turn on the clock and the crystal */ writel(RTC_CCR_EN, pdata->csr_base + RTC_CCR); - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { clk_disable_unprepare(pdata->clk); return ret; @@ -197,7 +197,7 @@ static void xgene_rtc_remove(struct platform_device *pdev) struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev); xgene_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); clk_disable_unprepare(pdata->clk); } diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index af1abb69d1e32..f39102b66eac2 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -318,8 +318,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev) return ret; } - /* Getting the rtc_clk info */ - xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc_clk"); + /* Getting the rtc info */ + xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc"); if (IS_ERR(xrtcdev->rtc_clk)) { if (PTR_ERR(xrtcdev->rtc_clk) != -EPROBE_DEFER) dev_warn(&pdev->dev, "Device clock not found.\n"); @@ -337,7 +337,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) xlnx_init_rtc(xrtcdev); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return devm_rtc_register_device(xrtcdev->rtc); } @@ -345,7 +345,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) static void xlnx_rtc_remove(struct platform_device *pdev) { xlnx_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static int __maybe_unused xlnx_rtc_suspend(struct device *dev) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e7ea1f04164a0..d776f13cd160b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2736,6 +2736,7 @@ int scsi_device_quiesce(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; + unsigned int memflags; int err; /* @@ -2750,7 +2751,7 @@ scsi_device_quiesce(struct scsi_device *sdev) blk_set_pm_only(q); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); /* * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue @@ -2758,7 +2759,7 @@ scsi_device_quiesce(struct scsi_device *sdev) * was called. See also https://lwn.net/Articles/573497/. */ synchronize_rcu(); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f2093982b3db8..087fcbfc9aaa3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -220,6 +220,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int new_shift = sbitmap_calculate_shift(depth); bool need_alloc = !sdev->budget_map.map; bool need_free = false; + unsigned int memflags; int ret; struct sbitmap sb_backup; @@ -240,7 +241,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, * and here disk isn't added yet, so freezing is pretty fast */ if (need_free) { - blk_mq_freeze_queue(sdev->request_queue); + memflags = blk_mq_freeze_queue(sdev->request_queue); sb_backup = sdev->budget_map; } ret = sbitmap_init_node(&sdev->budget_map, @@ -256,7 +257,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, else sbitmap_free(&sb_backup); ret = 0; - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue(sdev->request_queue, memflags); } return ret; } diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 796e37a1d859f..3438269a54405 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1439,6 +1439,7 @@ static ssize_t max_number_of_rtt_store(struct device *dev, struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_dev_info *dev_info = &hba->dev_info; struct scsi_device *sdev; + unsigned int memflags; unsigned int rtt; int ret; @@ -1458,14 +1459,16 @@ static ssize_t max_number_of_rtt_store(struct device *dev, ufshcd_rpm_get_sync(hba); + memflags = memalloc_noio_save(); shost_for_each_device(sdev, hba->host) - blk_mq_freeze_queue(sdev->request_queue); + blk_mq_freeze_queue_nomemsave(sdev->request_queue); ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt); shost_for_each_device(sdev, hba->host) - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue); + memalloc_noio_restore(memflags); ufshcd_rpm_put_sync(hba); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 42bd1cb7c9cdd..583ac81669c24 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -60,14 +60,14 @@ static struct inode *anon_inode_make_secure_inode( const struct inode *context_inode) { struct inode *inode; - const struct qstr qname = QSTR_INIT(name, strlen(name)); int error; inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; - error = security_inode_init_security_anon(inode, &qname, context_inode); + error = security_inode_init_security_anon(inode, &QSTR(name), + context_inode); if (error) { iput(inode); return ERR_PTR(error); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8fcf7c8e5ede7..53a421ff136d3 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -450,7 +450,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * return ret; struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound); - struct qstr name = (struct qstr) QSTR(name_buf); + struct qstr name = QSTR(name_buf); inode->bi_dir = lostfound.bi_inum; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 98825437381c6..71c786cdb192e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -32,7 +32,6 @@ #include #include -#define QSTR(n) { { { .len = strlen(n) } }, .name = n } int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 1a1720116071c..e7c3541b38f3f 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -670,8 +670,6 @@ static inline int cmp_le32(__le32 l, __le32 r) #include -#define QSTR(n) { { { .len = strlen(n) } }, .name = n } - static inline bool qstr_eq(const struct qstr l, const struct qstr r) { return l.len == r.len && !memcmp(l.name, r.name, l.len); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 92071ca0655f0..3dc5a35dd19b3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { btrfs_unlock_up_safe(p, parent_level + 1); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); @@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { ASSERT(ret == -EAGAIN); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 30eceaf829a7e..4aca7475fd82c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); + /* + * If our ordered extent had an error there's no point in continuing. + * The error may have come from a transaction abort done either by this + * task or some other concurrent task, and the transaction abort path + * iterates over all existing ordered extents and sets the flag + * BTRFS_ORDERED_IOERR on them. + */ + if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) { + const int fs_error = BTRFS_FS_ERROR(fs_info); + + return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO); + } /* We cannot split partially completed ordered extents. */ if (ordered->bytes_left) { ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b90fabe302e61..f9d3766c809b4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su * Commit current transaction to make sure all the rfer/excl numbers * get updated. */ - trans = btrfs_start_transaction(fs_info->quota_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_commit_transaction(trans); + ret = btrfs_commit_current_transaction(fs_info->quota_root); if (ret < 0) return ret; @@ -1897,8 +1893,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su /* * It's squota and the subvolume still has numbers needed for future * accounting, in this case we can not delete it. Just skip it. + * + * Or the qgroup is already removed by a qgroup rescan. For both cases we're + * safe to ignore them. */ - if (ret == -EBUSY) + if (ret == -EBUSY || ret == -ENOENT) ret = 0; return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 15312013f2a34..aca83a98b75a2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,8 +274,10 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, cur_trans = fs_info->running_transaction; if (cur_trans) { if (TRANS_ABORTED(cur_trans)) { + const int abort_error = cur_trans->aborted; + spin_unlock(&fs_info->trans_lock); - return cur_trans->aborted; + return abort_error; } if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fdf9dc15eafae..fdd404fc81124 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -412,7 +412,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { - char name[100]; + char name[NAME_MAX]; doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d7a06b9aaef6b..54b3421501e9f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2948,12 +2948,12 @@ static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { bool legacy = !(features & CEPH_FEATURE_FS_BTIME); - struct ceph_mds_request_head_old *ohead; + struct ceph_mds_request_head *head; if (legacy) return (struct ceph_mds_request_head_legacy *)p; - ohead = (struct ceph_mds_request_head_old *)p; - return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid; + head = (struct ceph_mds_request_head *)p; + return (struct ceph_mds_request_head_legacy *)&head->oldest_client_tid; } /* @@ -3023,7 +3023,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); else if (request_head_version == 1) - len = sizeof(struct ceph_mds_request_head_old); + len = offsetofend(struct ceph_mds_request_head, args); else if (request_head_version == 2) len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else @@ -3107,11 +3107,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); } else if (request_head_version == 1) { - struct ceph_mds_request_head_old *ohead = msg->front.iov_base; + struct ceph_mds_request_head *nhead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); - ohead->version = cpu_to_le16(1); - p = msg->front.iov_base + sizeof(*ohead); + nhead->version = cpu_to_le16(1); + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, args); } else if (request_head_version == 2) { struct ceph_mds_request_head *nhead = msg->front.iov_base; @@ -3268,7 +3268,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, * so we limit to retry at most 256 times. */ if (req->r_attempts) { - old_max_retry = sizeof_field(struct ceph_mds_request_head_old, + old_max_retry = sizeof_field(struct ceph_mds_request_head, num_retry); old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || @@ -5693,18 +5693,18 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, * * All the other cases --> mismatch */ + bool path_matched = true; char *first = strstr(_tpath, auth->match.path); - if (first != _tpath) { - if (free_tpath) - kfree(_tpath); - return 0; + if (first != _tpath || + (tlen > len && _tpath[len] != '/')) { + path_matched = false; } - if (tlen > len && _tpath[len] != '/') { - if (free_tpath) - kfree(_tpath); + if (free_tpath) + kfree(_tpath); + + if (!path_matched) return 0; - } } } diff --git a/fs/dcache.c b/fs/dcache.c index 9cc0d47da321c..96b21a47312e6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2966,11 +2966,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: - if (alias->d_op->d_unalias_trylock && + if (alias->d_op && alias->d_op->d_unalias_trylock && !alias->d_op->d_unalias_trylock(alias)) goto out_err; __d_move(alias, dentry, false); - if (alias->d_op->d_unalias_unlock) + if (alias->d_op && alias->d_op->d_unalias_unlock) alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index e33cc77699cd5..69e9ddcb113df 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -94,6 +94,7 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode) fsd = d_fsd; } else { struct inode *inode = dentry->d_inode; + unsigned int methods = 0; if (WARN_ON(mode == DBGFS_GET_ALREADY)) return -EINVAL; @@ -106,25 +107,28 @@ static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode) const struct debugfs_short_fops *ops; ops = fsd->short_fops = DEBUGFS_I(inode)->short_fops; if (ops->llseek) - fsd->methods |= HAS_LSEEK; + methods |= HAS_LSEEK; if (ops->read) - fsd->methods |= HAS_READ; + methods |= HAS_READ; if (ops->write) - fsd->methods |= HAS_WRITE; + methods |= HAS_WRITE; + fsd->real_fops = NULL; } else { const struct file_operations *ops; ops = fsd->real_fops = DEBUGFS_I(inode)->real_fops; if (ops->llseek) - fsd->methods |= HAS_LSEEK; + methods |= HAS_LSEEK; if (ops->read) - fsd->methods |= HAS_READ; + methods |= HAS_READ; if (ops->write) - fsd->methods |= HAS_WRITE; + methods |= HAS_WRITE; if (ops->unlocked_ioctl) - fsd->methods |= HAS_IOCTL; + methods |= HAS_IOCTL; if (ops->poll) - fsd->methods |= HAS_POLL; + methods |= HAS_POLL; + fsd->short_fops = NULL; } + fsd->methods = methods; refcount_set(&fsd->active_users, 1); init_completion(&fsd->active_users_drained); INIT_LIST_HEAD(&fsd->cancellations); diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 7940241d9355d..df2777e056619 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -407,7 +407,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name, } it.index = index; - it.name = (struct qstr)QSTR_INIT(name, strlen(name)); + it.name = QSTR(name); if (it.name.len > EROFS_NAME_LEN) return -ERANGE; diff --git a/fs/file_table.c b/fs/file_table.c index 7f7c378c6e31a..f0291a66f9db4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -351,9 +351,7 @@ static struct file *alloc_file(const struct path *path, int flags, static inline int alloc_path_pseudo(const char *name, struct inode *inode, struct vfsmount *mnt, struct path *path) { - struct qstr this = QSTR_INIT(name, strlen(name)); - - path->dentry = d_alloc_pseudo(mnt->mnt_sb, &this); + path->dentry = d_alloc_pseudo(mnt->mnt_sb, &QSTR(name)); if (!path->dentry) return -ENOMEM; path->mnt = mntget(mnt); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7e51d2cec64b4..e0741e468956d 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -95,32 +95,17 @@ __uml_setup("hostfs=", hostfs_args, static char *__dentry_name(struct dentry *dentry, char *name) { char *p = dentry_path_raw(dentry, name, PATH_MAX); - char *root; - size_t len; - struct hostfs_fs_info *fsi; - - fsi = dentry->d_sb->s_fs_info; - root = fsi->host_root_path; - len = strlen(root); - if (IS_ERR(p)) { - __putname(name); - return NULL; - } + struct hostfs_fs_info *fsi = dentry->d_sb->s_fs_info; + char *root = fsi->host_root_path; + size_t len = strlen(root); - /* - * This function relies on the fact that dentry_path_raw() will place - * the path name at the end of the provided buffer. - */ - BUG_ON(p + strlen(p) + 1 != name + PATH_MAX); - - strscpy(name, root, PATH_MAX); - if (len > p - name) { + if (IS_ERR(p) || len > p - name) { __putname(name); return NULL; } - if (p > name + len) - strcpy(name + len, p); + memcpy(name, root, len); + memmove(name + len, p, name + PATH_MAX - p); return name; } @@ -410,38 +395,33 @@ static const struct file_operations hostfs_dir_fops = { .fsync = hostfs_fsync, }; -static int hostfs_writepage(struct page *page, struct writeback_control *wbc) +static int hostfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - char *buffer; - loff_t base = page_offset(page); - int count = PAGE_SIZE; - int end_index = inode->i_size >> PAGE_SHIFT; - int err; - - if (page->index >= end_index) - count = inode->i_size & (PAGE_SIZE-1); - - buffer = kmap_local_page(page); - - err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); - if (err != count) { - if (err >= 0) - err = -EIO; - mapping_set_error(mapping, err); - goto out; + struct folio *folio = NULL; + loff_t i_size = i_size_read(inode); + int err = 0; + + while ((folio = writeback_iter(mapping, wbc, folio, &err))) { + loff_t pos = folio_pos(folio); + size_t count = folio_size(folio); + char *buffer; + int ret; + + if (count > i_size - pos) + count = i_size - pos; + + buffer = kmap_local_folio(folio, 0); + ret = write_file(HOSTFS_I(inode)->fd, &pos, buffer, count); + kunmap_local(buffer); + folio_unlock(folio); + if (ret != count) { + err = ret < 0 ? ret : -EIO; + mapping_set_error(mapping, err); + } } - if (base > inode->i_size) - inode->i_size = base; - - err = 0; - - out: - kunmap_local(buffer); - unlock_page(page); - return err; } @@ -506,11 +486,12 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping, } static const struct address_space_operations hostfs_aops = { - .writepage = hostfs_writepage, + .writepages = hostfs_writepages, .read_folio = hostfs_read_folio, .dirty_folio = filemap_dirty_folio, .write_begin = hostfs_write_begin, .write_end = hostfs_write_end, + .migrate_folio = filemap_migrate_folio, }; static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8502ef68459b9..0eb320617d7b1 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -927,7 +927,7 @@ static void kernfs_notify_workfn(struct work_struct *work) if (!inode) continue; - name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); + name = QSTR(kn->name); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index e8015d24a82cd..6613b8fcceb0d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1186,7 +1186,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1199,14 +1199,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index e0b91dbaa0acb..8bb5022f30824 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2285,7 +2285,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, mlog(ML_ERROR, "found superblock with incorrect block " "size bits: found %u, should be 9, 10, 11, or 12\n", blksz_bits); - } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) { + } else if ((1 << blksz_bits) != blksz) { mlog(ML_ERROR, "found superblock with incorrect block " "size: found %u, should be %u\n", 1 << blksz_bits, blksz); } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 9729f071c5aaa..f52073022fae8 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -392,9 +392,9 @@ static ssize_t orangefs_debug_write(struct file *file, * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... */ - if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) { + if (count > ORANGEFS_MAX_DEBUG_STRING_LEN) { silly = count; - count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; + count = ORANGEFS_MAX_DEBUG_STRING_LEN; } buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index cea820cb3b55b..be5c65d6f8484 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -14,8 +14,6 @@ #include #include "overlayfs.h" -#include "../internal.h" /* for vfs_path_lookup */ - struct ovl_lookup_data { struct super_block *sb; const struct ovl_layer *layer; diff --git a/fs/smb/client/asn1.c b/fs/smb/client/asn1.c index b5724ef9f182f..214a44509e7b9 100644 --- a/fs/smb/client/asn1.c +++ b/fs/smb/client/asn1.c @@ -52,6 +52,8 @@ int cifs_neg_token_init_mech_type(void *context, size_t hdrlen, server->sec_kerberos = true; else if (oid == OID_ntlmssp) server->sec_ntlmssp = true; + else if (oid == OID_IAKerb) + server->sec_iakerb = true; else { char buf[50]; diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 28f568b5fc277..bc1c1e9b288ad 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -138,11 +138,13 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, dp = description + strlen(description); - /* for now, only sec=krb5 and sec=mskrb5 are valid */ + /* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */ if (server->sec_kerberos) sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) sprintf(dp, ";sec=mskrb5"); + else if (server->sec_iakerb) + sprintf(dp, ";sec=iakerb"); else { cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); sprintf(dp, ";sec=krb5"); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index ba79aa2107cc9..699a3f76d0834 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1395,7 +1395,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, const struct cifs_fid *cifsfid, u32 *pacllen, - u32 __maybe_unused unused) + u32 info) { struct smb_ntsd *pntsd = NULL; unsigned int xid; @@ -1407,7 +1407,7 @@ struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, xid = get_xid(); rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), cifsfid->netfid, &pntsd, - pacllen); + pacllen, info); free_xid(xid); cifs_put_tlink(tlink); @@ -1419,7 +1419,7 @@ struct smb_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, } static struct smb_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, - const char *path, u32 *pacllen) + const char *path, u32 *pacllen, u32 info) { struct smb_ntsd *pntsd = NULL; int oplock = 0; @@ -1446,9 +1446,12 @@ static struct smb_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, .fid = &fid, }; + if (info & SACL_SECINFO) + oparms.desired_access |= SYSTEM_SECURITY; + rc = CIFS_open(xid, &oparms, &oplock, NULL); if (!rc) { - rc = CIFSSMBGetCIFSACL(xid, tcon, fid.netfid, &pntsd, pacllen); + rc = CIFSSMBGetCIFSACL(xid, tcon, fid.netfid, &pntsd, pacllen, info); CIFSSMBClose(xid, tcon, fid.netfid); } @@ -1472,7 +1475,7 @@ struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (inode) open_file = find_readable_file(CIFS_I(inode), true); if (!open_file) - return get_cifs_acl_by_path(cifs_sb, path, pacllen); + return get_cifs_acl_by_path(cifs_sb, path, pacllen, info); pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info); cifsFileInfo_put(open_file); @@ -1485,7 +1488,7 @@ int set_cifs_acl(struct smb_ntsd *pnntsd, __u32 acllen, { int oplock = 0; unsigned int xid; - int rc, access_flags; + int rc, access_flags = 0; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1498,10 +1501,12 @@ int set_cifs_acl(struct smb_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) - access_flags = WRITE_OWNER; - else - access_flags = WRITE_DAC; + if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP) + access_flags |= WRITE_OWNER; + if (aclflag & CIFS_ACL_SACL) + access_flags |= SYSTEM_SECURITY; + if (aclflag & CIFS_ACL_DACL) + access_flags |= WRITE_DAC; oparms = (struct cifs_open_parms) { .tcon = tcon, diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index b800c9f585d8d..6a3bd652d251d 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -715,6 +715,12 @@ cifs_show_options(struct seq_file *s, struct dentry *root) cifs_sb->ctx->backupgid)); seq_show_option(s, "reparse", cifs_reparse_type_str(cifs_sb->ctx->reparse_type)); + if (cifs_sb->ctx->nonativesocket) + seq_puts(s, ",nonativesocket"); + else + seq_puts(s, ",nativesocket"); + seq_show_option(s, "symlink", + cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb))); seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize); seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize); diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index a762dbbbd959b..831fee962c4d6 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -146,6 +146,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 52 -#define CIFS_VERSION "2.52" +#define SMB3_PRODUCT_BUILD 53 +#define CIFS_VERSION "2.53" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 49ffc040f736c..a68434ad744ae 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -151,6 +151,7 @@ enum securityEnum { NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ Kerberos, /* Kerberos via SPNEGO */ + IAKerb, /* Kerberos proxy */ }; enum upcall_target_enum { @@ -160,6 +161,7 @@ enum upcall_target_enum { }; enum cifs_reparse_type { + CIFS_REPARSE_TYPE_NONE, CIFS_REPARSE_TYPE_NFS, CIFS_REPARSE_TYPE_WSL, CIFS_REPARSE_TYPE_DEFAULT = CIFS_REPARSE_TYPE_NFS, @@ -168,6 +170,8 @@ enum cifs_reparse_type { static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type) { switch (type) { + case CIFS_REPARSE_TYPE_NONE: + return "none"; case CIFS_REPARSE_TYPE_NFS: return "nfs"; case CIFS_REPARSE_TYPE_WSL: @@ -177,6 +181,39 @@ static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type) } } +enum cifs_symlink_type { + CIFS_SYMLINK_TYPE_DEFAULT, + CIFS_SYMLINK_TYPE_NONE, + CIFS_SYMLINK_TYPE_NATIVE, + CIFS_SYMLINK_TYPE_UNIX, + CIFS_SYMLINK_TYPE_MFSYMLINKS, + CIFS_SYMLINK_TYPE_SFU, + CIFS_SYMLINK_TYPE_NFS, + CIFS_SYMLINK_TYPE_WSL, +}; + +static inline const char *cifs_symlink_type_str(enum cifs_symlink_type type) +{ + switch (type) { + case CIFS_SYMLINK_TYPE_NONE: + return "none"; + case CIFS_SYMLINK_TYPE_NATIVE: + return "native"; + case CIFS_SYMLINK_TYPE_UNIX: + return "unix"; + case CIFS_SYMLINK_TYPE_MFSYMLINKS: + return "mfsymlinks"; + case CIFS_SYMLINK_TYPE_SFU: + return "sfu"; + case CIFS_SYMLINK_TYPE_NFS: + return "nfs"; + case CIFS_SYMLINK_TYPE_WSL: + return "wsl"; + default: + return "unknown"; + } +} + struct session_key { unsigned int len; char *response; @@ -215,10 +252,7 @@ struct cifs_cred { struct cifs_open_info_data { bool adjust_tz; - union { - bool reparse_point; - bool symlink; - }; + bool reparse_point; struct { /* ioctl response buffer */ struct { @@ -226,10 +260,7 @@ struct cifs_open_info_data { struct kvec iov; } io; __u32 tag; - union { - struct reparse_data_buffer *buf; - struct reparse_posix_data *posix; - }; + struct reparse_data_buffer *buf; } reparse; struct { __u8 eas[SMB2_WSL_MAX_QUERY_EA_RESP_SIZE]; @@ -751,6 +782,7 @@ struct TCP_Server_Info { bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ + bool sec_iakerb; /* supports pass-through auth for Kerberos (krb5 proxy) */ bool large_buf; /* is current buffer large? */ /* use SMBD connection instead of socket */ bool rdma; @@ -2118,6 +2150,8 @@ static inline char *get_security_type_str(enum securityEnum sectype) return "Kerberos"; case NTLMv2: return "NTLMv2"; + case IAKerb: + return "IAKerb"; default: return "Unknown"; } @@ -2173,11 +2207,13 @@ static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses) static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const FILE_ALL_INFO *src) { - memcpy(dst, src, (size_t)((u8 *)&src->AccessFlags - (u8 *)src)); - dst->AccessFlags = src->AccessFlags; - dst->CurrentByteOffset = src->CurrentByteOffset; - dst->Mode = src->Mode; - dst->AlignmentRequirement = src->AlignmentRequirement; + memcpy(dst, src, (size_t)((u8 *)&src->EASize - (u8 *)src)); + dst->IndexNumber = 0; + dst->EASize = src->EASize; + dst->AccessFlags = 0; + dst->CurrentByteOffset = 0; + dst->Mode = 0; + dst->AlignmentRequirement = 0; dst->FileNameLength = src->FileNameLength; } diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 5c047b00516f2..48d0d6f439cf4 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -190,42 +190,82 @@ */ #define FILE_READ_DATA 0x00000001 /* Data can be read from the file */ + /* or directory child entries can */ + /* be listed together with the */ + /* associated child attributes */ + /* (so the FILE_READ_ATTRIBUTES on */ + /* the child entry is not needed) */ #define FILE_WRITE_DATA 0x00000002 /* Data can be written to the file */ + /* or new file can be created in */ + /* the directory */ #define FILE_APPEND_DATA 0x00000004 /* Data can be appended to the file */ + /* (for non-local files over SMB it */ + /* is same as FILE_WRITE_DATA) */ + /* or new subdirectory can be */ + /* created in the directory */ #define FILE_READ_EA 0x00000008 /* Extended attributes associated */ /* with the file can be read */ #define FILE_WRITE_EA 0x00000010 /* Extended attributes associated */ /* with the file can be written */ #define FILE_EXECUTE 0x00000020 /*Data can be read into memory from */ /* the file using system paging I/O */ -#define FILE_DELETE_CHILD 0x00000040 + /* for executing the file / script */ + /* or right to traverse directory */ + /* (but by default all users have */ + /* directory bypass traverse */ + /* privilege and do not need this */ + /* permission on directories at all)*/ +#define FILE_DELETE_CHILD 0x00000040 /* Child entry can be deleted from */ + /* the directory (so the DELETE on */ + /* the child entry is not needed) */ #define FILE_READ_ATTRIBUTES 0x00000080 /* Attributes associated with the */ - /* file can be read */ + /* file or directory can be read */ #define FILE_WRITE_ATTRIBUTES 0x00000100 /* Attributes associated with the */ - /* file can be written */ -#define DELETE 0x00010000 /* The file can be deleted */ -#define READ_CONTROL 0x00020000 /* The access control list and */ - /* ownership associated with the */ - /* file can be read */ -#define WRITE_DAC 0x00040000 /* The access control list and */ - /* ownership associated with the */ - /* file can be written. */ + /* file or directory can be written */ +#define DELETE 0x00010000 /* The file or dir can be deleted */ +#define READ_CONTROL 0x00020000 /* The discretionary access control */ + /* list and ownership associated */ + /* with the file or dir can be read */ +#define WRITE_DAC 0x00040000 /* The discretionary access control */ + /* list associated with the file or */ + /* directory can be written */ #define WRITE_OWNER 0x00080000 /* Ownership information associated */ - /* with the file can be written */ + /* with the file/dir can be written */ #define SYNCHRONIZE 0x00100000 /* The file handle can waited on to */ /* synchronize with the completion */ /* of an input/output request */ #define SYSTEM_SECURITY 0x01000000 /* The system access control list */ - /* can be read and changed */ -#define GENERIC_ALL 0x10000000 -#define GENERIC_EXECUTE 0x20000000 -#define GENERIC_WRITE 0x40000000 -#define GENERIC_READ 0x80000000 - /* In summary - Relevant file */ - /* access flags from CIFS are */ - /* file_read_data, file_write_data */ - /* file_execute, file_read_attributes*/ - /* write_dac, and delete. */ + /* associated with the file or */ + /* directory can be read or written */ + /* (cannot be in DACL, can in SACL) */ +#define MAXIMUM_ALLOWED 0x02000000 /* Maximal subset of GENERIC_ALL */ + /* permissions which can be granted */ + /* (cannot be in DACL nor SACL) */ +#define GENERIC_ALL 0x10000000 /* Same as: GENERIC_EXECUTE | */ + /* GENERIC_WRITE | */ + /* GENERIC_READ | */ + /* FILE_DELETE_CHILD | */ + /* DELETE | */ + /* WRITE_DAC | */ + /* WRITE_OWNER */ + /* So GENERIC_ALL contains all bits */ + /* mentioned above except these two */ + /* SYSTEM_SECURITY MAXIMUM_ALLOWED */ +#define GENERIC_EXECUTE 0x20000000 /* Same as: FILE_EXECUTE | */ + /* FILE_READ_ATTRIBUTES | */ + /* READ_CONTROL | */ + /* SYNCHRONIZE */ +#define GENERIC_WRITE 0x40000000 /* Same as: FILE_WRITE_DATA | */ + /* FILE_APPEND_DATA | */ + /* FILE_WRITE_EA | */ + /* FILE_WRITE_ATTRIBUTES | */ + /* READ_CONTROL | */ + /* SYNCHRONIZE */ +#define GENERIC_READ 0x80000000 /* Same as: FILE_READ_DATA | */ + /* FILE_READ_EA | */ + /* FILE_READ_ATTRIBUTES | */ + /* READ_CONTROL | */ + /* SYNCHRONIZE */ #define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES) #define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \ @@ -1484,20 +1524,6 @@ struct file_notify_information { __u8 FileName[]; } __attribute__((packed)); -/* For IO_REPARSE_TAG_NFS */ -#define NFS_SPECFILE_LNK 0x00000000014B4E4C -#define NFS_SPECFILE_CHR 0x0000000000524843 -#define NFS_SPECFILE_BLK 0x00000000004B4C42 -#define NFS_SPECFILE_FIFO 0x000000004F464946 -#define NFS_SPECFILE_SOCK 0x000000004B434F53 -struct reparse_posix_data { - __le32 ReparseTag; - __le16 ReparseDataLength; - __u16 Reserved; - __le64 InodeType; /* LNK, FIFO, CHR etc. */ - __u8 DataBuffer[]; -} __attribute__((packed)); - struct cifs_quota_data { __u32 rsrvd1; /* 0 */ __u32 sid_size; @@ -2264,13 +2290,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __u8 DeletePending; __u8 Directory; __u16 Pad2; - __le64 IndexNumber; __le32 EASize; - __le32 AccessFlags; - __u64 IndexNumber1; - __le64 CurrentByteOffset; - __le32 Mode; - __le32 AlignmentRequirement; __le32 FileNameLength; union { char __pad; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 223e5e231f428..81680001944df 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -557,7 +557,7 @@ extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage, struct cifs_sb_info *cifs_sb); extern int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid, struct smb_ntsd **acl_inf, __u32 *buflen); + __u16 fid, struct smb_ntsd **acl_inf, __u32 *buflen, __u32 info); extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16, struct smb_ntsd *pntsd, __u32 len, int aclflag); extern int cifs_do_get_acl(const unsigned int xid, struct cifs_tcon *tcon, @@ -656,7 +656,7 @@ char *extract_sharename(const char *unc); int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, const char *full_path, - bool unicode, struct cifs_open_info_data *data); + struct cifs_open_info_data *data); int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 7f1cacc89dbb0..3feaa0f681699 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -3369,7 +3369,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata, /* Get Security Descriptor (by handle) from remote server for a file or dir */ int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, - struct smb_ntsd **acl_inf, __u32 *pbuflen) + struct smb_ntsd **acl_inf, __u32 *pbuflen, __u32 info) { int rc = 0; int buf_type = 0; @@ -3392,7 +3392,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, pSMB->MaxSetupCount = 0; pSMB->Fid = fid; /* file handle always le */ pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP | - CIFS_ACL_DACL); + CIFS_ACL_DACL | info); pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ inc_rfc1001_len(pSMB, 11); iov[0].iov_base = (char *)pSMB; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 880d7cf8b730d..f917de020dd5d 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2849,6 +2849,10 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) return 0; if (old->ctx->reparse_type != new->ctx->reparse_type) return 0; + if (old->ctx->nonativesocket != new->ctx->nonativesocket) + return 0; + if (old->ctx->symlink_type != new->ctx->symlink_type) + return 0; return 1; } diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 5381f05420bc2..e9b286d9a7ba3 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -133,6 +133,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag("rootfs", Opt_rootfs), fsparam_flag("compress", Opt_compress), fsparam_flag("witness", Opt_witness), + fsparam_flag_no("nativesocket", Opt_nativesocket), /* Mount options which take uid or gid */ fsparam_uid("backupuid", Opt_backupuid), @@ -185,6 +186,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_string("cache", Opt_cache), fsparam_string("reparse", Opt_reparse), fsparam_string("upcall_target", Opt_upcalltarget), + fsparam_string("symlink", Opt_symlink), + fsparam_string("symlinkroot", Opt_symlinkroot), /* Arguments that should be ignored */ fsparam_flag("guest", Opt_ignore), @@ -332,6 +335,7 @@ cifs_parse_cache_flavor(struct fs_context *fc, char *value, struct smb3_fs_conte static const match_table_t reparse_flavor_tokens = { { Opt_reparse_default, "default" }, + { Opt_reparse_none, "none" }, { Opt_reparse_nfs, "nfs" }, { Opt_reparse_wsl, "wsl" }, { Opt_reparse_err, NULL }, @@ -346,6 +350,9 @@ static int parse_reparse_flavor(struct fs_context *fc, char *value, case Opt_reparse_default: ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT; break; + case Opt_reparse_none: + ctx->reparse_type = CIFS_REPARSE_TYPE_NONE; + break; case Opt_reparse_nfs: ctx->reparse_type = CIFS_REPARSE_TYPE_NFS; break; @@ -359,6 +366,55 @@ static int parse_reparse_flavor(struct fs_context *fc, char *value, return 0; } +static const match_table_t symlink_flavor_tokens = { + { Opt_symlink_default, "default" }, + { Opt_symlink_none, "none" }, + { Opt_symlink_native, "native" }, + { Opt_symlink_unix, "unix" }, + { Opt_symlink_mfsymlinks, "mfsymlinks" }, + { Opt_symlink_sfu, "sfu" }, + { Opt_symlink_nfs, "nfs" }, + { Opt_symlink_wsl, "wsl" }, + { Opt_symlink_err, NULL }, +}; + +static int parse_symlink_flavor(struct fs_context *fc, char *value, + struct smb3_fs_context *ctx) +{ + substring_t args[MAX_OPT_ARGS]; + + switch (match_token(value, symlink_flavor_tokens, args)) { + case Opt_symlink_default: + ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT; + break; + case Opt_symlink_none: + ctx->symlink_type = CIFS_SYMLINK_TYPE_NONE; + break; + case Opt_symlink_native: + ctx->symlink_type = CIFS_SYMLINK_TYPE_NATIVE; + break; + case Opt_symlink_unix: + ctx->symlink_type = CIFS_SYMLINK_TYPE_UNIX; + break; + case Opt_symlink_mfsymlinks: + ctx->symlink_type = CIFS_SYMLINK_TYPE_MFSYMLINKS; + break; + case Opt_symlink_sfu: + ctx->symlink_type = CIFS_SYMLINK_TYPE_SFU; + break; + case Opt_symlink_nfs: + ctx->symlink_type = CIFS_SYMLINK_TYPE_NFS; + break; + case Opt_symlink_wsl: + ctx->symlink_type = CIFS_SYMLINK_TYPE_WSL; + break; + default: + cifs_errorf(fc, "bad symlink= option: %s\n", value); + return 1; + } + return 0; +} + #define DUP_CTX_STR(field) \ do { \ if (ctx->field) { \ @@ -386,6 +442,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx new_ctx->iocharset = NULL; new_ctx->leaf_fullpath = NULL; new_ctx->dns_dom = NULL; + new_ctx->symlinkroot = NULL; /* * Make sure to stay in sync with smb3_cleanup_fs_context_contents() */ @@ -401,6 +458,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(iocharset); DUP_CTX_STR(leaf_fullpath); DUP_CTX_STR(dns_dom); + DUP_CTX_STR(symlinkroot); return 0; } @@ -1727,6 +1785,23 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, if (parse_reparse_flavor(fc, param->string, ctx)) goto cifs_parse_mount_err; break; + case Opt_nativesocket: + ctx->nonativesocket = result.negated; + break; + case Opt_symlink: + if (parse_symlink_flavor(fc, param->string, ctx)) + goto cifs_parse_mount_err; + break; + case Opt_symlinkroot: + if (param->string[0] != '/') { + cifs_errorf(fc, "symlinkroot mount options must be absolute path\n"); + goto cifs_parse_mount_err; + } + kfree(ctx->symlinkroot); + ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL); + if (!ctx->symlinkroot) + goto cifs_parse_mount_err; + break; } /* case Opt_ignore: - is ignored as expected ... */ @@ -1735,6 +1810,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } + /* + * By default resolve all native absolute symlinks relative to "/mnt/". + * Same default has drvfs driver running in WSL for resolving SMB shares. + */ + if (!ctx->symlinkroot) + ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL); + return 0; cifs_parse_mount_err: @@ -1745,6 +1827,24 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, return -EINVAL; } +enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb) +{ + if (cifs_sb->ctx->symlink_type == CIFS_SYMLINK_TYPE_DEFAULT) { + if (cifs_sb->ctx->mfsymlinks) + return CIFS_SYMLINK_TYPE_MFSYMLINKS; + else if (cifs_sb->ctx->sfu_emul) + return CIFS_SYMLINK_TYPE_SFU; + else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext) + return CIFS_SYMLINK_TYPE_UNIX; + else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) + return CIFS_SYMLINK_TYPE_NATIVE; + else + return CIFS_SYMLINK_TYPE_NONE; + } else { + return cifs_sb->ctx->symlink_type; + } +} + int smb3_init_fs_context(struct fs_context *fc) { struct smb3_fs_context *ctx; @@ -1821,6 +1921,8 @@ int smb3_init_fs_context(struct fs_context *fc) ctx->retrans = 1; ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT; + ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT; + ctx->nonativesocket = 0; /* * short int override_uid = -1; @@ -1867,6 +1969,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->leaf_fullpath = NULL; kfree(ctx->dns_dom); ctx->dns_dom = NULL; + kfree(ctx->symlinkroot); + ctx->symlinkroot = NULL; } void diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 8813533345ee7..881bfc08667e7 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -43,11 +43,24 @@ enum { enum cifs_reparse_parm { Opt_reparse_default, + Opt_reparse_none, Opt_reparse_nfs, Opt_reparse_wsl, Opt_reparse_err }; +enum cifs_symlink_parm { + Opt_symlink_default, + Opt_symlink_none, + Opt_symlink_native, + Opt_symlink_unix, + Opt_symlink_mfsymlinks, + Opt_symlink_sfu, + Opt_symlink_nfs, + Opt_symlink_wsl, + Opt_symlink_err +}; + enum cifs_sec_param { Opt_sec_krb5, Opt_sec_krb5i, @@ -166,6 +179,9 @@ enum cifs_param { Opt_cache, Opt_reparse, Opt_upcalltarget, + Opt_nativesocket, + Opt_symlink, + Opt_symlinkroot, /* Mount options to be ignored */ Opt_ignore, @@ -294,12 +310,17 @@ struct smb3_fs_context { struct cifs_ses *dfs_root_ses; bool dfs_automount:1; /* set for dfs automount only */ enum cifs_reparse_type reparse_type; + enum cifs_symlink_type symlink_type; + bool nonativesocket:1; bool dfs_conn:1; /* set for dfs mounts */ char *dns_dom; + char *symlinkroot; /* top level directory for native SMB symlinks in absolute format */ }; extern const struct fs_parameter_spec smb3_fs_parameters[]; +extern enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb); + extern int smb3_init_fs_context(struct fs_context *fc); extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx); extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 93e9188b2632f..9cc31cf6ebd07 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -990,7 +990,7 @@ cifs_get_file_info(struct file *filp) /* TODO: add support to query reparse tag */ data.adjust_tz = false; if (data.symlink_target) { - data.symlink = true; + data.reparse_point = true; data.reparse.tag = IO_REPARSE_TAG_SYMLINK; } path = build_path_from_dentry(dentry, page); @@ -1216,6 +1216,11 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, full_path, iov, data); } + + if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { + bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY; + rc = smb2_fix_symlink_target_type(&data->symlink_target, directory, cifs_sb); + } break; } diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index 47ddeb7fa1116..6e6c09cc5ce7a 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -18,6 +18,7 @@ #include "cifs_unicode.h" #include "smb2proto.h" #include "cifs_ioctl.h" +#include "fs_context.h" /* * M-F Symlink Functions - Begin @@ -604,22 +605,53 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, cifs_dbg(FYI, "symname is %s\n", symname); /* BB what if DFS and this volume is on different share? BB */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { - rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon, - full_path, S_IFLNK, 0, symname); + rc = -EOPNOTSUPP; + switch (get_cifs_symlink_type(cifs_sb)) { + case CIFS_SYMLINK_TYPE_DEFAULT: + /* should not happen, get_cifs_symlink_type() resolves the default */ + break; + + case CIFS_SYMLINK_TYPE_NONE: + break; + + case CIFS_SYMLINK_TYPE_UNIX: #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - } else if (pTcon->unix_ext) { - rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); + if (pTcon->unix_ext) { + rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, + symname, + cifs_sb->local_nls, + cifs_remap(cifs_sb)); + } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - } else if (server->ops->create_reparse_symlink) { - rc = server->ops->create_reparse_symlink(xid, inode, direntry, - pTcon, full_path, - symname); - goto symlink_exit; + break; + + case CIFS_SYMLINK_TYPE_MFSYMLINKS: + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + rc = create_mf_symlink(xid, pTcon, cifs_sb, + full_path, symname); + } + break; + + case CIFS_SYMLINK_TYPE_SFU: + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon, + full_path, S_IFLNK, + 0, symname); + } + break; + + case CIFS_SYMLINK_TYPE_NATIVE: + case CIFS_SYMLINK_TYPE_NFS: + case CIFS_SYMLINK_TYPE_WSL: + if (server->ops->create_reparse_symlink) { + rc = server->ops->create_reparse_symlink(xid, inode, + direntry, + pTcon, + full_path, + symname); + goto symlink_exit; + } + break; } if (rc == 0) { diff --git a/fs/smb/client/netmisc.c b/fs/smb/client/netmisc.c index 17b3e21ea8689..9ec20601cee2e 100644 --- a/fs/smb/client/netmisc.c +++ b/fs/smb/client/netmisc.c @@ -313,7 +313,6 @@ static const struct { ERRDOS, 2215, NT_STATUS_NO_LOGON_SERVERS}, { ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_LOGON_SESSION}, { ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_PRIVILEGE}, { - ERRDOS, ERRnoaccess, NT_STATUS_PRIVILEGE_NOT_HELD}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ACCOUNT_NAME}, { ERRHRD, ERRgeneral, NT_STATUS_USER_EXISTS}, /* { This NT error code was 'sqashed' @@ -871,6 +870,15 @@ map_smb_to_linux_error(char *buf, bool logErr) } /* else ERRHRD class errors or junk - return EIO */ + /* special cases for NT status codes which cannot be translated to DOS codes */ + if (smb->Flags2 & SMBFLG2_ERR_STATUS) { + __u32 err = le32_to_cpu(smb->Status.CifsError); + if (err == (NT_STATUS_NOT_A_REPARSE_POINT)) + rc = -ENODATA; + else if (err == (NT_STATUS_PRIVILEGE_NOT_HELD)) + rc = -EPERM; + } + cifs_dbg(FYI, "Mapping smb error code 0x%x to POSIX err %d\n", le32_to_cpu(smb->Status.CifsError), rc); diff --git a/fs/smb/client/nterr.c b/fs/smb/client/nterr.c index d396a8e98a81c..8f0bc441295ef 100644 --- a/fs/smb/client/nterr.c +++ b/fs/smb/client/nterr.c @@ -674,6 +674,7 @@ const struct nt_err_code_struct nt_errs[] = { {"NT_STATUS_QUOTA_LIST_INCONSISTENT", NT_STATUS_QUOTA_LIST_INCONSISTENT}, {"NT_STATUS_FILE_IS_OFFLINE", NT_STATUS_FILE_IS_OFFLINE}, + {"NT_STATUS_NOT_A_REPARSE_POINT", NT_STATUS_NOT_A_REPARSE_POINT}, {"NT_STATUS_NO_MORE_ENTRIES", NT_STATUS_NO_MORE_ENTRIES}, {"NT_STATUS_MORE_ENTRIES", NT_STATUS_MORE_ENTRIES}, {"NT_STATUS_SOME_UNMAPPED", NT_STATUS_SOME_UNMAPPED}, diff --git a/fs/smb/client/nterr.h b/fs/smb/client/nterr.h index edd4741cab0a1..180602c22355e 100644 --- a/fs/smb/client/nterr.h +++ b/fs/smb/client/nterr.h @@ -546,6 +546,7 @@ extern const struct nt_err_code_struct nt_errs[]; #define NT_STATUS_TOO_MANY_LINKS 0xC0000000 | 0x0265 #define NT_STATUS_QUOTA_LIST_INCONSISTENT 0xC0000000 | 0x0266 #define NT_STATUS_FILE_IS_OFFLINE 0xC0000000 | 0x0267 +#define NT_STATUS_NOT_A_REPARSE_POINT 0xC0000000 | 0x0275 #define NT_STATUS_NO_SUCH_JOB 0xC0000000 | 0xEDE /* scheduler */ #endif /* _NTERR_H */ diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index d88b41133e00c..0a5a52a8a7dd1 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -14,6 +14,20 @@ #include "fs_context.h" #include "reparse.h" +static int mknod_nfs(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev, + const char *symname); + +static int mknod_wsl(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev, + const char *symname); + +static int create_native_symlink(const unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, const char *symname); + static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb, const unsigned int xid, const char *full_path, @@ -23,38 +37,149 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb, int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname) +{ + switch (get_cifs_symlink_type(CIFS_SB(inode->i_sb))) { + case CIFS_SYMLINK_TYPE_NATIVE: + return create_native_symlink(xid, inode, dentry, tcon, full_path, symname); + case CIFS_SYMLINK_TYPE_NFS: + return mknod_nfs(xid, inode, dentry, tcon, full_path, S_IFLNK, 0, symname); + case CIFS_SYMLINK_TYPE_WSL: + return mknod_wsl(xid, inode, dentry, tcon, full_path, S_IFLNK, 0, symname); + default: + return -EOPNOTSUPP; + } +} + +static int create_native_symlink(const unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, const char *symname) { struct reparse_symlink_data_buffer *buf = NULL; - struct cifs_open_info_data data; + struct cifs_open_info_data data = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *new; struct kvec iov; - __le16 *path; + __le16 *path = NULL; bool directory; - char *sym, sep = CIFS_DIR_SEP(cifs_sb); - u16 len, plen; + char *symlink_target = NULL; + char *sym = NULL; + char sep = CIFS_DIR_SEP(cifs_sb); + u16 len, plen, poff, slen; int rc = 0; if (strlen(symname) > REPARSE_SYM_PATH_MAX) return -ENAMETOOLONG; - sym = kstrdup(symname, GFP_KERNEL); - if (!sym) - return -ENOMEM; + symlink_target = kstrdup(symname, GFP_KERNEL); + if (!symlink_target) { + rc = -ENOMEM; + goto out; + } data = (struct cifs_open_info_data) { .reparse_point = true, .reparse = { .tag = IO_REPARSE_TAG_SYMLINK, }, - .symlink_target = sym, + .symlink_target = symlink_target, }; - convert_delimiter(sym, sep); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + /* + * This is a request to create an absolute symlink on the server + * which does not support POSIX paths, and expects symlink in + * NT-style path. So convert absolute Linux symlink target path + * to the absolute NT-style path. Root of the NT-style path for + * symlinks is specified in "symlinkroot" mount option. This will + * ensure compatibility of this symlink stored in absolute form + * on the SMB server. + */ + if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) { + /* + * If the absolute Linux symlink target path is not + * inside "symlinkroot" location then there is no way + * to convert such Linux symlink to NT-style path. + */ + cifs_dbg(VFS, + "absolute symlink '%s' cannot be converted to NT format " + "because it is outside of symlinkroot='%s'\n", + symname, cifs_sb->ctx->symlinkroot); + rc = -EINVAL; + goto out; + } + len = strlen(cifs_sb->ctx->symlinkroot); + if (cifs_sb->ctx->symlinkroot[len-1] != '/') + len++; + if (symname[len] >= 'a' && symname[len] <= 'z' && + (symname[len+1] == '/' || symname[len+1] == '\0')) { + /* + * Symlink points to Linux target /symlinkroot/x/path/... + * where 'x' is the lowercase local Windows drive. + * NT-style path for 'x' has common form \??\X:\path\... + * with uppercase local Windows drive. + */ + int common_path_len = strlen(symname+len+1)+1; + sym = kzalloc(6+common_path_len, GFP_KERNEL); + if (!sym) { + rc = -ENOMEM; + goto out; + } + memcpy(sym, "\\??\\", 4); + sym[4] = symname[len] - ('a'-'A'); + sym[5] = ':'; + memcpy(sym+6, symname+len+1, common_path_len); + } else { + /* Unhandled absolute symlink. Report an error. */ + cifs_dbg( + VFS, + "absolute symlink '%s' cannot be converted to NT format " + "because it points to unknown target\n", + symname); + rc = -EINVAL; + goto out; + } + } else { + /* + * This is request to either create an absolute symlink on + * server which expects POSIX paths or it is an request to + * create a relative symlink from the current directory. + * These paths have same format as relative SMB symlinks, + * so no conversion is needed. So just take symname as-is. + */ + sym = kstrdup(symname, GFP_KERNEL); + if (!sym) { + rc = -ENOMEM; + goto out; + } + } + + if (sep == '\\') + convert_delimiter(sym, sep); + + /* + * For absolute NT symlinks it is required to pass also leading + * backslash and to not mangle NT object prefix "\\??\\" and not to + * mangle colon in drive letter. But cifs_convert_path_to_utf16() + * removes leading backslash and replaces '?' and ':'. So temporary + * mask these characters in NT object prefix by '_' and then change + * them back. + */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') + sym[0] = sym[1] = sym[2] = sym[5] = '_'; + path = cifs_convert_path_to_utf16(sym, cifs_sb); if (!path) { rc = -ENOMEM; goto out; } + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + sym[0] = '\\'; + sym[1] = sym[2] = '?'; + sym[5] = ':'; + path[0] = cpu_to_le16('\\'); + path[1] = path[2] = cpu_to_le16('?'); + path[5] = cpu_to_le16(':'); + } + /* * SMB distinguish between symlink to directory and symlink to file. * They cannot be exchanged (symlink of file type which points to @@ -67,8 +192,18 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, if (rc < 0) goto out; - plen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX); - len = sizeof(*buf) + plen * 2; + slen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX); + poff = 0; + plen = slen; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + /* + * For absolute NT symlinks skip leading "\\??\\" in PrintName as + * PrintName is user visible location in DOS/Win32 format (not in NT format). + */ + poff = 4; + plen -= 2 * poff; + } + len = sizeof(*buf) + plen + slen; buf = kzalloc(len, GFP_KERNEL); if (!buf) { rc = -ENOMEM; @@ -77,17 +212,17 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, buf->ReparseTag = cpu_to_le32(IO_REPARSE_TAG_SYMLINK); buf->ReparseDataLength = cpu_to_le16(len - sizeof(struct reparse_data_buffer)); + buf->SubstituteNameOffset = cpu_to_le16(plen); - buf->SubstituteNameLength = cpu_to_le16(plen); - memcpy(&buf->PathBuffer[plen], path, plen); + buf->SubstituteNameLength = cpu_to_le16(slen); + memcpy(&buf->PathBuffer[plen], path, slen); + buf->PrintNameOffset = 0; buf->PrintNameLength = cpu_to_le16(plen); - memcpy(buf->PathBuffer, path, plen); + memcpy(buf->PathBuffer, path+poff, plen); + buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0); - if (*sym != sep) - buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE); - convert_delimiter(sym, '/'); iov.iov_base = buf; iov.iov_len = len; new = smb2_get_reparse_inode(&data, inode->i_sb, xid, @@ -98,6 +233,7 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, else rc = PTR_ERR(new); out: + kfree(sym); kfree(path); cifs_free_open_info(&data); kfree(buf); @@ -242,8 +378,39 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb, return 0; } -static int nfs_set_reparse_buf(struct reparse_posix_data *buf, +static int create_native_socket(const unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path) +{ + struct reparse_data_buffer buf = { + .ReparseTag = cpu_to_le32(IO_REPARSE_TAG_AF_UNIX), + .ReparseDataLength = cpu_to_le16(0), + }; + struct cifs_open_info_data data = { + .reparse_point = true, + .reparse = { .tag = IO_REPARSE_TAG_AF_UNIX, .buf = &buf, }, + }; + struct kvec iov = { + .iov_base = &buf, + .iov_len = sizeof(buf), + }; + struct inode *new; + int rc = 0; + + new = smb2_get_reparse_inode(&data, inode->i_sb, xid, + tcon, full_path, false, &iov, NULL); + if (!IS_ERR(new)) + d_instantiate(dentry, new); + else + rc = PTR_ERR(new); + cifs_free_open_info(&data); + return rc; +} + +static int nfs_set_reparse_buf(struct reparse_nfs_data_buffer *buf, mode_t mode, dev_t dev, + __le16 *symname_utf16, + int symname_utf16_len, struct kvec *iov) { u64 type; @@ -254,7 +421,13 @@ static int nfs_set_reparse_buf(struct reparse_posix_data *buf, switch ((type = reparse_mode_nfs_type(mode))) { case NFS_SPECFILE_BLK: case NFS_SPECFILE_CHR: - dlen = sizeof(__le64); + dlen = 2 * sizeof(__le32); + ((__le32 *)buf->DataBuffer)[0] = cpu_to_le32(MAJOR(dev)); + ((__le32 *)buf->DataBuffer)[1] = cpu_to_le32(MINOR(dev)); + break; + case NFS_SPECFILE_LNK: + dlen = symname_utf16_len; + memcpy(buf->DataBuffer, symname_utf16, symname_utf16_len); break; case NFS_SPECFILE_FIFO: case NFS_SPECFILE_SOCK: @@ -269,8 +442,6 @@ static int nfs_set_reparse_buf(struct reparse_posix_data *buf, buf->InodeType = cpu_to_le64(type); buf->ReparseDataLength = cpu_to_le16(len + dlen - sizeof(struct reparse_data_buffer)); - *(__le64 *)buf->DataBuffer = cpu_to_le64(((u64)MINOR(dev) << 32) | - MAJOR(dev)); iov->iov_base = buf; iov->iov_len = len + dlen; return 0; @@ -278,23 +449,45 @@ static int nfs_set_reparse_buf(struct reparse_posix_data *buf, static int mknod_nfs(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev) + const char *full_path, umode_t mode, dev_t dev, + const char *symname) { + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_open_info_data data; - struct reparse_posix_data *p; + struct reparse_nfs_data_buffer *p = NULL; + __le16 *symname_utf16 = NULL; + int symname_utf16_len = 0; struct inode *new; struct kvec iov; __u8 buf[sizeof(*p) + sizeof(__le64)]; int rc; - p = (struct reparse_posix_data *)buf; - rc = nfs_set_reparse_buf(p, mode, dev, &iov); + if (S_ISLNK(mode)) { + symname_utf16 = cifs_strndup_to_utf16(symname, strlen(symname), + &symname_utf16_len, + cifs_sb->local_nls, + NO_MAP_UNI_RSVD); + if (!symname_utf16) { + rc = -ENOMEM; + goto out; + } + symname_utf16_len -= 2; /* symlink is without trailing wide-nul */ + p = kzalloc(sizeof(*p) + symname_utf16_len, GFP_KERNEL); + if (!p) { + rc = -ENOMEM; + goto out; + } + } else { + p = (struct reparse_nfs_data_buffer *)buf; + } + rc = nfs_set_reparse_buf(p, mode, dev, symname_utf16, symname_utf16_len, &iov); if (rc) - return rc; + goto out; data = (struct cifs_open_info_data) { .reparse_point = true, - .reparse = { .tag = IO_REPARSE_TAG_NFS, .posix = p, }, + .reparse = { .tag = IO_REPARSE_TAG_NFS, .buf = (struct reparse_data_buffer *)p, }, + .symlink_target = kstrdup(symname, GFP_KERNEL), }; new = smb2_get_reparse_inode(&data, inode->i_sb, xid, @@ -304,12 +497,25 @@ static int mknod_nfs(unsigned int xid, struct inode *inode, else rc = PTR_ERR(new); cifs_free_open_info(&data); +out: + if (S_ISLNK(mode)) { + kfree(symname_utf16); + kfree(p); + } return rc; } -static int wsl_set_reparse_buf(struct reparse_data_buffer *buf, - mode_t mode, struct kvec *iov) +static int wsl_set_reparse_buf(struct reparse_data_buffer **buf, + mode_t mode, const char *symname, + struct cifs_sb_info *cifs_sb, + struct kvec *iov) { + struct reparse_wsl_symlink_data_buffer *symlink_buf; + __le16 *symname_utf16; + int symname_utf16_len; + int symname_utf8_maxlen; + int symname_utf8_len; + size_t buf_len; u32 tag; switch ((tag = reparse_mode_wsl_tag(mode))) { @@ -317,16 +523,45 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_CHR: case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_AF_UNIX: + buf_len = sizeof(struct reparse_data_buffer); + *buf = kzalloc(buf_len, GFP_KERNEL); + if (!*buf) + return -ENOMEM; + break; + case IO_REPARSE_TAG_LX_SYMLINK: + symname_utf16 = cifs_strndup_to_utf16(symname, strlen(symname), + &symname_utf16_len, + cifs_sb->local_nls, + NO_MAP_UNI_RSVD); + if (!symname_utf16) + return -ENOMEM; + symname_utf8_maxlen = symname_utf16_len/2*3; + symlink_buf = kzalloc(sizeof(struct reparse_wsl_symlink_data_buffer) + + symname_utf8_maxlen, GFP_KERNEL); + if (!symlink_buf) { + kfree(symname_utf16); + return -ENOMEM; + } + /* Flag 0x02000000 is unknown, but all wsl symlinks have this value */ + symlink_buf->Flags = cpu_to_le32(0x02000000); + /* PathBuffer is in UTF-8 but without trailing null-term byte */ + symname_utf8_len = utf16s_to_utf8s((wchar_t *)symname_utf16, symname_utf16_len/2, + UTF16_LITTLE_ENDIAN, + symlink_buf->PathBuffer, + symname_utf8_maxlen); + *buf = (struct reparse_data_buffer *)symlink_buf; + buf_len = sizeof(struct reparse_wsl_symlink_data_buffer) + symname_utf8_len; + kfree(symname_utf16); break; default: return -EOPNOTSUPP; } - buf->ReparseTag = cpu_to_le32(tag); - buf->Reserved = 0; - buf->ReparseDataLength = 0; - iov->iov_base = buf; - iov->iov_len = sizeof(*buf); + (*buf)->ReparseTag = cpu_to_le32(tag); + (*buf)->Reserved = 0; + (*buf)->ReparseDataLength = cpu_to_le16(buf_len - sizeof(struct reparse_data_buffer)); + iov->iov_base = *buf; + iov->iov_len = buf_len; return 0; } @@ -415,27 +650,32 @@ static int wsl_set_xattrs(struct inode *inode, umode_t _mode, static int mknod_wsl(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev) + const char *full_path, umode_t mode, dev_t dev, + const char *symname) { + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_open_info_data data; - struct reparse_data_buffer buf; + struct reparse_data_buffer *buf; struct smb2_create_ea_ctx *cc; struct inode *new; unsigned int len; struct kvec reparse_iov, xattr_iov; int rc; - rc = wsl_set_reparse_buf(&buf, mode, &reparse_iov); + rc = wsl_set_reparse_buf(&buf, mode, symname, cifs_sb, &reparse_iov); if (rc) return rc; rc = wsl_set_xattrs(inode, mode, dev, &xattr_iov); - if (rc) + if (rc) { + kfree(buf); return rc; + } data = (struct cifs_open_info_data) { .reparse_point = true, - .reparse = { .tag = le32_to_cpu(buf.ReparseTag), .buf = &buf, }, + .reparse = { .tag = le32_to_cpu(buf->ReparseTag), .buf = buf, }, + .symlink_target = kstrdup(symname, GFP_KERNEL), }; cc = xattr_iov.iov_base; @@ -452,6 +692,7 @@ static int mknod_wsl(unsigned int xid, struct inode *inode, rc = PTR_ERR(new); cifs_free_open_info(&data); kfree(xattr_iov.iov_base); + kfree(buf); return rc; } @@ -460,21 +701,22 @@ int smb2_mknod_reparse(unsigned int xid, struct inode *inode, const char *full_path, umode_t mode, dev_t dev) { struct smb3_fs_context *ctx = CIFS_SB(inode->i_sb)->ctx; - int rc = -EOPNOTSUPP; + + if (S_ISSOCK(mode) && !ctx->nonativesocket && ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) + return create_native_socket(xid, inode, dentry, tcon, full_path); switch (ctx->reparse_type) { case CIFS_REPARSE_TYPE_NFS: - rc = mknod_nfs(xid, inode, dentry, tcon, full_path, mode, dev); - break; + return mknod_nfs(xid, inode, dentry, tcon, full_path, mode, dev, NULL); case CIFS_REPARSE_TYPE_WSL: - rc = mknod_wsl(xid, inode, dentry, tcon, full_path, mode, dev); - break; + return mknod_wsl(xid, inode, dentry, tcon, full_path, mode, dev, NULL); + default: + return -EOPNOTSUPP; } - return rc; } /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ -static int parse_reparse_posix(struct reparse_posix_data *buf, +static int parse_reparse_nfs(struct reparse_nfs_data_buffer *buf, struct cifs_sb_info *cifs_sb, struct cifs_open_info_data *data) { @@ -536,43 +778,160 @@ static int parse_reparse_posix(struct reparse_posix_data *buf, } int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, - bool unicode, bool relative, + bool relative, const char *full_path, struct cifs_sb_info *cifs_sb) { char sep = CIFS_DIR_SEP(cifs_sb); char *linux_target = NULL; char *smb_target = NULL; + int symlinkroot_len; + int abs_path_len; + char *abs_path; int levels; int rc; int i; - /* Check that length it valid for unicode/non-unicode mode */ - if (!len || (unicode && (len % 2))) { + /* Check that length it valid */ + if (!len || (len % 2)) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); rc = -EIO; goto out; } /* - * Check that buffer does not contain UTF-16 null codepoint in unicode - * mode or null byte in non-unicode mode because Linux cannot process - * symlink with null byte. + * Check that buffer does not contain UTF-16 null codepoint + * because Linux cannot process symlink with null byte. */ - if ((unicode && UniStrnlen((wchar_t *)buf, len/2) != len/2) || - (!unicode && strnlen(buf, len) != len)) { + if (UniStrnlen((wchar_t *)buf, len/2) != len/2) { cifs_dbg(VFS, "srv returned null byte in native symlink target location\n"); rc = -EIO; goto out; } - smb_target = cifs_strndup_from_utf16(buf, len, unicode, cifs_sb->local_nls); + smb_target = cifs_strndup_from_utf16(buf, len, true, cifs_sb->local_nls); if (!smb_target) { rc = -ENOMEM; goto out; } - if (smb_target[0] == sep && relative) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) { + /* + * This is an absolute symlink from the server which does not + * support POSIX paths, so the symlink is in NT-style path. + * So convert it to absolute Linux symlink target path. Root of + * the NT-style path for symlinks is specified in "symlinkroot" + * mount option. + * + * Root of the DOS and Win32 paths is at NT path \??\ + * It means that DOS/Win32 path C:\folder\file.txt is + * NT path \??\C:\folder\file.txt + * + * NT systems have some well-known object symlinks in their NT + * hierarchy, which is needed to take into account when resolving + * other symlinks. Most commonly used symlink paths are: + * \?? -> \GLOBAL?? + * \DosDevices -> \?? + * \GLOBAL??\GLOBALROOT -> \ + * \GLOBAL??\Global -> \GLOBAL?? + * \GLOBAL??\NUL -> \Device\Null + * \GLOBAL??\UNC -> \Device\Mup + * \GLOBAL??\PhysicalDrive0 -> \Device\Harddisk0\DR0 (for each harddisk) + * \GLOBAL??\A: -> \Device\Floppy0 (if A: is the first floppy) + * \GLOBAL??\C: -> \Device\HarddiskVolume1 (if C: is the first harddisk) + * \GLOBAL??\D: -> \Device\CdRom0 (if D: is first cdrom) + * \SystemRoot -> \Device\Harddisk0\Partition1\WINDOWS (or where is NT system installed) + * \Volume{...} -> \Device\HarddiskVolume1 (where ... is system generated guid) + * + * In most common cases, absolute NT symlinks points to path on + * DOS/Win32 drive letter, system-specific Volume or on UNC share. + * Here are few examples of commonly used absolute NT symlinks + * created by mklink.exe tool: + * \??\C:\folder\file.txt + * \??\\C:\folder\file.txt + * \??\UNC\server\share\file.txt + * \??\\UNC\server\share\file.txt + * \??\Volume{b75e2c83-0000-0000-0000-602f00000000}\folder\file.txt + * + * It means that the most common path prefix \??\ is also NT path + * symlink (to \GLOBAL??). It is less common that second path + * separator is double backslash, but it is valid. + * + * Volume guid is randomly generated by the target system and so + * only the target system knows the mapping between guid and the + * hardisk number. Over SMB it is not possible to resolve this + * mapping, therefore symlinks pointing to target location of + * volume guids are totally unusable over SMB. + * + * For now parse only symlink paths available for DOS and Win32. + * Those are paths with \??\ prefix or paths which points to \??\ + * via other NT symlink (\DosDevices\, \GLOBAL??\, ...). + */ + abs_path = smb_target; +globalroot: + if (strstarts(abs_path, "\\??\\")) + abs_path += sizeof("\\??\\")-1; + else if (strstarts(abs_path, "\\DosDevices\\")) + abs_path += sizeof("\\DosDevices\\")-1; + else if (strstarts(abs_path, "\\GLOBAL??\\")) + abs_path += sizeof("\\GLOBAL??\\")-1; + else { + /* Unhandled absolute symlink, points outside of DOS/Win32 */ + cifs_dbg(VFS, + "absolute symlink '%s' cannot be converted from NT format " + "because points to unknown target\n", + smb_target); + rc = -EIO; + goto out; + } + + /* Sometimes path separator after \?? is double backslash */ + if (abs_path[0] == '\\') + abs_path++; + + while (strstarts(abs_path, "Global\\")) + abs_path += sizeof("Global\\")-1; + + if (strstarts(abs_path, "GLOBALROOT\\")) { + /* Label globalroot requires path with leading '\\', so do not trim '\\' */ + abs_path += sizeof("GLOBALROOT")-1; + goto globalroot; + } + + /* For now parse only paths to drive letters */ + if (((abs_path[0] >= 'A' && abs_path[0] <= 'Z') || + (abs_path[0] >= 'a' && abs_path[0] <= 'z')) && + abs_path[1] == ':' && + (abs_path[2] == '\\' || abs_path[2] == '\0')) { + /* Convert drive letter to lowercase and drop colon */ + char drive_letter = abs_path[0]; + if (drive_letter >= 'A' && drive_letter <= 'Z') + drive_letter += 'a'-'A'; + abs_path++; + abs_path[0] = drive_letter; + } else { + /* Unhandled absolute symlink. Report an error. */ + cifs_dbg(VFS, + "absolute symlink '%s' cannot be converted from NT format " + "because points to unknown target\n", + smb_target); + rc = -EIO; + goto out; + } + + abs_path_len = strlen(abs_path)+1; + symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot); + if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/') + symlinkroot_len--; + linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL); + if (!linux_target) { + rc = -ENOMEM; + goto out; + } + memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len); + linux_target[symlinkroot_len] = '/'; + memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len); + } else if (smb_target[0] == sep && relative) { /* * This is a relative SMB symlink from the top of the share, * which is the top level directory of the Linux mount point. @@ -601,6 +960,12 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, } memcpy(linux_target + levels*3, smb_target+1, smb_target_len); /* +1 to skip leading sep */ } else { + /* + * This is either an absolute symlink in POSIX-style format + * or relative SMB symlink from the current directory. + * These paths have same format as Linux symlinks, so no + * conversion is needed. + */ linux_target = smb_target; smb_target = NULL; } @@ -620,8 +985,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, return rc; } -static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, - u32 plen, bool unicode, +static int parse_reparse_native_symlink(struct reparse_symlink_data_buffer *sym, + u32 plen, struct cifs_sb_info *cifs_sb, const char *full_path, struct cifs_open_info_data *data) @@ -641,7 +1006,6 @@ static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, return smb2_parse_native_symlink(&data->symlink_target, sym->PathBuffer + offs, len, - unicode, le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE, full_path, cifs_sb); @@ -696,7 +1060,7 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, const char *full_path, - bool unicode, struct cifs_open_info_data *data) + struct cifs_open_info_data *data) { struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); @@ -705,12 +1069,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf, /* See MS-FSCC 2.1.2 */ switch (le32_to_cpu(buf->ReparseTag)) { case IO_REPARSE_TAG_NFS: - return parse_reparse_posix((struct reparse_posix_data *)buf, + return parse_reparse_nfs((struct reparse_nfs_data_buffer *)buf, cifs_sb, data); case IO_REPARSE_TAG_SYMLINK: - return parse_reparse_symlink( + return parse_reparse_native_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, unicode, cifs_sb, full_path, data); + plen, cifs_sb, full_path, data); case IO_REPARSE_TAG_LX_SYMLINK: return parse_reparse_wsl_symlink( (struct reparse_wsl_symlink_data_buffer *)buf, @@ -744,14 +1108,15 @@ int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data); + return parse_reparse_point(buf, plen, cifs_sb, full_path, data); } -static void wsl_to_fattr(struct cifs_open_info_data *data, +static bool wsl_to_fattr(struct cifs_open_info_data *data, struct cifs_sb_info *cifs_sb, u32 tag, struct cifs_fattr *fattr) { struct smb2_file_full_ea_info *ea; + bool have_xattr_dev = false; u32 next = 0; switch (tag) { @@ -794,21 +1159,31 @@ static void wsl_to_fattr(struct cifs_open_info_data *data, fattr->cf_uid = wsl_make_kuid(cifs_sb, v); else if (!strncmp(name, SMB2_WSL_XATTR_GID, nlen)) fattr->cf_gid = wsl_make_kgid(cifs_sb, v); - else if (!strncmp(name, SMB2_WSL_XATTR_MODE, nlen)) + else if (!strncmp(name, SMB2_WSL_XATTR_MODE, nlen)) { + /* File type in reparse point tag and in xattr mode must match. */ + if (S_DT(fattr->cf_mode) != S_DT(le32_to_cpu(*(__le32 *)v))) + return false; fattr->cf_mode = (umode_t)le32_to_cpu(*(__le32 *)v); - else if (!strncmp(name, SMB2_WSL_XATTR_DEV, nlen)) + } else if (!strncmp(name, SMB2_WSL_XATTR_DEV, nlen)) { fattr->cf_rdev = reparse_mkdev(v); + have_xattr_dev = true; + } } while (next); out: + + /* Major and minor numbers for char and block devices are mandatory. */ + if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK)) + return false; + fattr->cf_dtype = S_DT(fattr->cf_mode); + return true; } static bool posix_reparse_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct cifs_open_info_data *data) { - struct reparse_posix_data *buf = data->reparse.posix; - + struct reparse_nfs_data_buffer *buf = (struct reparse_nfs_data_buffer *)data->reparse.buf; if (buf == NULL) return true; @@ -874,7 +1249,9 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, case IO_REPARSE_TAG_AF_UNIX: case IO_REPARSE_TAG_LX_CHR: case IO_REPARSE_TAG_LX_BLK: - wsl_to_fattr(data, cifs_sb, tag, fattr); + ok = wsl_to_fattr(data, cifs_sb, tag, fattr); + if (!ok) + return false; break; case IO_REPARSE_TAG_NFS: ok = posix_reparse_to_fattr(cifs_sb, fattr, data); diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index ff05b0e75c928..5a753fec7e2c2 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -50,6 +50,7 @@ static inline kgid_t wsl_make_kgid(struct cifs_sb_info *cifs_sb, static inline u64 reparse_mode_nfs_type(mode_t mode) { switch (mode & S_IFMT) { + case S_IFLNK: return NFS_SPECFILE_LNK; case S_IFBLK: return NFS_SPECFILE_BLK; case S_IFCHR: return NFS_SPECFILE_CHR; case S_IFIFO: return NFS_SPECFILE_FIFO; @@ -61,6 +62,7 @@ static inline u64 reparse_mode_nfs_type(mode_t mode) static inline u32 reparse_mode_wsl_tag(mode_t mode) { switch (mode & S_IFMT) { + case S_IFLNK: return IO_REPARSE_TAG_LX_SYMLINK; case S_IFBLK: return IO_REPARSE_TAG_LX_BLK; case S_IFCHR: return IO_REPARSE_TAG_LX_CHR; case S_IFIFO: return IO_REPARSE_TAG_LX_FIFO; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 91d4d409cb1dc..faa80e7d54a6e 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -1235,12 +1235,13 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) switch (requested) { case Kerberos: case RawNTLMSSP: + case IAKerb: return requested; case Unspecified: if (server->sec_ntlmssp && (global_secflags & CIFSSEC_MAY_NTLMSSP)) return RawNTLMSSP; - if ((server->sec_kerberos || server->sec_mskerberos) && + if ((server->sec_kerberos || server->sec_mskerberos || server->sec_iakerb) && (global_secflags & CIFSSEC_MAY_KRB5)) return Kerberos; fallthrough; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 749a83cd0deb0..9756b876a75e1 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -551,7 +551,7 @@ static int cifs_query_path_info(const unsigned int xid, int rc; FILE_ALL_INFO fi = {}; - data->symlink = false; + data->reparse_point = false; data->adjust_tz = false; /* could do find first instead but this returns more info */ @@ -569,32 +569,8 @@ static int cifs_query_path_info(const unsigned int xid, } if (!rc) { - int tmprc; - int oplock = 0; - struct cifs_fid fid; - struct cifs_open_parms oparms; - move_cifs_info_to_smb2(&data->fi, &fi); - - if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) - return 0; - - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = FILE_READ_ATTRIBUTES, - .create_options = cifs_create_options(cifs_sb, 0), - .disposition = FILE_OPEN, - .path = full_path, - .fid = &fid, - }; - - /* Need to check if this is a symbolic link or not */ - tmprc = CIFS_open(xid, &oparms, &oplock, NULL); - if (tmprc == -EOPNOTSUPP) - data->symlink = true; - else if (tmprc == 0) - CIFSSMBClose(xid, tcon, fid.netfid); + data->reparse_point = le32_to_cpu(fi.Attributes) & ATTR_REPARSE; } return rc; @@ -1010,7 +986,7 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, true, data); + return parse_reparse_point(buf, plen, cifs_sb, full_path, data); } static bool diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 9ec44eab8dbca..d609a20fb98a9 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -63,6 +63,52 @@ static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov) return sym; } +int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_info *cifs_sb) +{ + char *buf; + int len; + + /* + * POSIX server does not distinguish between symlinks to file and + * symlink directory. So nothing is needed to fix on the client side. + */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + return 0; + + if (!*target) + return -EIO; + + len = strlen(*target); + if (!len) + return -EIO; + + /* + * If this is directory symlink and it does not have trailing slash then + * append it. Trailing slash simulates Windows/SMB behavior which do not + * allow resolving directory symlink to file. + */ + if (directory && (*target)[len-1] != '/') { + buf = krealloc(*target, len+2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + buf[len] = '/'; + buf[len+1] = '\0'; + *target = buf; + len++; + } + + /* + * If this is a file (non-directory) symlink and it points to path name + * with trailing slash then this is an invalid symlink because file name + * cannot contain slash character. File name with slash is invalid on + * both Windows and Linux systems. So return an error for such symlink. + */ + if (!directory && (*target)[len-1] == '/') + return -EIO; + + return 0; +} + int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec *iov, const char *full_path, char **path) { @@ -89,7 +135,6 @@ int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, const struct kvec return smb2_parse_native_symlink(path, (char *)sym->PathBuffer + sub_offs, sub_len, - true, le32_to_cpu(sym->Flags) & SYMLINK_FLAG_RELATIVE, full_path, cifs_sb); @@ -133,6 +178,11 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 NULL, NULL, NULL); oparms->create_options &= ~OPEN_REPARSE_POINT; } + if (!rc) { + bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY; + rc = smb2_fix_symlink_target_type(&data->symlink_target, + directory, oparms->cifs_sb); + } } } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index c97f14757c27c..5dfb30b0a852c 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1010,6 +1010,11 @@ int smb2_query_path_info(const unsigned int xid, else rc = -EOPNOTSUPP; } + + if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { + bool directory = le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY; + rc = smb2_fix_symlink_target_type(&data->symlink_target, directory, cifs_sb); + } break; case -EREMOTE: break; diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index b05313acf9b2b..12c2b868789fd 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c @@ -380,7 +380,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NO_LOGON_SERVERS, -EIO, "STATUS_NO_LOGON_SERVERS"}, {STATUS_NO_SUCH_LOGON_SESSION, -EIO, "STATUS_NO_SUCH_LOGON_SESSION"}, {STATUS_NO_SUCH_PRIVILEGE, -EIO, "STATUS_NO_SUCH_PRIVILEGE"}, - {STATUS_PRIVILEGE_NOT_HELD, -EIO, "STATUS_PRIVILEGE_NOT_HELD"}, + {STATUS_PRIVILEGE_NOT_HELD, -EPERM, "STATUS_PRIVILEGE_NOT_HELD"}, {STATUS_INVALID_ACCOUNT_NAME, -EIO, "STATUS_INVALID_ACCOUNT_NAME"}, {STATUS_USER_EXISTS, -EIO, "STATUS_USER_EXISTS"}, {STATUS_NO_SUCH_USER, -EIO, "STATUS_NO_SUCH_USER"}, @@ -871,7 +871,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_VALIDATE_CONTINUE, -EIO, "STATUS_VALIDATE_CONTINUE"}, {STATUS_NO_MATCH, -EIO, "STATUS_NO_MATCH"}, {STATUS_NO_MORE_MATCHES, -EIO, "STATUS_NO_MORE_MATCHES"}, - {STATUS_NOT_A_REPARSE_POINT, -EIO, "STATUS_NOT_A_REPARSE_POINT"}, + {STATUS_NOT_A_REPARSE_POINT, -ENODATA, "STATUS_NOT_A_REPARSE_POINT"}, {STATUS_IO_REPARSE_TAG_INVALID, -EIO, "STATUS_IO_REPARSE_TAG_INVALID"}, {STATUS_IO_REPARSE_TAG_MISMATCH, -EIO, "STATUS_IO_REPARSE_TAG_MISMATCH"}, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index d640dcabc305e..77309217dab45 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5077,6 +5077,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, { struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + struct cifs_open_info_data idata; struct cifs_io_parms io_parms = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_fid fid; @@ -5146,10 +5147,20 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, CREATE_OPTION_SPECIAL, ACL_NO_MODE); oparms.fid = &fid; - rc = server->ops->open(xid, &oparms, &oplock, NULL); + rc = server->ops->open(xid, &oparms, &oplock, &idata); if (rc) goto out; + /* + * Check if the server honored ATTR_SYSTEM flag by CREATE_OPTION_SPECIAL + * option. If not then server does not support ATTR_SYSTEM and newly + * created file is not SFU compatible, which means that the call failed. + */ + if (!(le32_to_cpu(idata.fi.Attributes) & ATTR_SYSTEM)) { + rc = -EOPNOTSUPP; + goto out_close; + } + if (type_len + data_len > 0) { io_parms.pid = current->tgid; io_parms.tcon = tcon; @@ -5164,8 +5175,18 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, iov, ARRAY_SIZE(iov)-1); } +out_close: server->ops->close(xid, tcon, &fid); + /* + * If CREATE was successful but either setting ATTR_SYSTEM failed or + * writing type/data information failed then remove the intermediate + * object created by CREATE. Otherwise intermediate empty object stay + * on the server. + */ + if (rc) + server->ops->unlink(xid, tcon, full_path, cifs_sb, NULL); + out: kfree(symname_utf16); return rc; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9f54596a6866c..40ad9e79437a4 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1429,7 +1429,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) if (server->sec_ntlmssp && (global_secflags & CIFSSEC_MAY_NTLMSSP)) return RawNTLMSSP; - if ((server->sec_kerberos || server->sec_mskerberos) && + if ((server->sec_kerberos || server->sec_mskerberos || server->sec_iakerb) && (global_secflags & CIFSSEC_MAY_KRB5)) return Kerberos; fallthrough; diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 09349fa8da039..2336dfb23f363 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -111,8 +111,9 @@ extern int smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_read); +int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_info *cifs_sb); int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, - bool unicode, bool relative, + bool relative, const char *full_path, struct cifs_sb_info *cifs_sb); int smb2_parse_symlink_response(struct cifs_sb_info *cifs_sb, diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 3c7c706c797d2..3336df2ea5d4a 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1550,7 +1550,19 @@ struct reparse_symlink_data_buffer { __u8 PathBuffer[]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ +/* For IO_REPARSE_TAG_NFS - see MS-FSCC 2.1.2.6 */ +#define NFS_SPECFILE_LNK 0x00000000014B4E4C +#define NFS_SPECFILE_CHR 0x0000000000524843 +#define NFS_SPECFILE_BLK 0x00000000004B4C42 +#define NFS_SPECFILE_FIFO 0x000000004F464946 +#define NFS_SPECFILE_SOCK 0x000000004B434F53 +struct reparse_nfs_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le64 InodeType; /* NFS_SPECFILE_* */ + __u8 DataBuffer[]; +} __packed; /* For IO_REPARSE_TAG_LX_SYMLINK */ struct reparse_wsl_symlink_data_buffer { diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5cc69beaa62ec..b01f382ce8db0 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -863,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) out: vfree(buf); - return; } void ubifs_dump_znode(const struct ubifs_info *c, @@ -946,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index aa8837e6247cf..f2cb214581fd9 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1932,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); - return; } /** diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 40ad22fb808b9..0ef19f1469ec9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. */ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1d4a0a22e137..15bb790359f81 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache; * * xfs_buf_rele: * b_lock - * pag_buf_lock - * lru_lock + * lru_lock * * xfs_buftarg_drain_rele * lru_lock @@ -220,23 +219,25 @@ _xfs_buf_alloc( */ flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); - spin_lock_init(&bp->b_lock); + /* + * A new buffer is held and locked by the owner. This ensures that the + * buffer is owned by the caller and racing RCU lookups right after + * inserting into the hash table are safe (and will have to wait for + * the unlock to do anything non-trivial). + */ bp->b_hold = 1; + sema_init(&bp->b_sema, 0); /* held, no waiters */ + + spin_lock_init(&bp->b_lock); atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_li_list); - sema_init(&bp->b_sema, 0); /* held, no waiters */ bp->b_target = target; bp->b_mount = target->bt_mount; bp->b_flags = flags; - /* - * Set length and io_length to the same value initially. - * I/O routines should use io_length, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_cache, bp); @@ -502,7 +503,6 @@ int xfs_buf_cache_init( struct xfs_buf_cache *bch) { - spin_lock_init(&bch->bc_lock); return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } @@ -652,17 +652,20 @@ xfs_buf_find_insert( if (error) goto out_free_buf; - spin_lock(&bch->bc_lock); + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + + rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { + rcu_read_unlock(); error = PTR_ERR(bp); - spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp && xfs_buf_try_hold(bp)) { /* found an existing buffer */ - spin_unlock(&bch->bc_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -670,10 +673,8 @@ xfs_buf_find_insert( *bpp = bp; goto out_free_buf; } + rcu_read_unlock(); - /* The new buffer keeps the perag reference until it is freed. */ - new_bp->b_pag = pag; - spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; @@ -1090,7 +1091,6 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - spin_lock(&bch->bc_lock); __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* @@ -1102,7 +1102,6 @@ xfs_buf_rele_cached( bp->b_state &= ~XFS_BSTATE_DISPOSE; else bp->b_hold--; - spin_unlock(&bch->bc_lock); } else { bp->b_hold--; /* @@ -1120,7 +1119,6 @@ xfs_buf_rele_cached( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, xfs_buf_hash_params); - spin_unlock(&bch->bc_lock); if (pag) xfs_perag_put(pag); freebuf = true; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7e73663c5d4a5..3b4ed42e11c01 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { - spinlock_t bc_lock; struct rhashtable bc_hash; }; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index f340a2015c4c7..0b41bdfecdfbc 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -329,22 +329,6 @@ xfs_exchrange_mappings( * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -355,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -368,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -401,15 +388,6 @@ xfs_exchange_range_checks( check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) return -EINVAL; - /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole @@ -747,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -779,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c95fe1b1de4e6..b1f9f156ec888 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1404,8 +1404,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 50fa3ef89f6c9..d61460309a783 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -976,10 +976,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a0a9007cc1e36..9ebb53f031cdb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -900,8 +900,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); -void blk_mq_freeze_queue(struct request_queue *q); -void blk_mq_unfreeze_queue(struct request_queue *q); +void blk_mq_freeze_queue_nomemsave(struct request_queue *q); +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); +static inline unsigned int __must_check +blk_mq_freeze_queue(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_mq_freeze_queue_nomemsave(q); + return memflags; +} +static inline void +blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) +{ + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); +} void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76f0a4e7c2e5d..248416ecd01c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,6 @@ struct request_queue { struct list_head flush_list; struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; struct mutex limits_lock; /* @@ -605,8 +604,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d7d86f0290d9..c7f2c63b3bc3f 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy { #define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head_old { - __le16 version; /* struct version */ - __le64 oldest_client_tid; - __le32 mdsmap_epoch; /* on client */ - __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ - __le16 num_releases; /* # include cap/lease release records */ - __le32 op; /* mds op code */ - __le32 caller_uid, caller_gid; - __le64 ino; /* use this ino for openc, mkdir, mknod, - etc. (if replaying) */ - union ceph_mds_request_args_ext args; -} __attribute__ ((packed)); - struct ceph_mds_request_head { __le16 version; /* struct version */ __le64 oldest_client_tid; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec17324452..6a0a8f1c7c903 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,7 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9a1a308577632..4afb603656756 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,6 +57,7 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; diff --git a/include/linux/export.h b/include/linux/export.h index 2633df4d31e62..a8c23d945634b 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -52,9 +52,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_ pointer + * that's discarded during the final link. + */ +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index c3b4b7ed7c163..84a5045f80f36 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -125,6 +125,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de8..abd0c8bd950ba 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -19,8 +19,8 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; + struct io_uring_sqe sqes[2]; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 623d8e798a11a..3def525a1da37 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,7 +222,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index ed945f42e064a..0ea8c9887429f 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) * * Return: jiffies value */ -#define secs_to_jiffies(_secs) ((_secs) * HZ) +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5f1b2dc788e24..6b27db7f94963 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1406,6 +1406,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; diff --git a/include/linux/module.h b/include/linux/module.h index 37eb5d88f6ebe..23792d5d7b74b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -430,7 +430,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const s32 *crcs; + const u32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -448,7 +448,7 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const s32 *gpl_crcs; + const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1dcc76af75203..5429581f22995 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2905,9 +2905,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); diff --git a/include/linux/pm.h b/include/linux/pm.h index 08c37b83fea88..78855d794342d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #ifdef CONFIG_PM_SLEEP @@ -684,6 +680,7 @@ struct dev_pm_info { bool no_pm_callbacks:1; /* Owned by the PM core */ bool async_in_progress:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ + bool set_active:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else bool should_wakeup:1; diff --git a/include/linux/swap.h b/include/linux/swap.h index a5f475335aea8..b13b72645db33 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -222,6 +222,7 @@ enum { }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d635c5b47ebaf..d48c657191cd0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2f119d18a061f..cad50d91077ef 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -219,6 +219,7 @@ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index e1571603175e7..aa872a41ffb9b 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -175,6 +175,8 @@ #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32) /* Force scrubbing on the specified PEB */ #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32) +/* Read detailed device erase counter information */ +#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req) /* ioctl commands of the UBI control character device */ @@ -412,6 +414,37 @@ struct ubi_rnvol_req { } ents[UBI_MAX_RNVOL]; } __packed; +/** + * struct ubi_ecinfo_req - a data structure used for requesting and receiving + * erase block counter information from a UBI device. + * + * @start: index of first physical erase block to read (in) + * @length: number of erase counters to read (in) + * @read_length: number of erase counters that was actually read (out) + * @padding: reserved for future, not used, has to be zeroed + * @erase_counters: array of erase counter values (out) + * + * This structure is used to retrieve erase counter information for a specified + * range of PEBs on a UBI device. + * Erase counters are read from @start and attempts to read @length number of + * erase counters. + * The retrieved values are stored in the @erase_counters array. It is the + * responsibility of the caller to allocate enough memory for storing @length + * elements in the @erase_counters array. + * If a block is bad or if the erase counter is unknown the corresponding value + * in the array will be set to -1. + * The @read_length field will indicate the number of erase counters actually + * read. Typically @read_length will be limited due to memory or the number of + * PEBs on the UBI device. + */ +struct ubi_ecinfo_req { + __s32 start; + __s32 length; + __s32 read_length; + __s8 padding[16]; + __s32 erase_counters[]; +} __packed; + /** * struct ubi_leb_change_req - a data structure used in atomic LEB change * requests. diff --git a/init/Kconfig b/init/Kconfig index 4dbc059d2de5c..d0d021b3fa3b3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1969,7 +1969,8 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE diff --git a/io_uring/Makefile b/io_uring/Makefile index 53167bef37d77..d695b60dba4f0 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ sync.o msg_ring.o advise.o openclose.o \ epoll.o statx.o timeout.o fdinfo.o \ cancel.o waitid.o register.o \ - truncate.o memmap.o + truncate.o memmap.o alloc_cache.o obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o diff --git a/io_uring/alloc_cache.c b/io_uring/alloc_cache.c new file mode 100644 index 0000000000000..58423888b736e --- /dev/null +++ b/io_uring/alloc_cache.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "alloc_cache.h" + +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)) +{ + void *entry; + + if (!cache->entries) + return; + + while ((entry = io_alloc_cache_get(cache)) != NULL) + free(entry); + + kvfree(cache->entries); + cache->entries = NULL; +} + +/* returns false if the cache was initialized properly */ +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes) +{ + cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); + if (!cache->entries) + return true; + + cache->nr_cached = 0; + cache->max_cached = max_nr; + cache->elem_size = size; + cache->init_clear = init_bytes; + return false; +} + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp) +{ + void *obj; + + obj = kmalloc(cache->elem_size, gfp); + if (obj && cache->init_clear) + memset(obj, 0, cache->init_clear); + return obj; +} diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index a3a8cfec32ce1..0dd17d8ba93a8 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -1,11 +1,30 @@ #ifndef IOU_ALLOC_CACHE_H #define IOU_ALLOC_CACHE_H +#include + /* * Don't allow the cache to grow beyond this size. */ #define IO_ALLOC_CACHE_MAX 128 +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)); +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes); + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp); + +static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) +{ + if (IS_ENABLED(CONFIG_KASAN)) { + kfree(*iov); + *iov = NULL; + *nr = 0; + } +} + static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, void *entry) { @@ -23,52 +42,30 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) if (cache->nr_cached) { void *entry = cache->entries[--cache->nr_cached]; + /* + * If KASAN is enabled, always clear the initial bytes that + * must be zeroed post alloc, in case any of them overlap + * with KASAN storage. + */ +#if defined(CONFIG_KASAN) kasan_mempool_unpoison_object(entry, cache->elem_size); + if (cache->init_clear) + memset(entry, 0, cache->init_clear); +#endif return entry; } return NULL; } -static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp, - void (*init_once)(void *obj)) +static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) { - if (unlikely(!cache->nr_cached)) { - void *obj = kmalloc(cache->elem_size, gfp); + void *obj; - if (obj && init_once) - init_once(obj); + obj = io_alloc_cache_get(cache); + if (obj) return obj; - } - return io_alloc_cache_get(cache); + return io_cache_alloc_new(cache, gfp); } -/* returns false if the cache was initialized properly */ -static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, - unsigned max_nr, size_t size) -{ - cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); - if (cache->entries) { - cache->nr_cached = 0; - cache->max_cached = max_nr; - cache->elem_size = size; - return false; - } - return true; -} - -static inline void io_alloc_cache_free(struct io_alloc_cache *cache, - void (*free)(const void *)) -{ - void *entry; - - if (!cache->entries) - return; - - while ((entry = io_alloc_cache_get(cache)) != NULL) - free(entry); - - kvfree(cache->entries); - cache->entries = NULL; -} #endif diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145ab..dd8eeec97acf6 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, if (slot_index >= ctx->file_table.data.nr) return -EINVAL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) return -ENOMEM; diff --git a/io_uring/futex.c b/io_uring/futex.c index 30139cc150f22..3159a2b7eeca1 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -36,7 +36,7 @@ struct io_futex_data { bool io_futex_cache_init(struct io_ring_ctx *ctx) { return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, - sizeof(struct io_futex_data)); + sizeof(struct io_futex_data), 0); } void io_futex_cache_free(struct io_ring_ctx *ctx) @@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) } io_ring_submit_lock(ctx, issue_flags); - ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL); + ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT); if (!ifd) { ret = -ENOMEM; goto done_unlock; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5a0f8a5041d66..ceacf6230e342 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, - sizeof(struct async_poll)); + sizeof(struct async_poll), 0); ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_msghdr)); + sizeof(struct io_async_msghdr), + offsetof(struct io_async_msghdr, clear)); ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_rw)); + sizeof(struct io_async_rw), + offsetof(struct io_async_rw, clear)); ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_uring_cmd_data)); + sizeof(struct io_uring_cmd_data), 0); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_kiocb)); + sizeof(struct io_kiocb), 0); ret |= io_futex_cache_init(ctx); if (ret) goto free_ref; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index f65e3f3ede517..ab619e63ef39c 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -226,21 +226,16 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags) } static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, - struct io_kiocb *req, - void (*init_once)(void *obj)) + struct io_kiocb *req) { - req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once); - if (req->async_data) - req->flags |= REQ_F_ASYNC_DATA; - return req->async_data; -} + if (cache) { + req->async_data = io_cache_alloc(cache, GFP_KERNEL); + } else { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; -static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req) -{ - const struct io_issue_def *def = &io_issue_defs[req->opcode]; - - WARN_ON_ONCE(!def->async_size); - req->async_data = kmalloc(def->async_size, GFP_KERNEL); + WARN_ON_ONCE(!def->async_size); + req->async_data = kmalloc(def->async_size, GFP_KERNEL); + } if (req->async_data) req->flags |= REQ_F_ASYNC_DATA; return req->async_data; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index bd3cd78d2dba3..7e6f68e911f10 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts) static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { - req->tctx = READ_ONCE(ctx->submitter_task->io_uring); - if (!req->tctx) { + if (!READ_ONCE(ctx->submitter_task)) { kmem_cache_free(req_cachep, req); return -EOWNERDEAD; } @@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, io_req_set_res(req, res, cflags); percpu_ref_get(&ctx->refs); req->ctx = ctx; + req->tctx = NULL; req->io_task_work.func = io_msg_tw_complete; io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); return 0; diff --git a/io_uring/net.c b/io_uring/net.c index 85f55fbc25c94..17852a6616ffe 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; - struct iovec *iov; /* can't recycle, ensure we free the iovec if we have one */ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { @@ -146,44 +145,30 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) } /* Let normal cleanup path reap it if we fail adding to the cache */ - iov = hdr->free_iov; + io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } } -static void io_msg_async_data_init(void *obj) -{ - struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; - - hdr->free_iov = NULL; - hdr->free_iov_nr = 0; -} - static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_msghdr *hdr; - hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, - io_msg_async_data_init); + hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); if (!hdr) return NULL; /* If the async data was cached, we might have an iov cached inside. */ - if (hdr->free_iov) { - kasan_mempool_unpoison_object(hdr->free_iov, - hdr->free_iov_nr * sizeof(struct iovec)); + if (hdr->free_iov) req->flags |= REQ_F_NEED_CLEANUP; - } return hdr; } /* assign new iovec to kmsg, if we need to */ -static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, +static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, struct iovec *iov) { if (iov) { @@ -193,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, kfree(kmsg->free_iov); kmsg->free_iov = iov; } - return 0; } static inline void io_mshot_prep_retry(struct io_kiocb *req, @@ -255,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } #endif @@ -295,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, ret = -EINVAL; goto ua_end; } else { + struct iovec __user *uiov = msg->msg_iov; + /* we only need the length for provided buffers */ - if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) + if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len))) goto ua_end; - unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, - ua_end); + unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end); sr->len = iov->iov_len; } ret = 0; @@ -314,7 +300,8 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } static int io_sendmsg_copy_hdr(struct io_kiocb *req, @@ -579,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, + struct io_async_msghdr *kmsg) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + + int ret; + struct buf_sel_arg arg = { + .iovs = &kmsg->fast_iov, + .max_len = min_not_zero(sr->len, INT_MAX), + .nr_iovs = 1, + }; + + if (kmsg->free_iov) { + arg.nr_iovs = kmsg->free_iov_nr; + arg.iovs = kmsg->free_iov; + arg.mode = KBUF_MODE_FREE; + } + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) + arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; + + ret = io_buffers_select(req, &arg, issue_flags); + if (unlikely(ret < 0)) + return ret; + + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { + kmsg->free_iov_nr = ret; + kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + sr->len = arg.out_len; + + if (ret == 1) { + sr->buf = arg.iovs[0].iov_base; + ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, + &kmsg->msg.msg_iter); + if (unlikely(ret)) + return ret; + } else { + iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, + arg.iovs, ret, arg.out_len); + } + + return 0; +} + int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); @@ -602,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) retry_bundle: if (io_do_buffer_select(req)) { - struct buf_sel_arg arg = { - .iovs = &kmsg->fast_iov, - .max_len = min_not_zero(sr->len, INT_MAX), - .nr_iovs = 1, - }; - - if (kmsg->free_iov) { - arg.nr_iovs = kmsg->free_iov_nr; - arg.iovs = kmsg->free_iov; - arg.mode = KBUF_MODE_FREE; - } - - if (!(sr->flags & IORING_RECVSEND_BUNDLE)) - arg.nr_iovs = 1; - else - arg.mode |= KBUF_MODE_EXPAND; - - ret = io_buffers_select(req, &arg, issue_flags); - if (unlikely(ret < 0)) + ret = io_send_select_buffer(req, issue_flags, kmsg); + if (ret) return ret; - - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { - kmsg->free_iov_nr = ret; - kmsg->free_iov = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } - sr->len = arg.out_len; - - if (ret == 1) { - sr->buf = arg.iovs[0].iov_base; - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - } else { - iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, - arg.iovs, ret, arg.out_len); - } } /* @@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + if (unlikely(req->flags & REQ_F_FAIL)) { + ret = -ECONNRESET; + goto out; + } + file_flags = force_nonblock ? O_NONBLOCK : 0; ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, @@ -1813,11 +1818,8 @@ void io_netmsg_cache_free(const void *entry) { struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; - if (kmsg->free_iov) { - kasan_mempool_unpoison_object(kmsg->free_iov, - kmsg->free_iov_nr * sizeof(struct iovec)); + if (kmsg->free_iov) io_netmsg_iovec_free(kmsg); - } kfree(kmsg); } #endif diff --git a/io_uring/net.h b/io_uring/net.h index 52bfee05f06a1..b804c2b36e605 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -5,16 +5,20 @@ struct io_async_msghdr { #if defined(CONFIG_NET) - struct iovec fast_iov; - /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + /* points to an allocated iov, if NULL we use fast_iov instead */ int free_iov_nr; - int namelen; - __kernel_size_t controllen; - __kernel_size_t payloadlen; - struct sockaddr __user *uaddr; - struct msghdr msg; - struct sockaddr_storage addr; + struct_group(clear, + int namelen; + struct iovec fast_iov; + __kernel_size_t controllen; + __kernel_size_t payloadlen; + struct sockaddr __user *uaddr; + struct msghdr msg; + struct sockaddr_storage addr; + ); +#else + struct_group(clear); #endif }; diff --git a/io_uring/poll.c b/io_uring/poll.c index cc01c40b43d31..bb1c0cd4f809a 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) return IOU_POLL_REISSUE; } } + if (unlikely(req->cqe.res & EPOLLERR)) + req_set_fail(req); if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; @@ -315,8 +317,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) ret = io_poll_check_events(req, ts); if (ret == IOU_POLL_NO_ACTION) { + io_kbuf_recycle(req, 0); return; } else if (ret == IOU_POLL_REQUEUE) { + io_kbuf_recycle(req, 0); __io_poll_execute(req, 0); return; } @@ -650,7 +654,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, kfree(apoll->double_poll); } else { if (!(issue_flags & IO_URING_F_UNLOCKED)) - apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL); + apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC); else apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (!apoll) diff --git a/io_uring/register.c b/io_uring/register.c index 05025047d1dab..9a4d2fbce4aec 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -552,7 +552,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) ctx->cqe_cached = ctx->cqe_sentinel = NULL; WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); - WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags)); + atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); @@ -853,6 +853,8 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered) return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; + if (file) + get_file(file); } else { file = fget(fd); } @@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); - if (!use_registered_ring) - fput(file); + + fput(file); return ret; } diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e32ac58533914..af39b69eb4fde 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node) } } -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) +struct io_rsrc_node *io_rsrc_node_alloc(int type) { struct io_rsrc_node *node; @@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { err = -ENOMEM; fput(file); @@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags) void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { - lockdep_assert_held(&ctx->uring_lock); - if (node->tag) io_post_aux_cqe(ctx, node->tag, 0, 0); @@ -525,7 +523,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; } ret = -ENOMEM; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { fput(file); goto fail; @@ -730,7 +728,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (!iov->iov_base) return NULL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!node) return ERR_PTR(-ENOMEM); node->buf = NULL; @@ -921,6 +919,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter, return 0; } +/* Lock two rings at once. The rings must be different! */ +static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2) +{ + if (ctx1 > ctx2) + swap(ctx1, ctx2); + mutex_lock(&ctx1->uring_lock); + mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING); +} + +/* Both rings are locked by the caller. */ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, struct io_uring_clone_buffers *arg) { @@ -928,6 +936,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx int i, ret, off, nr; unsigned int nbufs; + lockdep_assert_held(&ctx->uring_lock); + lockdep_assert_held(&src_ctx->uring_lock); + /* * Accounting state is shared between the two rings; that only works if * both rings are accounted towards the same counters. @@ -942,7 +953,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) return -EBUSY; - nbufs = READ_ONCE(src_ctx->buf_table.nr); + nbufs = src_ctx->buf_table.nr; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) @@ -966,27 +977,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx } } - /* - * Drop our own lock here. We'll setup the data we need and reference - * the source buffers, then re-grab, check, and assign at the end. - */ - mutex_unlock(&ctx->uring_lock); - - mutex_lock(&src_ctx->uring_lock); ret = -ENXIO; nbufs = src_ctx->buf_table.nr; if (!nbufs) - goto out_unlock; + goto out_free; ret = -EINVAL; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) - goto out_unlock; + goto out_free; ret = -EOVERFLOW; if (check_add_overflow(arg->nr, arg->src_off, &off)) - goto out_unlock; + goto out_free; if (off > nbufs) - goto out_unlock; + goto out_free; off = arg->dst_off; i = arg->src_off; @@ -998,10 +1002,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (!src_node) { dst_node = NULL; } else { - dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; - goto out_unlock; + goto out_free; } refcount_inc(&src_node->buf->refs); @@ -1011,10 +1015,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx i++; } - /* Have a ref on the bufs now, drop src lock and re-grab our own lock */ - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); - /* * If asked for replace, put the old table. data->nodes[] holds both * old and new nodes at this point. @@ -1023,24 +1023,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx io_rsrc_data_free(ctx, &ctx->buf_table); /* - * ctx->buf_table should be empty now - either the contents are being - * replaced and we just freed the table, or someone raced setting up - * a buffer table while the clone was happening. If not empty, fall - * through to failure handling. + * ctx->buf_table must be empty now - either the contents are being + * replaced and we just freed the table, or the contents are being + * copied to a ring that does not have buffers yet (checked at function + * entry). */ - if (!ctx->buf_table.nr) { - ctx->buf_table = data; - return 0; - } + WARN_ON_ONCE(ctx->buf_table.nr); + ctx->buf_table = data; + return 0; - mutex_unlock(&ctx->uring_lock); - mutex_lock(&src_ctx->uring_lock); - /* someone raced setting up buffers, dump ours */ - ret = -EBUSY; -out_unlock: +out_free: io_rsrc_data_free(ctx, &data); - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); return ret; } @@ -1054,6 +1047,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_clone_buffers buf; + struct io_ring_ctx *src_ctx; bool registered_src; struct file *file; int ret; @@ -1071,8 +1065,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) file = io_uring_register_get_file(buf.src_fd, registered_src); if (IS_ERR(file)) return PTR_ERR(file); - ret = io_clone_buffers(ctx, file->private_data, &buf); - if (!registered_src) - fput(file); + + src_ctx = file->private_data; + if (src_ctx != ctx) { + mutex_unlock(&ctx->uring_lock); + lock_two_rings(ctx, src_ctx); + } + + ret = io_clone_buffers(ctx, src_ctx, &buf); + + if (src_ctx != ctx) + mutex_unlock(&src_ctx->uring_lock); + + fput(file); return ret; } diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index c8b0935844618..190f7ee45de93 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -2,6 +2,8 @@ #ifndef IOU_RSRC_H #define IOU_RSRC_H +#include + #define IO_NODE_ALLOC_CACHE_MAX 32 #define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) @@ -43,7 +45,7 @@ struct io_imu_folio_data { unsigned int nr_folios; }; -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); +struct io_rsrc_node *io_rsrc_node_alloc(int type); void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr); @@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { + lockdep_assert_held(&ctx->uring_lock); if (node && !--node->refs) io_free_rsrc_node(ctx, node); } diff --git a/io_uring/rw.c b/io_uring/rw.c index a9a2733be8420..7aa1e4c9f64a3 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -146,28 +146,15 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req, return 0; } -static void io_rw_iovec_free(struct io_async_rw *rw) -{ - if (rw->free_iovec) { - kfree(rw->free_iovec); - rw->free_iov_nr = 0; - rw->free_iovec = NULL; - } -} - static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_rw *rw = req->async_data; - struct iovec *iov; - if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { - io_rw_iovec_free(rw); + if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) return; - } - iov = rw->free_iovec; + + io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr); if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } @@ -208,27 +195,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) } } -static void io_rw_async_data_init(void *obj) -{ - struct io_async_rw *rw = (struct io_async_rw *)obj; - - rw->free_iovec = NULL; - rw->bytes_done = 0; -} - static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; - rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init); + rw = io_uring_alloc_async_data(&ctx->rw_cache, req); if (!rw) return -ENOMEM; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); + if (rw->free_iovec) req->flags |= REQ_F_NEED_CLEANUP; - } rw->bytes_done = 0; return 0; } @@ -1323,10 +1299,7 @@ void io_rw_cache_free(const void *entry) { struct io_async_rw *rw = (struct io_async_rw *) entry; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); - io_rw_iovec_free(rw); - } + if (rw->free_iovec) + kfree(rw->free_iovec); kfree(rw); } diff --git a/io_uring/rw.h b/io_uring/rw.h index 2d7656bd268d6..eaa59bd648709 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -9,19 +9,24 @@ struct io_meta_state { struct io_async_rw { size_t bytes_done; - struct iov_iter iter; - struct iov_iter_state iter_state; - struct iovec fast_iov; struct iovec *free_iovec; - int free_iov_nr; - /* wpq is for buffered io, while meta fields are used with direct io */ - union { - struct wait_page_queue wpq; - struct { - struct uio_meta meta; - struct io_meta_state meta_state; + struct_group(clear, + struct iov_iter iter; + struct iov_iter_state iter_state; + struct iovec fast_iov; + int free_iov_nr; + /* + * wpq is for buffered io, while meta fields are used with + * direct io + */ + union { + struct wait_page_queue wpq; + struct { + struct uio_meta meta; + struct io_meta_state meta_state; + }; }; - }; + ); }; int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 2bd7e0a317bba..48fc8cf707843 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req, if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; - data = io_uring_alloc_async_data_nocache(req); + data = io_uring_alloc_async_data(NULL, req); if (!data) return -ENOMEM; data->req = req; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index fc94c465a9850..1f6a82128b475 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_init_once(void *obj) -{ - struct io_uring_cmd_data *data = obj; - - data->op_data = NULL; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd_data *cache; - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, - io_uring_cmd_init_once); + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req); if (!cache) return -ENOMEM; + cache->op_data = NULL; if (!(req->flags & REQ_F_FORCE_ASYNC)) { /* defer memcpy until we need it */ @@ -192,8 +185,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return 0; } - memcpy(req->async_data, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = req->async_data; + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -260,7 +253,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != (void *) cache) - memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; @@ -350,7 +343,7 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) if (!prot || !prot->ioctl) return -EOPNOTSUPP; - switch (cmd->sqe->cmd_op) { + switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: ret = prot->ioctl(sk, SIOCINQ, &arg); if (ret) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 6778c0ee76c42..853e97a7b0ecb 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags) struct io_waitid_async *iwa; int ret; - iwa = io_uring_alloc_async_data_nocache(req); + iwa = io_uring_alloc_async_data(NULL, req); if (!iwa) return -ENOMEM; diff --git a/kernel/audit.c b/kernel/audit.c index 13d0144efaa3c..5f5bf85bcc905 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1221,7 +1221,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; - struct lsm_context lsmctx; + struct lsm_context lsmctx = { NULL, 0, 0 }; err = audit_netlink_ok(skb, msg_type); if (err) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e421a5f2ec7d0..2ca797cbe465f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -28,6 +28,7 @@ #include #include #include +#include /* check_stable_address_space */ #include @@ -1260,6 +1261,9 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) * returns NULL in find_active_uprobe_rcu(). */ mmap_write_lock(mm); + if (check_stable_address_space(mm)) + goto unlock; + vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) diff --git a/kernel/fork.c b/kernel/fork.c index cba5ede2c6398..735405a9c5f32 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -760,7 +760,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, mt_set_in_rcu(vmi.mas.tree); ksm_fork(mm, oldmm); khugepaged_fork(mm, oldmm); - } else if (mpnt) { + } else { + /* * The entire maple tree has already been duplicated. If the * mmap duplication fails, mark the failure point with @@ -768,8 +769,18 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, * stop releasing VMAs that have not been duplicated after this * point. */ - mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); - mas_store(&vmi.mas, XA_ZERO_ENTRY); + if (mpnt) { + mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store(&vmi.mas, XA_ZERO_ENTRY); + /* Avoid OOM iterating a broken tree */ + set_bit(MMF_OOM_SKIP, &mm->flags); + } + /* + * The mm_struct is going to exit, but the locks will be dropped + * first. Set the mm_struct as unstable is advisable as it is + * not fully initialised. + */ + set_bit(MMF_UNSTABLE, &mm->flags); } out: mmap_write_unlock(mm); diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 7e1340da5acae..00529c81cc401 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -7,20 +7,13 @@ set -e sfile="$(readlink -f "$0")" outdir="$(pwd)" tarfile=$1 -cpio_dir=$outdir/${tarfile%/*}/.tmp_cpio_dir +tmpdir=$outdir/${tarfile%/*}/.tmp_dir dir_list=" include/ arch/$SRCARCH/include/ " -if ! command -v cpio >/dev/null; then - echo >&2 "***" - echo >&2 "*** 'cpio' could not be found." - echo >&2 "***" - exit 1 -fi - # Support incremental builds by skipping archive generation # if timestamps of files being archived are not changed. @@ -48,9 +41,9 @@ all_dirs="$all_dirs $dir_list" # check include/generated/autoconf.h explicitly. # # Ignore them for md5 calculation to avoid pointless regeneration. -headers_md5="$(find $all_dirs -name "*.h" | - grep -v "include/generated/utsversion.h" | - grep -v "include/generated/autoconf.h" | +headers_md5="$(find $all_dirs -name "*.h" -a \ + ! -path include/generated/utsversion.h -a \ + ! -path include/generated/autoconf.h | xargs ls -l | md5sum | cut -d ' ' -f1)" # Any changes to this script will also cause a rebuild of the archive. @@ -65,36 +58,43 @@ fi echo " GEN $tarfile" -rm -rf $cpio_dir -mkdir $cpio_dir +rm -rf "${tmpdir}" +mkdir "${tmpdir}" if [ "$building_out_of_srctree" ]; then ( cd $srctree for f in $dir_list do find "$f" -name "*.h"; - done | cpio --quiet -pd $cpio_dir + done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" ) fi -# The second CPIO can complain if files already exist which can happen with out -# of tree builds having stale headers in srctree. Just silence CPIO for now. for f in $dir_list; do find "$f" -name "*.h"; -done | cpio --quiet -pdu $cpio_dir >/dev/null 2>&1 +done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" + +# Always exclude include/generated/utsversion.h +# Otherwise, the contents of the tarball may vary depending on the build steps. +rm -f "${tmpdir}/include/generated/utsversion.h" # Remove comments except SDPX lines -find $cpio_dir -type f -print0 | - xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' +# Use a temporary file to store directory contents to prevent find/xargs from +# seeing temporary files created by perl. +find "${tmpdir}" -type f -print0 > "${tmpdir}.contents.txt" +xargs -0 -P8 -n1 \ + perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' \ + < "${tmpdir}.contents.txt" +rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ --exclude=".__afs*" --exclude=".nfs*" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ - -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null + -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null echo $headers_md5 > kernel/kheaders.md5 echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 -rm -rf $cpio_dir +rm -rf "${tmpdir}" diff --git a/kernel/kthread.c b/kernel/kthread.c index 4005b13ebd7ff..5dc5b0d7238e8 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; unsigned long flags; - int ret; + int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { WARN_ON(1); @@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) out: free_cpumask_var(affinity); - return 0; + return ret; } /* diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 74fe976e3b010..d7762ef5949a2 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,6 +169,36 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +choice + prompt "Module versioning implementation" + depends on MODVERSIONS + help + Select the tool used to calculate symbol versions for modules. + + If unsure, select GENKSYMS. + +config GENKSYMS + bool "genksyms (from source code)" + help + Calculate symbol versions from pre-processed source code using + genksyms. + + If unsure, say Y. + +config GENDWARFKSYMS + bool "gendwarfksyms (from debugging information)" + depends on DEBUG_INFO + # Requires full debugging information, split DWARF not supported. + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT + # Requires ELF object files. + depends on !LTO + help + Calculate symbol versions from DWARF debugging information using + gendwarfksyms. Requires DEBUG_INFO to be enabled. + + If unsure, say N. +endchoice + config ASM_MODVERSIONS bool default HAVE_ASM_MODVERSIONS && MODVERSIONS @@ -177,6 +207,31 @@ config ASM_MODVERSIONS assembly. This can be enabled only when the target architecture supports it. +config EXTENDED_MODVERSIONS + bool "Extended Module Versioning Support" + depends on MODVERSIONS + help + This enables extended MODVERSIONs support, allowing long symbol + names to be versioned. + + The most likely reason you would enable this is to enable Rust + support. If unsure, say N. + +config BASIC_MODVERSIONS + bool "Basic Module Versioning Support" + depends on MODVERSIONS + default y + help + This enables basic MODVERSIONS support, allowing older tools or + kernels to potentially load modules. + + Disabling this may cause older `modprobe` or `kmod` to be unable + to read MODVERSIONS information from built modules. With this + disabled, older kernels may treat this module as unversioned. + + This is enabled by default when MODVERSIONS are enabled. + If unsure, say Y. + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help diff --git a/kernel/module/internal.h b/kernel/module/internal.h index b35c0ec54a895..d09b46ef032f0 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -55,8 +55,8 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const s32 __start___kcrctab[]; -extern const s32 __start___kcrctab_gpl[]; +extern const u32 __start___kcrctab[]; +extern const u32 __start___kcrctab_gpl[]; struct load_info { const char *name; @@ -86,6 +86,8 @@ struct load_info { unsigned int vers; unsigned int info; unsigned int pcpu; + unsigned int vers_ext_crc; + unsigned int vers_ext_name; } index; }; @@ -102,7 +104,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const s32 *crc; + const u32 *crc; const struct kernel_symbol *sym; enum mod_license license; }; @@ -385,16 +387,25 @@ static inline void init_param_lock(struct module *mod) { } #ifdef CONFIG_MODVERSIONS int check_version(const struct load_info *info, - const char *symname, struct module *mod, const s32 *crc); + const char *symname, struct module *mod, const u32 *crc); void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, struct tracepoint * const *tp); int check_modstruct_version(const struct load_info *info, struct module *mod); int same_magic(const char *amagic, const char *bmagic, bool has_crcs); +struct modversion_info_ext { + size_t remaining; + const u32 *crc; + const char *name; +}; +void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver); +void modversion_ext_advance(struct modversion_info_ext *ver); +#define for_each_modversion_info_ext(ver, info) \ + for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver)) #else /* !CONFIG_MODVERSIONS */ static inline int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { return 1; } diff --git a/kernel/module/main.c b/kernel/module/main.c index 8808b6906d5a0..1fb9ad289a6f8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -86,7 +86,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = { struct symsearch { const struct kernel_symbol *start, *stop; - const s32 *crcs; + const u32 *crcs; enum mod_license license; }; @@ -2073,6 +2073,82 @@ static int elf_validity_cache_index_str(struct load_info *info) return 0; } +/** + * elf_validity_cache_index_versions() - Validate and cache version indices + * @info: Load info to cache version indices in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * If we're ignoring modversions based on @flags, zero all version indices + * and return validity. Othewrise check: + * + * * If "__version_ext_crcs" is present, "__version_ext_names" is present + * * There is a name present for every crc + * + * Then populate: + * + * * &load_info->index.vers + * * &load_info->index.vers_ext_crc + * * &load_info->index.vers_ext_names + * + * if present. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_versions(struct load_info *info, int flags) +{ + unsigned int vers_ext_crc; + unsigned int vers_ext_name; + size_t crc_count; + size_t remaining_len; + size_t name_size; + char *name; + + /* If modversions were suppressed, pretend we didn't find any */ + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) { + info->index.vers = 0; + info->index.vers_ext_crc = 0; + info->index.vers_ext_name = 0; + return 0; + } + + vers_ext_crc = find_sec(info, "__version_ext_crcs"); + vers_ext_name = find_sec(info, "__version_ext_names"); + + /* If we have one field, we must have the other */ + if (!!vers_ext_crc != !!vers_ext_name) { + pr_err("extended version crc+name presence does not match"); + return -ENOEXEC; + } + + /* + * If we have extended version information, we should have the same + * number of entries in every section. + */ + if (vers_ext_crc) { + crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32); + name = (void *)info->hdr + + info->sechdrs[vers_ext_name].sh_offset; + remaining_len = info->sechdrs[vers_ext_name].sh_size; + + while (crc_count--) { + name_size = strnlen(name, remaining_len) + 1; + if (name_size > remaining_len) { + pr_err("more extended version crcs than names"); + return -ENOEXEC; + } + remaining_len -= name_size; + name += name_size; + } + } + + info->index.vers = find_sec(info, "__versions"); + info->index.vers_ext_crc = vers_ext_crc; + info->index.vers_ext_name = vers_ext_name; + return 0; +} + /** * elf_validity_cache_index() - Resolve, validate, cache section indices * @info: Load info to read from and update. @@ -2087,9 +2163,7 @@ static int elf_validity_cache_index_str(struct load_info *info) * * elf_validity_cache_index_mod() * * elf_validity_cache_index_sym() * * elf_validity_cache_index_str() - * - * If versioning is not suppressed via flags, load the version index from - * a section called "__versions" with no validation. + * * elf_validity_cache_index_versions() * * If CONFIG_SMP is enabled, load the percpu section by name with no * validation. @@ -2112,11 +2186,9 @@ static int elf_validity_cache_index(struct load_info *info, int flags) err = elf_validity_cache_index_str(info); if (err < 0) return err; - - if (flags & MODULE_INIT_IGNORE_MODVERSIONS) - info->index.vers = 0; /* Pretend no __versions section! */ - else - info->index.vers = find_sec(info, "__versions"); + err = elf_validity_cache_index_versions(info, flags); + if (err < 0) + return err; info->index.pcpu = find_pcpusec(info); @@ -2327,6 +2399,10 @@ static int rewrite_section_headers(struct load_info *info, int flags) /* Track but don't keep modinfo and version sections. */ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_crc].sh_flags &= + ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_name].sh_flags &= + ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; diff --git a/kernel/module/version.c b/kernel/module/version.c index 53f43ac5a73e9..3718a88683219 100644 --- a/kernel/module/version.c +++ b/kernel/module/version.c @@ -13,17 +13,34 @@ int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { Elf_Shdr *sechdrs = info->sechdrs; unsigned int versindex = info->index.vers; unsigned int i, num_versions; struct modversion_info *versions; + struct modversion_info_ext version_ext; /* Exporting module didn't supply crcs? OK, we're already tainted. */ if (!crc) return 1; + /* If we have extended version info, rely on it */ + if (info->index.vers_ext_crc) { + for_each_modversion_info_ext(version_ext, info) { + if (strcmp(version_ext.name, symname) != 0) + continue; + if (*version_ext.crc == *crc) + return 1; + pr_debug("Found checksum %X vs module %X\n", + *crc, *version_ext.crc); + goto bad_version; + } + pr_warn_once("%s: no extended symbol version for %s\n", + info->name, symname); + return 1; + } + /* No versions at all? modprobe --force does this. */ if (versindex == 0) return try_to_force_load(mod, symname) == 0; @@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic, return strcmp(amagic, bmagic) == 0; } +void modversion_ext_start(const struct load_info *info, + struct modversion_info_ext *start) +{ + unsigned int crc_idx = info->index.vers_ext_crc; + unsigned int name_idx = info->index.vers_ext_name; + Elf_Shdr *sechdrs = info->sechdrs; + + /* + * Both of these fields are needed for this to be useful + * Any future fields should be initialized to NULL if absent. + */ + if (crc_idx == 0 || name_idx == 0) { + start->remaining = 0; + return; + } + + start->crc = (const u32 *)sechdrs[crc_idx].sh_addr; + start->name = (const char *)sechdrs[name_idx].sh_addr; + start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc); +} + +void modversion_ext_advance(struct modversion_info_ext *vers) +{ + vers->remaining--; + vers->crc++; + vers->name += strlen(vers->name) + 1; +} + /* * Generate the signature for all relevant module structures here. * If these change, we don't want to try to parse the module. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f87aa01ba44f..10a01af63a807 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -608,7 +608,11 @@ int hibernation_platform_enter(void) local_irq_disable(); system_state = SYSTEM_SUSPEND; - syscore_suspend(); + + error = syscore_suspend(); + if (error) + goto Enable_irqs; + if (pm_wakeup_pending()) { error = -EAGAIN; goto Power_up; @@ -620,6 +624,7 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); + Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a2a29e3fffcaa..1a19d69b91ed3 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } sg_policy->thread = thread; - kthread_bind_mask(thread, policy->related_cpus); + if (policy->dvfs_possible_from_any_cpu) + set_cpus_allowed_ptr(thread, policy->related_cpus); + else + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); mutex_init(&sg_policy->work_lock); diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 149e2c8036d36..456d339be98fb 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -1129,6 +1129,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) if (!task_has_dl_policy(p) || !dl_bandwidth_enabled()) return 0; + /* + * The special/sugov task isn't part of regular bandwidth/admission + * control so let userspace change affinities. + */ + if (dl_entity_is_special(&p->dl)) + return 0; + /* * Since bandwidth control happens on root_domain basis, * if admission test is enabled, we only admit -deadline diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7304d7cf47f2d..77d9566d3aa68 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -382,7 +382,8 @@ void clocksource_verify_percpu(struct clocksource *cs) return; } testcpu = smp_processor_id(); - pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", + cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d8df94045c9..deb1aa32814e3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -145,11 +156,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -183,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -254,8 +287,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -264,8 +297,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -275,11 +307,6 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) @@ -716,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1205,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1216,9 +1242,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; + /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the @@ -1248,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid @@ -1370,6 +1421,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 775966cf6114e..1af972a92d06f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2456,22 +2456,8 @@ config TEST_BITMAP config TEST_UUID tristate "Test functions located in the uuid module at runtime" -config XARRAY_KUNIT - tristate "KUnit test XArray code at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - Enable this option to test the Xarray code at boot. - - KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs - running the KUnit test harness, and not intended for inclusion into a - production build. - - For more information on KUnit and unit tests in general please refer - to the KUnit documentation in Documentation/dev-tools/kunit/. - - If unsure, say N. +config TEST_XARRAY + tristate "Test the XArray code at runtime" config TEST_MAPLE_TREE tristate "Test the Maple Tree code at runtime or module load" diff --git a/lib/Makefile b/lib/Makefile index f1c6e9d76a7c0..d5cfc7afbbb82 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -94,6 +94,7 @@ GCOV_PROFILE_test_bitmap.o := n endif obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o @@ -372,7 +373,6 @@ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o -obj-$(CONFIG_XARRAY_KUNIT) += test_xarray.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index c40818ec9c180..fbe910c9c8253 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -47,10 +47,12 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define DO_NOTHING_TYPE_SCALAR(var_type) var_type #define DO_NOTHING_TYPE_STRING(var_type) void #define DO_NOTHING_TYPE_STRUCT(var_type) void +#define DO_NOTHING_TYPE_UNION(var_type) void #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) #define DO_NOTHING_RETURN_STRING(ptr) /**/ #define DO_NOTHING_RETURN_STRUCT(ptr) /**/ +#define DO_NOTHING_RETURN_UNION(ptr) /**/ #define DO_NOTHING_CALL_SCALAR(var, name) \ (var) = do_nothing_ ## name(&(var)) @@ -58,10 +60,13 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, do_nothing_ ## name(var) #define DO_NOTHING_CALL_STRUCT(var, name) \ do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_UNION(var, name) \ + do_nothing_ ## name(&(var)) #define FETCH_ARG_SCALAR(var) &var #define FETCH_ARG_STRING(var) var #define FETCH_ARG_STRUCT(var) &var +#define FETCH_ARG_UNION(var) &var /* * On m68k, if the leaf function test variable is longer than 8 bytes, @@ -77,6 +82,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_CLONE_SCALAR /**/ #define INIT_CLONE_STRING [FILL_SIZE_STRING] #define INIT_CLONE_STRUCT /**/ +#define INIT_CLONE_UNION /**/ #define ZERO_CLONE_SCALAR(zero) memset(&(zero), 0x00, sizeof(zero)) #define ZERO_CLONE_STRING(zero) memset(&(zero), 0x00, sizeof(zero)) @@ -92,6 +98,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, zero.three = 0; \ zero.four = 0; \ } while (0) +#define ZERO_CLONE_UNION(zero) ZERO_CLONE_STRUCT(zero) #define INIT_SCALAR_none(var_type) /**/ #define INIT_SCALAR_zero(var_type) = 0 @@ -101,6 +108,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_none(var_type) /**/ #define INIT_STRUCT_zero(var_type) = { } +#define INIT_STRUCT_old_zero(var_type) = { 0 } #define __static_partial { .two = 0, } @@ -146,6 +154,34 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_assigned_copy(var_type) \ ; var = *(arg) +/* Union initialization is the same as structs. */ +#define INIT_UNION_none(var_type) INIT_STRUCT_none(var_type) +#define INIT_UNION_zero(var_type) INIT_STRUCT_zero(var_type) +#define INIT_UNION_old_zero(var_type) INIT_STRUCT_old_zero(var_type) + +#define INIT_UNION_static_partial(var_type) \ + INIT_STRUCT_static_partial(var_type) +#define INIT_UNION_static_all(var_type) \ + INIT_STRUCT_static_all(var_type) +#define INIT_UNION_dynamic_partial(var_type) \ + INIT_STRUCT_dynamic_partial(var_type) +#define INIT_UNION_dynamic_all(var_type) \ + INIT_STRUCT_dynamic_all(var_type) +#define INIT_UNION_runtime_partial(var_type) \ + INIT_STRUCT_runtime_partial(var_type) +#define INIT_UNION_runtime_all(var_type) \ + INIT_STRUCT_runtime_all(var_type) +#define INIT_UNION_assigned_static_partial(var_type) \ + INIT_STRUCT_assigned_static_partial(var_type) +#define INIT_UNION_assigned_static_all(var_type) \ + INIT_STRUCT_assigned_static_all(var_type) +#define INIT_UNION_assigned_dynamic_partial(var_type) \ + INIT_STRUCT_assigned_dynamic_partial(var_type) +#define INIT_UNION_assigned_dynamic_all(var_type) \ + INIT_STRUCT_assigned_dynamic_all(var_type) +#define INIT_UNION_assigned_copy(var_type) \ + INIT_STRUCT_assigned_copy(var_type) + /* * @name: unique string name for the test * @var_type: type to be tested for zeroing initialization @@ -294,6 +330,33 @@ struct test_user { unsigned long four; }; +/* No padding: all members are the same size. */ +union test_same_sizes { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_start { + char one:1; + char two; + short three; + unsigned long four; + struct big_struct { + unsigned long array[8]; + } big; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_end { + short one; + unsigned long two; + char three:1; + char four; +}; + #define ALWAYS_PASS WANT_SUCCESS #define ALWAYS_FAIL XFAIL @@ -332,6 +395,11 @@ struct test_user { struct test_ ## name, STRUCT, init, \ xfail) +#define DEFINE_UNION_TEST(name, init, xfail) \ + DEFINE_TEST(name ## _ ## init, \ + union test_ ## name, STRUCT, init, \ + xfail) + #define DEFINE_STRUCT_TESTS(init, xfail) \ DEFINE_STRUCT_TEST(small_hole, init, xfail); \ DEFINE_STRUCT_TEST(big_hole, init, xfail); \ @@ -343,9 +411,22 @@ struct test_user { xfail); \ DEFINE_STRUCT_TESTS(base ## _ ## all, xfail) +#define DEFINE_UNION_INITIALIZER_TESTS(base, xfail) \ + DEFINE_UNION_TESTS(base ## _ ## partial, \ + xfail); \ + DEFINE_UNION_TESTS(base ## _ ## all, xfail) + +#define DEFINE_UNION_TESTS(init, xfail) \ + DEFINE_UNION_TEST(same_sizes, init, xfail); \ + DEFINE_UNION_TEST(small_start, init, xfail); \ + DEFINE_UNION_TEST(small_end, init, xfail); + /* These should be fully initialized all the time! */ DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS); DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS); +DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(old_zero, ALWAYS_PASS); /* Struct initializers: padding may be left uninitialized. */ DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS); @@ -353,6 +434,12 @@ DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL); +DEFINE_UNION_INITIALIZER_TESTS(static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(dynamic, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(runtime, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); +DEFINE_UNION_TESTS(assigned_copy, ALWAYS_FAIL); /* No initialization without compiler instrumentation. */ DEFINE_SCALAR_TESTS(none, STRONG_PASS); DEFINE_STRUCT_TESTS(none, BYREF_PASS); @@ -436,13 +523,23 @@ DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL); KUNIT_CASE(test_trailing_hole_ ## init),\ KUNIT_CASE(test_packed_ ## init) \ +#define KUNIT_test_unions(init) \ + KUNIT_CASE(test_same_sizes_ ## init), \ + KUNIT_CASE(test_small_start_ ## init), \ + KUNIT_CASE(test_small_end_ ## init) \ + static struct kunit_case stackinit_test_cases[] = { /* These are explicitly initialized and should always pass. */ KUNIT_test_scalars(zero), KUNIT_test_structs(zero), + KUNIT_test_structs(old_zero), + KUNIT_test_unions(zero), + KUNIT_test_unions(old_zero), /* Padding here appears to be accidentally always initialized? */ KUNIT_test_structs(dynamic_partial), KUNIT_test_structs(assigned_dynamic_partial), + KUNIT_test_unions(dynamic_partial), + KUNIT_test_unions(assigned_dynamic_partial), /* Padding initialization depends on compiler behaviors. */ KUNIT_test_structs(static_partial), KUNIT_test_structs(static_all), @@ -452,8 +549,17 @@ static struct kunit_case stackinit_test_cases[] = { KUNIT_test_structs(assigned_static_partial), KUNIT_test_structs(assigned_static_all), KUNIT_test_structs(assigned_dynamic_all), + KUNIT_test_unions(static_partial), + KUNIT_test_unions(static_all), + KUNIT_test_unions(dynamic_all), + KUNIT_test_unions(runtime_partial), + KUNIT_test_unions(runtime_all), + KUNIT_test_unions(assigned_static_partial), + KUNIT_test_unions(assigned_static_all), + KUNIT_test_unions(assigned_dynamic_all), /* Everything fails this since it effectively performs a memcpy(). */ KUNIT_test_structs(assigned_copy), + KUNIT_test_unions(assigned_copy), /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ KUNIT_test_scalars(none), KUNIT_CASE(test_switch_1_none), diff --git a/lib/test_xarray.c b/lib/test_xarray.c index eab5971d0a481..6932a26f4927c 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -6,10 +6,11 @@ * Author: Matthew Wilcox */ -#include - -#include #include +#include + +static unsigned int tests_run; +static unsigned int tests_passed; static const unsigned int order_limit = IS_ENABLED(CONFIG_XARRAY_MULTI) ? BITS_PER_LONG : 1; @@ -19,12 +20,15 @@ static const unsigned int order_limit = void xa_dump(const struct xarray *xa) { } # endif #undef XA_BUG_ON -#define XA_BUG_ON(xa, x) do { \ - if (x) { \ - KUNIT_FAIL(test, #x); \ - xa_dump(xa); \ - dump_stack(); \ - } \ +#define XA_BUG_ON(xa, x) do { \ + tests_run++; \ + if (x) { \ + printk("BUG at %s:%d\n", __func__, __LINE__); \ + xa_dump(xa); \ + dump_stack(); \ + } else { \ + tests_passed++; \ + } \ } while (0) #endif @@ -38,13 +42,13 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_index(index), gfp); } -static void xa_insert_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_insert_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index), GFP_KERNEL) != 0); } -static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long index, gfp_t gfp) +static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { u32 id; @@ -53,7 +57,7 @@ static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long XA_BUG_ON(xa, id != index); } -static void xa_erase_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_erase_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index)); XA_BUG_ON(xa, xa_load(xa, index) != NULL); @@ -79,15 +83,8 @@ static void *xa_store_order(struct xarray *xa, unsigned long index, return curr; } -static inline struct xarray *xa_param(struct kunit *test) +static noinline void check_xa_err(struct xarray *xa) { - return *(struct xarray **)test->param_value; -} - -static noinline void check_xa_err(struct kunit *test) -{ - struct xarray *xa = xa_param(test); - XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0); XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0); #ifndef __KERNEL__ @@ -102,10 +99,8 @@ static noinline void check_xa_err(struct kunit *test) // XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } -static noinline void check_xas_retry(struct kunit *test) +static noinline void check_xas_retry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -114,7 +109,7 @@ static noinline void check_xas_retry(struct kunit *test) rcu_read_lock(); XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0)); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas))); XA_BUG_ON(xa, xas_retry(&xas, NULL)); XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0))); @@ -145,14 +140,12 @@ static noinline void check_xas_retry(struct kunit *test) } xas_unlock(&xas); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0); + xa_erase_index(xa, 1); } -static noinline void check_xa_load(struct kunit *test) +static noinline void check_xa_load(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j; for (i = 0; i < 1024; i++) { @@ -174,15 +167,13 @@ static noinline void check_xa_load(struct kunit *test) else XA_BUG_ON(xa, entry); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) +static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) { - struct xarray *xa = xa_param(test); - unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1; @@ -202,7 +193,7 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1)); /* Storing NULL clears marks, and they can't be set again */ - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); @@ -253,17 +244,15 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2)); - xa_erase_index(test, xa, index); - xa_erase_index(test, xa, next); + xa_erase_index(xa, index); + xa_erase_index(xa, next); XA_BUG_ON(xa, !xa_empty(xa)); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_2(struct kunit *test) +static noinline void check_xa_mark_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long index; unsigned int count = 0; @@ -300,11 +289,9 @@ static noinline void check_xa_mark_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_xa_mark_3(struct kunit *test) +static noinline void check_xa_mark_3(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0x41); void *entry; int count = 0; @@ -323,21 +310,19 @@ static noinline void check_xa_mark_3(struct kunit *test) #endif } -static noinline void check_xa_mark(struct kunit *test) +static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; for (index = 0; index < 16384; index += 4) - check_xa_mark_1(test, index); + check_xa_mark_1(xa, index); - check_xa_mark_2(test); - check_xa_mark_3(test); + check_xa_mark_2(xa); + check_xa_mark_3(xa); } -static noinline void check_xa_shrink(struct kunit *test) +static noinline void check_xa_shrink(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 1); struct xa_node *node; unsigned int order; @@ -362,7 +347,7 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xas_load(&xas) != NULL); xas_unlock(&xas); XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); for (order = 0; order < max_order; order++) { @@ -379,49 +364,45 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xa_head(xa) == node); rcu_read_unlock(); XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, xa->xa_head != node); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_insert(struct kunit *test) +static noinline void check_insert(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i; for (i = 0; i < 1024; i++) { - xa_insert_index(test, xa, i); + xa_insert_index(xa, i); XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL); - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } for (i = 10; i < BITS_PER_LONG; i++) { - xa_insert_index(test, xa, 1UL << i); + xa_insert_index(xa, 1UL << i); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL); - xa_erase_index(test, xa, 1UL << i); + xa_erase_index(xa, 1UL << i); - xa_insert_index(test, xa, (1UL << i) - 1); + xa_insert_index(xa, (1UL << i) - 1); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL); XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL); - xa_erase_index(test, xa, (1UL << i) - 1); + xa_erase_index(xa, (1UL << i) - 1); } - xa_insert_index(test, xa, ~0UL); + xa_insert_index(xa, ~0UL); XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL); XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL); - xa_erase_index(test, xa, ~0UL); + xa_erase_index(xa, ~0UL); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg(struct kunit *test) +static noinline void check_cmpxchg(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); void *SIX = xa_mk_value(6); void *LOTS = xa_mk_value(12345678); @@ -437,16 +418,14 @@ static noinline void check_cmpxchg(struct kunit *test) XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); - xa_erase_index(test, xa, 12345678); - xa_erase_index(test, xa, 5); + xa_erase_index(xa, 12345678); + xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg_order(struct kunit *test) +static noinline void check_cmpxchg_order(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); unsigned int i, order = 3; @@ -497,10 +476,8 @@ static noinline void check_cmpxchg_order(struct kunit *test) #endif } -static noinline void check_reserve(struct kunit *test) +static noinline void check_reserve(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long index; int count; @@ -517,7 +494,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* cmpxchg sees a reserved entry as ZERO */ @@ -525,7 +502,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY, xa_mk_value(12345678), GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* xa_insert treats it as busy */ @@ -565,10 +542,8 @@ static noinline void check_reserve(struct kunit *test) xa_destroy(xa); } -static noinline void check_xas_erase(struct kunit *test) +static noinline void check_xas_erase(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned long i, j; @@ -606,11 +581,9 @@ static noinline void check_xas_erase(struct kunit *test) } #ifdef CONFIG_XARRAY_MULTI -static noinline void check_multi_store_1(struct kunit *test, unsigned long index, +static noinline void check_multi_store_1(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); @@ -629,15 +602,13 @@ static noinline void check_multi_store_1(struct kunit *test, unsigned long index XA_BUG_ON(xa, xa_load(xa, max) != NULL); XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL); - xa_erase_index(test, xa, min); + xa_erase_index(xa, min); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_2(struct kunit *test, unsigned long index, +static noinline void check_multi_store_2(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL); @@ -649,11 +620,9 @@ static noinline void check_multi_store_2(struct kunit *test, unsigned long index XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_3(struct kunit *test, unsigned long index, +static noinline void check_multi_store_3(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; int n = 0; @@ -678,11 +647,9 @@ static noinline void check_multi_store_3(struct kunit *test, unsigned long index } #endif -static noinline void check_multi_store(struct kunit *test) +static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long i, j, k; unsigned int max_order = (sizeof(long) == 4) ? 30 : 60; @@ -747,28 +714,26 @@ static noinline void check_multi_store(struct kunit *test) } for (i = 0; i < 20; i++) { - check_multi_store_1(test, 200, i); - check_multi_store_1(test, 0, i); - check_multi_store_1(test, (1UL << i) + 1, i); + check_multi_store_1(xa, 200, i); + check_multi_store_1(xa, 0, i); + check_multi_store_1(xa, (1UL << i) + 1, i); } - check_multi_store_2(test, 4095, 9); + check_multi_store_2(xa, 4095, 9); for (i = 1; i < 20; i++) { - check_multi_store_3(test, 0, i); - check_multi_store_3(test, 1UL << i, i); + check_multi_store_3(xa, 0, i); + check_multi_store_3(xa, 1UL << i, i); } #endif } #ifdef CONFIG_XARRAY_MULTI /* mimics page cache __filemap_add_folio() */ -static noinline void check_xa_multi_store_adv_add(struct kunit *test, +static noinline void check_xa_multi_store_adv_add(struct xarray *xa, unsigned long index, unsigned int order, void *p) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned int nrpages = 1UL << order; @@ -796,12 +761,10 @@ static noinline void check_xa_multi_store_adv_add(struct kunit *test, } /* mimics page_cache_delete() */ -static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, +static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xas_set_order(&xas, index, order); @@ -809,14 +772,12 @@ static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, xas_init_marks(&xas); } -static noinline void check_xa_multi_store_adv_delete(struct kunit *test, +static noinline void check_xa_multi_store_adv_delete(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - xa_lock_irq(xa); - check_xa_multi_store_adv_del_entry(test, index, order); + check_xa_multi_store_adv_del_entry(xa, index, order); xa_unlock_irq(xa); } @@ -853,12 +814,10 @@ static unsigned long some_val = 0xdeadbeef; static unsigned long some_val_2 = 0xdeaddead; /* mimics the page cache usage */ -static noinline void check_xa_multi_store_adv(struct kunit *test, +static noinline void check_xa_multi_store_adv(struct xarray *xa, unsigned long pos, unsigned int order) { - struct xarray *xa = xa_param(test); - unsigned int nrpages = 1UL << order; unsigned long index, base, next_index, next_next_index; unsigned int i; @@ -868,7 +827,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, next_index = round_down(base + nrpages, nrpages); next_next_index = round_down(next_index + nrpages, nrpages); - check_xa_multi_store_adv_add(test, base, order, &some_val); + check_xa_multi_store_adv_add(xa, base, order, &some_val); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val); @@ -876,20 +835,20 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL); /* Use order 0 for the next item */ - check_xa_multi_store_adv_add(test, next_index, 0, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2); XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2); /* Remove the next item */ - check_xa_multi_store_adv_delete(test, next_index, 0); + check_xa_multi_store_adv_delete(xa, next_index, 0); /* Now use order for a new pointer */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_index, order); - check_xa_multi_store_adv_delete(test, base, order); + check_xa_multi_store_adv_delete(xa, next_index, order); + check_xa_multi_store_adv_delete(xa, base, order); XA_BUG_ON(xa, !xa_empty(xa)); /* starting fresh again */ @@ -897,7 +856,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, /* let's test some holes now */ /* hole at base and next_next */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -908,12 +867,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL); - check_xa_multi_store_adv_delete(test, next_index, order); + check_xa_multi_store_adv_delete(xa, next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); /* hole at base and next */ - check_xa_multi_store_adv_add(test, next_next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -924,12 +883,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_next_index, order); + check_xa_multi_store_adv_delete(xa, next_next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); } #endif -static noinline void check_multi_store_advanced(struct kunit *test) +static noinline void check_multi_store_advanced(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; @@ -941,59 +900,59 @@ static noinline void check_multi_store_advanced(struct kunit *test) */ for (pos = 7; pos < end; pos = (pos * pos) + 564) { for (i = 0; i < max_order; i++) { - check_xa_multi_store_adv(test, pos, i); - check_xa_multi_store_adv(test, pos + 157, i); + check_xa_multi_store_adv(xa, pos, i); + check_xa_multi_store_adv(xa, pos + 157, i); } } #endif } -static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; XA_BUG_ON(xa, !xa_empty(xa)); /* An empty array should assign %base to the first alloc */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(test, xa, base); + xa_erase_index(xa, base); XA_BUG_ON(xa, !xa_empty(xa)); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Allocating and then erasing a lot should not lose base */ for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) - xa_alloc_index(test, xa, i, GFP_KERNEL); + xa_alloc_index(xa, i, GFP_KERNEL); for (i = base; i < 2 * XA_CHUNK_SIZE; i++) - xa_erase_index(test, xa, i); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); /* Destroying the array should do the same as erasing */ xa_destroy(xa); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* The next assigned ID should be base+1 */ - xa_alloc_index(test, xa, base + 1, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ xa_store_index(xa, base + 1, GFP_KERNEL); - xa_alloc_index(test, xa, base + 2, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); /* If we then erase base, it should be free */ - xa_erase_index(test, xa, base); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); - xa_erase_index(test, xa, base + 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(test, xa, base + i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } xa_destroy(xa); @@ -1016,14 +975,14 @@ static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - xa_erase_index(test, xa, 3); + xa_erase_index(xa, 3); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) { unsigned int i, id; unsigned long index; @@ -1059,7 +1018,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != 5); xa_for_each(xa, index, entry) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } for (i = base; i < base + 9; i++) { @@ -1074,7 +1033,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns xa_destroy(xa); } -static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) { struct xa_limit limit = XA_LIMIT(1, 0x3fff); u32 next = 0; @@ -1090,8 +1049,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, &next, GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0x3ffd); - xa_erase_index(test, xa, 0x3ffd); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (i = 0x3ffe; i < 0x4003; i++) { @@ -1106,8 +1065,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns /* Check wrap-around is handled correctly */ if (base != 0) - xa_erase_index(test, xa, base); - xa_erase_index(test, xa, base + 1); + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); next = UINT_MAX; XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), xa_limit_32b, &next, GFP_KERNEL) != 0); @@ -1120,7 +1079,7 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != base + 1); xa_for_each(xa, index, entry) - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -1128,21 +1087,19 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); -static noinline void check_xa_alloc(struct kunit *test) +static noinline void check_xa_alloc(void) { - check_xa_alloc_1(test, &xa0, 0); - check_xa_alloc_1(test, &xa1, 1); - check_xa_alloc_2(test, &xa0, 0); - check_xa_alloc_2(test, &xa1, 1); - check_xa_alloc_3(test, &xa0, 0); - check_xa_alloc_3(test, &xa1, 1); + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } -static noinline void __check_store_iter(struct kunit *test, unsigned long start, +static noinline void __check_store_iter(struct xarray *xa, unsigned long start, unsigned int order, unsigned int present) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, start, order); void *entry; unsigned int count = 0; @@ -1166,54 +1123,50 @@ static noinline void __check_store_iter(struct kunit *test, unsigned long start, XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start)); XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) != xa_mk_index(start)); - xa_erase_index(test, xa, start); + xa_erase_index(xa, start); } -static noinline void check_store_iter(struct kunit *test) +static noinline void check_store_iter(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int i, j; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; for (i = 0; i < max_order; i++) { unsigned int min = 1 << i; unsigned int max = (2 << i) - 1; - __check_store_iter(test, 0, i, 0); + __check_store_iter(xa, 0, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); - __check_store_iter(test, min, i, 0); + __check_store_iter(xa, min, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, min, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, max, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, j, GFP_KERNEL); - __check_store_iter(test, 0, i, min); + __check_store_iter(xa, 0, i, min); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, min + j, GFP_KERNEL); - __check_store_iter(test, min, i, min); + __check_store_iter(xa, min, i, min); XA_BUG_ON(xa, !xa_empty(xa)); } #ifdef CONFIG_XARRAY_MULTI xa_store_index(xa, 63, GFP_KERNEL); xa_store_index(xa, 65, GFP_KERNEL); - __check_store_iter(test, 64, 2, 1); - xa_erase_index(test, xa, 63); + __check_store_iter(xa, 64, 2, 1); + xa_erase_index(xa, 63); #endif XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_find_1(struct kunit *test, unsigned int order) +static noinline void check_multi_find_1(struct xarray *xa, unsigned order) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long multi = 3 << order; unsigned long next = 4 << order; unsigned long index; @@ -1236,17 +1189,15 @@ static noinline void check_multi_find_1(struct kunit *test, unsigned int order) XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL); XA_BUG_ON(xa, index != next); - xa_erase_index(test, xa, multi); - xa_erase_index(test, xa, next); - xa_erase_index(test, xa, next + 1); + xa_erase_index(xa, multi); + xa_erase_index(xa, next); + xa_erase_index(xa, next + 1); XA_BUG_ON(xa, !xa_empty(xa)); #endif } -static noinline void check_multi_find_2(struct kunit *test) +static noinline void check_multi_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1; unsigned int i, j; void *entry; @@ -1260,19 +1211,17 @@ static noinline void check_multi_find_2(struct kunit *test) GFP_KERNEL); rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } rcu_read_unlock(); - xa_erase_index(test, xa, index - 1); + xa_erase_index(xa, index - 1); XA_BUG_ON(xa, !xa_empty(xa)); } } } -static noinline void check_multi_find_3(struct kunit *test) +static noinline void check_multi_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 5; order < order_limit; order++) { @@ -1281,14 +1230,12 @@ static noinline void check_multi_find_3(struct kunit *test) XA_BUG_ON(xa, !xa_empty(xa)); xa_store_order(xa, 0, order - 4, xa_mk_index(0), GFP_KERNEL); XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_find_1(struct kunit *test) +static noinline void check_find_1(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j, k; XA_BUG_ON(xa, !xa_empty(xa)); @@ -1325,20 +1272,18 @@ static noinline void check_find_1(struct kunit *test) else XA_BUG_ON(xa, entry != NULL); } - xa_erase_index(test, xa, j); + xa_erase_index(xa, j); XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0)); XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0)); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_find_2(struct kunit *test) +static noinline void check_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long i, j, index; @@ -1358,10 +1303,8 @@ static noinline void check_find_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_3(struct kunit *test) +static noinline void check_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i, j, k; void *entry; @@ -1385,10 +1328,8 @@ static noinline void check_find_3(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_4(struct kunit *test) +static noinline void check_find_4(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index = 0; void *entry; @@ -1400,22 +1341,22 @@ static noinline void check_find_4(struct kunit *test) entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT); XA_BUG_ON(xa, entry); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); } -static noinline void check_find(struct kunit *test) +static noinline void check_find(struct xarray *xa) { unsigned i; - check_find_1(test); - check_find_2(test); - check_find_3(test); - check_find_4(test); + check_find_1(xa); + check_find_2(xa); + check_find_3(xa); + check_find_4(xa); for (i = 2; i < 10; i++) - check_multi_find_1(test, i); - check_multi_find_2(test); - check_multi_find_3(test); + check_multi_find_1(xa, i); + check_multi_find_2(xa); + check_multi_find_3(xa); } /* See find_swap_entry() in mm/shmem.c */ @@ -1441,10 +1382,8 @@ static noinline unsigned long xa_find_entry(struct xarray *xa, void *item) return entry ? xas.xa_index : -1; } -static noinline void check_find_entry(struct kunit *test) +static noinline void check_find_entry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI unsigned int order; unsigned long offset, index; @@ -1471,14 +1410,12 @@ static noinline void check_find_entry(struct kunit *test) xa_store_index(xa, ULONG_MAX, GFP_KERNEL); XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_pause(struct kunit *test) +static noinline void check_pause(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned int order; @@ -1548,10 +1485,8 @@ static noinline void check_pause(struct kunit *test) } -static noinline void check_move_tiny(struct kunit *test) +static noinline void check_move_tiny(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); @@ -1568,14 +1503,12 @@ static noinline void check_move_tiny(struct kunit *test) XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0)); XA_BUG_ON(xa, xas_prev(&xas) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_max(struct kunit *test) +static noinline void check_move_max(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); xa_store_index(xa, ULONG_MAX, GFP_KERNEL); @@ -1591,14 +1524,12 @@ static noinline void check_move_max(struct kunit *test) XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_small(struct kunit *test, unsigned long idx) +static noinline void check_move_small(struct xarray *xa, unsigned long idx) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i; @@ -1640,15 +1571,13 @@ static noinline void check_move_small(struct kunit *test, unsigned long idx) XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); rcu_read_unlock(); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, idx); + xa_erase_index(xa, 0); + xa_erase_index(xa, idx); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move(struct kunit *test) +static noinline void check_move(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, (1 << 16) - 1); unsigned long i; @@ -1675,7 +1604,7 @@ static noinline void check_move(struct kunit *test) rcu_read_unlock(); for (i = (1 << 8); i < (1 << 15); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); i = xas.xa_index; @@ -1706,17 +1635,17 @@ static noinline void check_move(struct kunit *test) xa_destroy(xa); - check_move_tiny(test); - check_move_max(test); + check_move_tiny(xa); + check_move_max(xa); for (i = 0; i < 16; i++) - check_move_small(test, 1UL << i); + check_move_small(xa, 1UL << i); for (i = 2; i < 16; i++) - check_move_small(test, (1UL << i) - 1); + check_move_small(xa, (1UL << i) - 1); } -static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa, +static noinline void xa_store_many_order(struct xarray *xa, unsigned long index, unsigned order) { XA_STATE_ORDER(xas, xa, index, order); @@ -1739,34 +1668,30 @@ static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa, XA_BUG_ON(xa, xas_error(&xas)); } -static noinline void check_create_range_1(struct kunit *test, +static noinline void check_create_range_1(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; - xa_store_many_order(test, xa, index, order); + xa_store_many_order(xa, index, order); for (i = index; i < index + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_2(struct kunit *test, unsigned int order) +static noinline void check_create_range_2(struct xarray *xa, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; unsigned long nr = 1UL << order; for (i = 0; i < nr * nr; i += nr) - xa_store_many_order(test, xa, i, order); + xa_store_many_order(xa, i, order); for (i = 0; i < nr * nr; i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_3(struct kunit *test) +static noinline void check_create_range_3(void) { XA_STATE(xas, NULL, 0); xas_set_err(&xas, -EEXIST); @@ -1774,11 +1699,9 @@ static noinline void check_create_range_3(struct kunit *test) XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST); } -static noinline void check_create_range_4(struct kunit *test, +static noinline void check_create_range_4(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned long base = xas.xa_index; unsigned long i = 0; @@ -1804,15 +1727,13 @@ static noinline void check_create_range_4(struct kunit *test, XA_BUG_ON(xa, xas_error(&xas)); for (i = base; i < base + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_5(struct kunit *test, +static noinline void check_create_range_5(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned int i; @@ -1829,46 +1750,44 @@ static noinline void check_create_range_5(struct kunit *test, xa_destroy(xa); } -static noinline void check_create_range(struct kunit *test) +static noinline void check_create_range(struct xarray *xa) { unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1; for (order = 0; order < max_order; order++) { - check_create_range_1(test, 0, order); - check_create_range_1(test, 1U << order, order); - check_create_range_1(test, 2U << order, order); - check_create_range_1(test, 3U << order, order); - check_create_range_1(test, 1U << 24, order); + check_create_range_1(xa, 0, order); + check_create_range_1(xa, 1U << order, order); + check_create_range_1(xa, 2U << order, order); + check_create_range_1(xa, 3U << order, order); + check_create_range_1(xa, 1U << 24, order); if (order < 10) - check_create_range_2(test, order); - - check_create_range_4(test, 0, order); - check_create_range_4(test, 1U << order, order); - check_create_range_4(test, 2U << order, order); - check_create_range_4(test, 3U << order, order); - check_create_range_4(test, 1U << 24, order); - - check_create_range_4(test, 1, order); - check_create_range_4(test, (1U << order) + 1, order); - check_create_range_4(test, (2U << order) + 1, order); - check_create_range_4(test, (2U << order) - 1, order); - check_create_range_4(test, (3U << order) + 1, order); - check_create_range_4(test, (3U << order) - 1, order); - check_create_range_4(test, (1U << 24) + 1, order); - - check_create_range_5(test, 0, order); - check_create_range_5(test, (1U << order), order); + check_create_range_2(xa, order); + + check_create_range_4(xa, 0, order); + check_create_range_4(xa, 1U << order, order); + check_create_range_4(xa, 2U << order, order); + check_create_range_4(xa, 3U << order, order); + check_create_range_4(xa, 1U << 24, order); + + check_create_range_4(xa, 1, order); + check_create_range_4(xa, (1U << order) + 1, order); + check_create_range_4(xa, (2U << order) + 1, order); + check_create_range_4(xa, (2U << order) - 1, order); + check_create_range_4(xa, (3U << order) + 1, order); + check_create_range_4(xa, (3U << order) - 1, order); + check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } - check_create_range_3(test); + check_create_range_3(); } -static noinline void __check_store_range(struct kunit *test, unsigned long first, +static noinline void __check_store_range(struct xarray *xa, unsigned long first, unsigned long last) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL); @@ -1883,28 +1802,26 @@ static noinline void __check_store_range(struct kunit *test, unsigned long first XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_store_range(struct kunit *test) +static noinline void check_store_range(struct xarray *xa) { unsigned long i, j; for (i = 0; i < 128; i++) { for (j = i; j < 128; j++) { - __check_store_range(test, i, j); - __check_store_range(test, 128 + i, 128 + j); - __check_store_range(test, 4095 + i, 4095 + j); - __check_store_range(test, 4096 + i, 4096 + j); - __check_store_range(test, 123456 + i, 123456 + j); - __check_store_range(test, (1 << 24) + i, (1 << 24) + j); + __check_store_range(xa, i, j); + __check_store_range(xa, 128 + i, 128 + j); + __check_store_range(xa, 4095 + i, 4095 + j); + __check_store_range(xa, 4096 + i, 4096 + j); + __check_store_range(xa, 123456 + i, 123456 + j); + __check_store_range(xa, (1 << 24) + i, (1 << 24) + j); } } } #ifdef CONFIG_XARRAY_MULTI -static void check_split_1(struct kunit *test, unsigned long index, +static void check_split_1(struct xarray *xa, unsigned long index, unsigned int order, unsigned int new_order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, new_order); unsigned int i, found; void *entry; @@ -1940,30 +1857,26 @@ static void check_split_1(struct kunit *test, unsigned long index, xa_destroy(xa); } -static noinline void check_split(struct kunit *test) +static noinline void check_split(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order, new_order; XA_BUG_ON(xa, !xa_empty(xa)); for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { for (new_order = 0; new_order < order; new_order++) { - check_split_1(test, 0, order, new_order); - check_split_1(test, 1UL << order, order, new_order); - check_split_1(test, 3UL << order, order, new_order); + check_split_1(xa, 0, order, new_order); + check_split_1(xa, 1UL << order, order, new_order); + check_split_1(xa, 3UL << order, order, new_order); } } } #else -static void check_split(struct kunit *test) { } +static void check_split(struct xarray *xa) { } #endif -static void check_align_1(struct kunit *test, char *name) +static void check_align_1(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; unsigned int id; unsigned long index; @@ -1983,10 +1896,8 @@ static void check_align_1(struct kunit *test, char *name) * We should always be able to store without allocating memory after * reserving a slot. */ -static void check_align_2(struct kunit *test, char *name) +static void check_align_2(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2005,15 +1916,15 @@ static void check_align_2(struct kunit *test, char *name) XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_align(struct kunit *test) +static noinline void check_align(struct xarray *xa) { char name[] = "Motorola 68000"; - check_align_1(test, name); - check_align_1(test, name + 1); - check_align_1(test, name + 2); - check_align_1(test, name + 3); - check_align_2(test, name); + check_align_1(xa, name); + check_align_1(xa, name + 1); + check_align_1(xa, name + 2); + check_align_1(xa, name + 3); + check_align_2(xa, name); } static LIST_HEAD(shadow_nodes); @@ -2029,7 +1940,7 @@ static void test_update_node(struct xa_node *node) } } -static noinline void shadow_remove(struct kunit *test, struct xarray *xa) +static noinline void shadow_remove(struct xarray *xa) { struct xa_node *node; @@ -2043,17 +1954,8 @@ static noinline void shadow_remove(struct kunit *test, struct xarray *xa) xa_unlock(xa); } -struct workingset_testcase { - struct xarray *xa; - unsigned long index; -}; - -static noinline void check_workingset(struct kunit *test) +static noinline void check_workingset(struct xarray *xa, unsigned long index) { - struct workingset_testcase tc = *(struct workingset_testcase *)test->param_value; - struct xarray *xa = tc.xa; - unsigned long index = tc.index; - XA_STATE(xas, xa, index); xas_set_update(&xas, test_update_node); @@ -2076,7 +1978,7 @@ static noinline void check_workingset(struct kunit *test) xas_unlock(&xas); XA_BUG_ON(xa, list_empty(&shadow_nodes)); - shadow_remove(test, xa); + shadow_remove(xa); XA_BUG_ON(xa, !list_empty(&shadow_nodes)); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -2085,11 +1987,9 @@ static noinline void check_workingset(struct kunit *test) * Check that the pointer / value / sibling entries are accounted the * way we expect them to be. */ -static noinline void check_account(struct kunit *test) +static noinline void check_account(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 1; order < 12; order++) { @@ -2116,10 +2016,8 @@ static noinline void check_account(struct kunit *test) #endif } -static noinline void check_get_order(struct kunit *test) +static noinline void check_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; unsigned int order; unsigned long i, j; @@ -2138,10 +2036,8 @@ static noinline void check_get_order(struct kunit *test) } } -static noinline void check_xas_get_order(struct kunit *test) +static noinline void check_xas_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; @@ -2173,10 +2069,8 @@ static noinline void check_xas_get_order(struct kunit *test) } } -static noinline void check_xas_conflict_get_order(struct kunit *test) +static noinline void check_xas_conflict_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -2233,10 +2127,8 @@ static noinline void check_xas_conflict_get_order(struct kunit *test) } -static noinline void check_destroy(struct kunit *test) +static noinline void check_destroy(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2269,59 +2161,52 @@ static noinline void check_destroy(struct kunit *test) } static DEFINE_XARRAY(array); -static struct xarray *arrays[] = { &array }; -KUNIT_ARRAY_PARAM(array, arrays, NULL); - -static struct xarray *xa0s[] = { &xa0 }; -KUNIT_ARRAY_PARAM(xa0, xa0s, NULL); - -static struct workingset_testcase workingset_testcases[] = { - { &array, 0 }, - { &array, 64 }, - { &array, 4096 }, -}; -KUNIT_ARRAY_PARAM(workingset, workingset_testcases, NULL); - -static struct kunit_case xarray_cases[] = { - KUNIT_CASE_PARAM(check_xa_err, array_gen_params), - KUNIT_CASE_PARAM(check_xas_retry, array_gen_params), - KUNIT_CASE_PARAM(check_xa_load, array_gen_params), - KUNIT_CASE_PARAM(check_xa_mark, array_gen_params), - KUNIT_CASE_PARAM(check_xa_shrink, array_gen_params), - KUNIT_CASE_PARAM(check_xas_erase, array_gen_params), - KUNIT_CASE_PARAM(check_insert, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg_order, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, xa0_gen_params), - KUNIT_CASE_PARAM(check_multi_store, array_gen_params), - KUNIT_CASE_PARAM(check_multi_store_advanced, array_gen_params), - KUNIT_CASE_PARAM(check_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_conflict_get_order, array_gen_params), - KUNIT_CASE(check_xa_alloc), - KUNIT_CASE_PARAM(check_find, array_gen_params), - KUNIT_CASE_PARAM(check_find_entry, array_gen_params), - KUNIT_CASE_PARAM(check_pause, array_gen_params), - KUNIT_CASE_PARAM(check_account, array_gen_params), - KUNIT_CASE_PARAM(check_destroy, array_gen_params), - KUNIT_CASE_PARAM(check_move, array_gen_params), - KUNIT_CASE_PARAM(check_create_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_iter, array_gen_params), - KUNIT_CASE_PARAM(check_align, xa0_gen_params), - KUNIT_CASE_PARAM(check_split, array_gen_params), - KUNIT_CASE_PARAM(check_workingset, workingset_gen_params), - {}, -}; - -static struct kunit_suite xarray_suite = { - .name = "xarray", - .test_cases = xarray_cases, -}; - -kunit_test_suite(xarray_suite); +static int xarray_checks(void) +{ + check_xa_err(&array); + check_xas_retry(&array); + check_xa_load(&array); + check_xa_mark(&array); + check_xa_shrink(&array); + check_xas_erase(&array); + check_insert(&array); + check_cmpxchg(&array); + check_cmpxchg_order(&array); + check_reserve(&array); + check_reserve(&xa0); + check_multi_store(&array); + check_multi_store_advanced(&array); + check_get_order(&array); + check_xas_get_order(&array); + check_xas_conflict_get_order(&array); + check_xa_alloc(); + check_find(&array); + check_find_entry(&array); + check_pause(&array); + check_account(&array); + check_destroy(&array); + check_move(&array); + check_create_range(&array); + check_store_range(&array); + check_store_iter(&array); + check_align(&xa0); + check_split(&array); + + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); + return (tests_run == tests_passed) ? 0 : -EINVAL; +} + +static void xarray_exit(void) +{ +} + +module_init(xarray_checks); +module_exit(xarray_exit); MODULE_AUTHOR("Matthew Wilcox "); MODULE_DESCRIPTION("XArray API test module"); MODULE_LICENSE("GPL"); diff --git a/mm/compaction.c b/mm/compaction.c index bcc0df0066dc3..12ed8425fa175 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2491,7 +2491,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, */ static enum compact_result compaction_suit_allocation_order(struct zone *zone, unsigned int order, - int highest_zoneidx, unsigned int alloc_flags) + int highest_zoneidx, unsigned int alloc_flags, + bool async) { unsigned long watermark; @@ -2500,6 +2501,23 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order, alloc_flags)) return COMPACT_SUCCESS; + /* + * For unmovable allocations (without ALLOC_CMA), check if there is enough + * free memory in the non-CMA pageblocks. Otherwise compaction could form + * the high-order page in CMA pageblocks, which would not help the + * allocation to succeed. However, limit the check to costly order async + * compaction (such as opportunistic THP attempts) because there is the + * possibility that compaction would migrate pages from non-CMA to CMA + * pageblock. + */ + if (order > PAGE_ALLOC_COSTLY_ORDER && async && + !(alloc_flags & ALLOC_CMA)) { + watermark = low_wmark_pages(zone) + compact_gap(order); + if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx, + 0, zone_page_state(zone, NR_FREE_PAGES))) + return COMPACT_SKIPPED; + } + if (!compaction_suitable(zone, order, highest_zoneidx)) return COMPACT_SKIPPED; @@ -2535,7 +2553,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) if (!is_via_compact_memory(cc->order)) { ret = compaction_suit_allocation_order(cc->zone, cc->order, cc->highest_zoneidx, - cc->alloc_flags); + cc->alloc_flags, + cc->mode == MIGRATE_ASYNC); if (ret != COMPACT_CONTINUE) return ret; } @@ -3038,7 +3057,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, pgdat->kcompactd_max_order, - highest_zoneidx, ALLOC_WMARK_MIN); + highest_zoneidx, ALLOC_WMARK_MIN, + false); if (ret == COMPACT_CONTINUE) return true; } @@ -3079,7 +3099,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) continue; ret = compaction_suit_allocation_order(zone, - cc.order, zoneid, ALLOC_WMARK_MIN); + cc.order, zoneid, ALLOC_WMARK_MIN, + false); if (ret != COMPACT_CONTINUE) continue; diff --git a/mm/gup.c b/mm/gup.c index 9aaf338cc1f48..3883b307780ea 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_folios *pofs) /* * Returns the number of collected folios. Return value is always >= 0. */ -static unsigned long collect_longterm_unpinnable_folios( +static void collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { - unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; + unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2338,8 +2338,6 @@ static unsigned long collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; - collected++; - if (folio_is_device_coherent(folio)) continue; @@ -2361,8 +2359,6 @@ static unsigned long collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } - - return collected; } /* @@ -2439,11 +2435,9 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); - unsigned long collected; - collected = collect_longterm_unpinnable_folios(&movable_folio_list, - pofs); - if (!collected) + collect_longterm_unpinnable_folios(&movable_folio_list, pofs); + if (list_empty(&movable_folio_list)) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3b25b69aa94f1..65068671e460a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3309,7 +3309,7 @@ static void __init gather_bootmem_prealloc(void) .thread_fn = gather_bootmem_prealloc_parallel, .fn_arg = NULL, .start = 0, - .size = num_node_state(N_MEMORY), + .size = nr_node_ids, .align = 1, .min_chunk = 1, .max_threads = num_node_state(N_MEMORY), diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 67fc321db79b7..102048821c222 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1084,6 +1085,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). */ if ((flags & GFP_ZONEMASK) || + ((flags & __GFP_THISNODE) && num_online_nodes() > 1) || (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 982bb5ef32331..c6ed68604136a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1689,7 +1689,7 @@ static void kmemleak_scan(void) unsigned long phys = object->pointer; if (PHYS_PFN(phys) < min_low_pfn || - PHYS_PFN(phys + object->size) >= max_low_pfn) + PHYS_PFN(phys + object->size) > max_low_pfn) __paint_it(object, KMEMLEAK_BLACK); } diff --git a/mm/secretmem.c b/mm/secretmem.c index 399552814fd0f..1b0a214ee5580 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,14 +195,13 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); int err; inode = alloc_anon_inode(secretmem_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &qname, NULL); + err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL); if (err) { file = ERR_PTR(err); goto err_free_inode; diff --git a/mm/swapfile.c b/mm/swapfile.c index 6e867c16ea934..ba19430dd4ead 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -794,7 +794,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim)) continue; if (need_reclaim) { - ret = cluster_reclaim_range(si, ci, start, end); + ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages); /* * Reclaim drops ci->lock and cluster could be used * by another order. Not checking flag as off-list diff --git a/mm/vmscan.c b/mm/vmscan.c index 683ec56d4f608..c767d71c43d7d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1086,7 +1086,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, struct folio_batch free_folios; LIST_HEAD(ret_folios); LIST_HEAD(demote_folios); - unsigned int nr_reclaimed = 0; + unsigned int nr_reclaimed = 0, nr_demoted = 0; unsigned int pgactivate = 0; bool do_demote_pass; struct swap_iocb *plug = NULL; @@ -1550,8 +1550,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, /* 'folio_list' is always empty here */ /* Migrate folios selected for demotion */ - stat->nr_demoted = demote_folio_list(&demote_folios, pgdat); - nr_reclaimed += stat->nr_demoted; + nr_demoted = demote_folio_list(&demote_folios, pgdat); + nr_reclaimed += nr_demoted; + stat->nr_demoted += nr_demoted; /* Folios that could not be demoted are still in @demote_folios */ if (!list_empty(&demote_folios)) { /* Folios which weren't demoted go back on @folio_list */ @@ -1692,6 +1693,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; unsigned long skipped = 0; unsigned long scan, total_scan, nr_pages; + unsigned long max_nr_skipped = 0; LIST_HEAD(folios_skipped); total_scan = 0; @@ -1706,9 +1708,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx) { + /* Using max_nr_skipped to prevent hard LOCKUP*/ + if (max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED && + (folio_zonenum(folio) > sc->reclaim_idx)) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; + max_nr_skipped++; goto move; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 817626a351f89..6d0e47f7ae339 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -452,7 +452,7 @@ static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { .lock = INIT_LOCAL_LOCK(lock), }; -static inline bool is_first_zpdesc(struct zpdesc *zpdesc) +static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc) { return PagePrivate(zpdesc_page(zpdesc)); } diff --git a/net/core/dev.c b/net/core/dev.c index c41d1e1cbf62e..520906e285eb7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11293,6 +11293,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e12470..7609ce2b2c5e2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590aba..58df9ad02ce8a 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf68..a9bb9ce5438ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc2557..2c383c12a4315 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,13 +407,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) @@ -426,8 +428,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfef..0ac4283acdf20 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -269,8 +272,10 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc2633..33833b2064c07 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); @@ -571,13 +573,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +597,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4fa..c6ea438b5c758 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a150..a4a668b88a8f2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e2..f251845fe532c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb08..c4c8b46a68c67 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb7..4d9c5e21ba785 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a380..2f1fd1e2e7e48 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa3..9047ba13bd31e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2f..584397aba4a07 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09bc..e6bfd39ff3396 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37f..fdd79d3ccd8ce 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7ce3721c06ca5..eadc00410ebc5 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -630,7 +630,7 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, const char *name) { - struct qstr q = QSTR_INIT(name, strlen(name)); + struct qstr q = QSTR(name); struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { dentry = d_alloc(parent, &q); @@ -1190,8 +1190,7 @@ static const struct rpc_filelist files[] = { struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { - struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); - return d_hash_and_lookup(sb->s_root, &dir); + return d_hash_and_lookup(sb->s_root, &QSTR(dir_name)); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); @@ -1300,11 +1299,9 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) struct dentry *gssd_dentry; struct dentry *clnt_dentry = NULL; struct dentry *pipe_dentry = NULL; - struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name, - strlen(files[RPCAUTH_gssd].name)); /* We should never get this far if "gssd" doesn't exist */ - gssd_dentry = d_hash_and_lookup(root, &q); + gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name)); if (!gssd_dentry) return ERR_PTR(-ENOENT); @@ -1314,9 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) goto out; } - q.name = gssd_dummy_clnt_dir[0].name; - q.len = strlen(gssd_dummy_clnt_dir[0].name); - clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); + clnt_dentry = d_hash_and_lookup(gssd_dentry, + &QSTR(gssd_dummy_clnt_dir[0].name)); if (!clnt_dentry) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); diff --git a/rust/Makefile b/rust/Makefile index 71a05a3c895a0..8fcfd60447bc8 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -331,10 +331,11 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' + quiet_cmd_exports = EXPORTS $@ cmd_exports = \ - $(NM) -p --defined-only $< \ - | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ + $(call rust_exports,$<,"EXPORT_SYMBOL_RUST_GPL(%s);\n") > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) @@ -403,11 +404,36 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) __ashlti3 __lshrti3 endif +ifdef CONFIG_MODVERSIONS +cmd_gendwarfksyms = $(if $(skip_gendwarfksyms),, \ + $(call rust_exports,$@,"%s\n") | \ + scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes),) \ + $@ >> $(dot-target).cmd) +endif + define rule_rustc_library $(call cmd_and_fixdep,rustc_library) $(call cmd,gen_objtooldep) + $(call cmd,gendwarfksyms) endef +define rule_rust_cc_library + $(call if_changed_rule,cc_o_c) + $(call cmd,force_checksrc) + $(call cmd,gendwarfksyms) +endef + +# helpers.o uses the same export mechanism as Rust libraries, so ensure symbol +# versions are calculated for the helpers too. +$(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE + +$(call if_changed_rule,rust_cc_library) + +# Disable symbol versioning for exports.o to avoid conflicts with the actual +# symbol versions generated from Rust objects. +$(obj)/exports.o: private skip_gendwarfksyms = 1 + $(obj)/core.o: private skip_clippy = 1 $(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) @@ -419,13 +445,16 @@ ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) $(obj)/core.o: scripts/target.json endif +$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1 $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/ffi.o: private skip_gendwarfksyms = 1 $(obj)/ffi.o: $(src)/ffi.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) @@ -437,6 +466,7 @@ $(obj)/bindings.o: $(src)/bindings/lib.rs \ +$(call if_changed_rule,rustc_library) $(obj)/uapi.o: private rustc_target_flags = --extern ffi +$(obj)/uapi.o: private skip_gendwarfksyms = 1 $(obj)/uapi.o: $(src)/uapi/lib.rs \ $(obj)/ffi.o \ $(obj)/uapi/uapi_generated.rs FORCE diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c index 94b87569d2a2e..7f6ef06a2f067 100644 --- a/samples/check-exec/inc.c +++ b/samples/check-exec/inc.c @@ -21,8 +21,15 @@ #include #include #include +#include #include +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + /* Returns 1 on error, 0 otherwise. */ static int interpret_buffer(char *buffer, size_t buffer_size) { @@ -78,8 +85,8 @@ static int interpret_stream(FILE *script, char *const script_name, * script execution. We must use the script file descriptor instead of * the script path name to avoid race conditions. */ - err = execveat(fileno(script), "", script_argv, envp, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + err = sys_execveat(fileno(script), "", script_argv, envp, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err && restrict_stream) { perror("ERROR: Script execution check"); return 1; diff --git a/scripts/Makefile b/scripts/Makefile index 546e8175e1c4c..46f860529df5e 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -53,7 +53,8 @@ hostprogs += unifdef gen_packed_field_checks targets += module.lds subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins -subdir-$(CONFIG_MODVERSIONS) += genksyms +subdir-$(CONFIG_GENKSYMS) += genksyms +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_SECURITY_IPE) += ipe diff --git a/scripts/Makefile.build b/scripts/Makefile.build index c16e4cf54d770..993708d118745 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -107,13 +107,24 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(obj)/%.c FORCE $(call if_changed_dep,cpp_i_c) +getexportsymbols = $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/$(1)/p' + +gendwarfksyms = $(objtree)/scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes)) \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) + genksyms = $(objtree)/scripts/genksyms/genksyms \ $(if $(KBUILD_SYMTYPES), -T $(@:.o=.symtypes)) \ $(if $(KBUILD_PRESERVE), -p) \ $(addprefix -r , $(wildcard $(@:.o=.symref))) # These mirror gensymtypes_S and co below, keep them in synch. +ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),, \ + $(call getexportsymbols,\1) | $(gendwarfksyms) $@) +else cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms) +endif # CONFIG_GENDWARFKSYMS # LLVM assembly # Generate .ll files from .c @@ -183,7 +194,9 @@ endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT is-standard-object = $(if $(filter-out y%, $(OBJECT_FILES_NON_STANDARD_$(target-stem).o)$(OBJECT_FILES_NON_STANDARD)n),$(is-kernel-object)) +ifdef CONFIG_OBJTOOL $(obj)/%.o: private objtool-enabled = $(if $(is-standard-object),$(if $(delay-objtool),$(is-single-obj-m),y)) +endif ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi))) @@ -286,14 +299,26 @@ $(obj)/%.rs: $(obj)/%.rs.S FORCE # This is convoluted. The .S file must first be preprocessed to run guards and # expand names, then the resulting exports must be constructed into plain # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed -# to make the genksyms input. +# to make the genksyms input or compiled into an object for gendwarfksyms. # # These mirror gensymtypes_c and co above, keep them in synch. -cmd_gensymtypes_S = \ - { echo "\#include " ; \ - echo "\#include " ; \ - $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \ - $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +getasmexports = \ + { echo "\#include " ; \ + echo "\#include " ; \ + echo "\#include " ; \ + $(call getexportsymbols,EXPORT_SYMBOL(\1);) ; } + +ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CC) $(c_flags) -c -o $(@:.o=.gendwarfksyms.o) -xc -; \ + $(call getexportsymbols,\1) | \ + $(gendwarfksyms) $(@:.o=.gendwarfksyms.o) +else +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +endif # CONFIG_GENDWARFKSYMS quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index 226ea3df3b4b4..a44307f08e9d6 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Configuration heplers +cmd_merge_fragments = \ + $(srctree)/scripts/kconfig/merge_config.sh \ + $4 -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$2 \ + $(foreach config,$3,$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + # Creates 'merged defconfigs' # --------------------------------------------------------------------------- # Usage: @@ -8,9 +13,7 @@ # # Input config fragments without '.config' suffix define merge_into_defconfig - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -22,8 +25,6 @@ endef # # Input config fragments without '.config' suffix define merge_into_defconfig_override - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2,-Q) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 1d13cecc7cc78..eb719f6d8d536 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -77,6 +77,9 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) +# Explicitly clear padding bits during variable initialization +KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all) + KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 7395200538da8..ad55ef201aacb 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -287,6 +287,8 @@ delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) +objtool-enabled := y + endif # CONFIG_OBJTOOL # Useful for describing the dependency of composite objects @@ -302,11 +304,11 @@ endef # =========================================================================== # These are shared by some Makefile.* files. -objtool-enabled := y - ifdef CONFIG_LTO_CLANG -# objtool cannot process LLVM IR. Make $(LD) covert LLVM IR to ELF here. -cmd_ld_single = $(if $(objtool-enabled), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) +# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# - when this object needs objtool processing, as objtool cannot process LLVM IR +# - when this is a single-object module, as modpost cannot process LLVM IR +cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ @@ -374,6 +376,9 @@ quiet_cmd_ar = AR $@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ +quiet_cmd_strip_relocs = RSTRIP $@ +cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $@ + # Gzip # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index f97c9926ed31b..1628198f3e830 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -105,7 +105,7 @@ else sig-key := $(CONFIG_MODULE_SIG_KEY) endif quiet_cmd_sign = SIGN $@ - cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \ + cmd_sign = $(objtree)/scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" $(objtree)/certs/signing_key.x509 $@ \ $(if $(KBUILD_EXTMOD),|| true) ifeq ($(sign-only),) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index ab0e94ea62496..d7d45067d08b9 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -43,6 +43,8 @@ MODPOST = $(objtree)/scripts/mod/modpost modpost-args = \ $(if $(CONFIG_MODULES),-M) \ $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(CONFIG_BASIC_MODVERSIONS),-b) \ + $(if $(CONFIG_EXTENDED_MODVERSIONS),-x) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_MODPOST_WARN),-w) \ diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 2f11c4f9c345a..13eb8b3901b8f 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -167,7 +167,7 @@ def get_current_task(cpu): var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): - current_task_addr = gdb.parse_and_eval("$SP_EL0") + current_task_addr = gdb.parse_and_eval("(unsigned long)$SP_EL0") if (current_task_addr >> 63) != 0: current_task = current_task_addr.cast(task_ptr_type) return current_task.dereference() diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore new file mode 100644 index 0000000000000..0927f8d3cd96c --- /dev/null +++ b/scripts/gendwarfksyms/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +/gendwarfksyms diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile new file mode 100644 index 0000000000000..6334c7d3c4d58 --- /dev/null +++ b/scripts/gendwarfksyms/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs-always-y += gendwarfksyms + +gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += cache.o +gendwarfksyms-objs += die.o +gendwarfksyms-objs += dwarf.o +gendwarfksyms-objs += kabi.o +gendwarfksyms-objs += symbols.o +gendwarfksyms-objs += types.o + +HOSTLDLIBS_gendwarfksyms := -ldw -lelf -lz diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c new file mode 100644 index 0000000000000..c9c19b86a686f --- /dev/null +++ b/scripts/gendwarfksyms/cache.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +struct cache_item { + unsigned long key; + int value; + struct hlist_node hash; +}; + +void cache_set(struct cache *cache, unsigned long key, int value) +{ + struct cache_item *ci; + + ci = xmalloc(sizeof(struct cache_item)); + ci->key = key; + ci->value = value; + hash_add(cache->cache, &ci->hash, hash_32(key)); +} + +int cache_get(struct cache *cache, unsigned long key) +{ + struct cache_item *ci; + + hash_for_each_possible(cache->cache, ci, hash, hash_32(key)) { + if (ci->key == key) + return ci->value; + } + + return -1; +} + +void cache_init(struct cache *cache) +{ + hash_init(cache->cache); +} + +void cache_free(struct cache *cache) +{ + struct hlist_node *tmp; + struct cache_item *ci; + + hash_for_each_safe(cache->cache, ci, tmp, hash) { + free(ci); + } + + hash_init(cache->cache); +} diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c new file mode 100644 index 0000000000000..66bd4c9bc9528 --- /dev/null +++ b/scripts/gendwarfksyms/die.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include "gendwarfksyms.h" + +#define DIE_HASH_BITS 15 + +/* {die->addr, state} -> struct die * */ +static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS); + +static unsigned int map_hits; +static unsigned int map_misses; + +static inline unsigned int die_hash(uintptr_t addr, enum die_state state) +{ + return hash_32(addr_hash(addr) ^ (unsigned int)state); +} + +static void init_die(struct die *cd) +{ + cd->state = DIE_INCOMPLETE; + cd->mapped = false; + cd->fqn = NULL; + cd->tag = -1; + cd->addr = 0; + INIT_LIST_HEAD(&cd->fragments); +} + +static struct die *create_die(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + cd = xmalloc(sizeof(struct die)); + init_die(cd); + cd->addr = (uintptr_t)die->addr; + + hash_add(die_map, &cd->hash, die_hash(cd->addr, state)); + return cd; +} + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) +{ + struct die *cd; + + hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) { + if (cd->addr == addr && cd->state == state) { + *res = cd; + return 0; + } + } + + return -1; +} + +struct die *die_map_get(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) { + map_hits++; + return cd; + } + + map_misses++; + return create_die(die, state); +} + +static void reset_die(struct die *cd) +{ + struct die_fragment *tmp; + struct die_fragment *df; + + list_for_each_entry_safe(df, tmp, &cd->fragments, list) { + if (df->type == FRAGMENT_STRING) + free(df->data.str); + free(df); + } + + if (cd->fqn && *cd->fqn) + free(cd->fqn); + init_die(cd); +} + +void die_map_for_each(die_map_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct die *cd; + + hash_for_each_safe(die_map, cd, tmp, hash) { + func(cd, arg); + } +} + +void die_map_free(void) +{ + struct hlist_node *tmp; + unsigned int stats[DIE_LAST + 1]; + struct die *cd; + int i; + + memset(stats, 0, sizeof(stats)); + + hash_for_each_safe(die_map, cd, tmp, hash) { + stats[cd->state]++; + reset_die(cd); + free(cd); + } + hash_init(die_map); + + if (map_hits + map_misses > 0) + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, + map_misses, + (100.0f * map_hits) / (map_hits + map_misses)); + + for (i = 0; i <= DIE_LAST; i++) + debug("%s: %u entries", die_state_name(i), stats[i]); +} + +static struct die_fragment *append_item(struct die *cd) +{ + struct die_fragment *df; + + df = xmalloc(sizeof(struct die_fragment)); + df->type = FRAGMENT_EMPTY; + list_add_tail(&df->list, &cd->fragments); + return df; +} + +void die_map_add_string(struct die *cd, const char *str) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.str = xstrdup(str); + df->type = FRAGMENT_STRING; +} + +void die_map_add_linebreak(struct die *cd, int linebreak) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.linebreak = linebreak; + df->type = FRAGMENT_LINEBREAK; +} + +void die_map_add_die(struct die *cd, struct die *child) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.addr = child->addr; + df->type = FRAGMENT_DIE; +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c new file mode 100644 index 0000000000000..534d9aa7c1147 --- /dev/null +++ b/scripts/gendwarfksyms/dwarf.c @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include +#include +#include "gendwarfksyms.h" + +/* See get_union_kabi_status */ +#define KABI_PREFIX "__kabi_" +#define KABI_PREFIX_LEN (sizeof(KABI_PREFIX) - 1) +#define KABI_RESERVED_PREFIX "reserved" +#define KABI_RESERVED_PREFIX_LEN (sizeof(KABI_RESERVED_PREFIX) - 1) +#define KABI_RENAMED_PREFIX "renamed" +#define KABI_RENAMED_PREFIX_LEN (sizeof(KABI_RENAMED_PREFIX) - 1) +#define KABI_IGNORED_PREFIX "ignored" +#define KABI_IGNORED_PREFIX_LEN (sizeof(KABI_IGNORED_PREFIX) - 1) + +static inline bool is_kabi_prefix(const char *name) +{ + return name && !strncmp(name, KABI_PREFIX, KABI_PREFIX_LEN); +} + +enum kabi_status { + /* >0 to stop DIE processing */ + KABI_NORMAL = 1, + KABI_RESERVED, + KABI_IGNORED, +}; + +static bool do_linebreak; +static int indentation_level; + +/* Line breaks and indentation for pretty-printing */ +static void process_linebreak(struct die *cache, int n) +{ + indentation_level += n; + do_linebreak = true; + die_map_add_linebreak(cache, n); +} + +#define DEFINE_GET_ATTR(attr, type) \ + static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \ + type *value) \ + { \ + Dwarf_Attribute da; \ + return dwarf_attr(die, id, &da) && \ + !dwarf_form##attr(&da, value); \ + } + +DEFINE_GET_ATTR(flag, bool) +DEFINE_GET_ATTR(udata, Dwarf_Word) + +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) +{ + Dwarf_Attribute da; + + /* dwarf_formref_die returns a pointer instead of an error value. */ + return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value); +} + +#define DEFINE_GET_STRING_ATTR(attr) \ + static const char *get_##attr##_attr(Dwarf_Die *die) \ + { \ + Dwarf_Attribute da; \ + if (dwarf_attr(die, DW_AT_##attr, &da)) \ + return dwarf_formstring(&da); \ + return NULL; \ + } + +DEFINE_GET_STRING_ATTR(name) +DEFINE_GET_STRING_ATTR(linkage_name) + +static const char *get_symbol_name(Dwarf_Die *die) +{ + const char *name; + + /* rustc uses DW_AT_linkage_name for exported symbols */ + name = get_linkage_name_attr(die); + if (!name) + name = get_name_attr(die); + + return name; +} + +static bool match_export_symbol(struct state *state, Dwarf_Die *die) +{ + Dwarf_Die *source = die; + Dwarf_Die origin; + + /* If the DIE has an abstract origin, use it for type information. */ + if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) + source = &origin; + + state->sym = symbol_get(get_symbol_name(die)); + + /* Look up using the origin name if there are no matches. */ + if (!state->sym && source != die) + state->sym = symbol_get(get_symbol_name(source)); + + state->die = *source; + return !!state->sym; +} + +/* DW_AT_decl_file -> struct srcfile */ +static struct cache srcfile_cache; + +static bool is_definition_private(Dwarf_Die *die) +{ + Dwarf_Word filenum; + Dwarf_Files *files; + Dwarf_Die cudie; + const char *s; + int res; + + /* + * Definitions in .c files cannot change the public ABI, + * so consider them private. + */ + if (!get_udata_attr(die, DW_AT_decl_file, &filenum)) + return false; + + res = cache_get(&srcfile_cache, filenum); + if (res >= 0) + return !!res; + + if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) + error("dwarf_cu_die failed: '%s'", dwarf_errmsg(-1)); + + if (dwarf_getsrcfiles(&cudie, &files, NULL)) + error("dwarf_getsrcfiles failed: '%s'", dwarf_errmsg(-1)); + + s = dwarf_filesrc(files, filenum, NULL, NULL); + if (!s) + error("dwarf_filesrc failed: '%s'", dwarf_errmsg(-1)); + + s = strrchr(s, '.'); + res = s && !strcmp(s, ".c"); + cache_set(&srcfile_cache, filenum, res); + + return !!res; +} + +static bool is_kabi_definition(struct die *cache, Dwarf_Die *die) +{ + bool value; + + if (get_flag_attr(die, DW_AT_declaration, &value) && value) + return false; + + if (kabi_is_declonly(cache->fqn)) + return false; + + return !is_definition_private(die); +} + +/* + * Type string processing + */ +static void process(struct die *cache, const char *s) +{ + s = s ?: ""; + + if (dump_dies && do_linebreak) { + fputs("\n", stderr); + for (int i = 0; i < indentation_level; i++) + fputs(" ", stderr); + do_linebreak = false; + } + if (dump_dies) + fputs(s, stderr); + + if (cache) + die_debug_r("cache %p string '%s'", cache, s); + die_map_add_string(cache, s); +} + +#define MAX_FMT_BUFFER_SIZE 128 + +static void process_fmt(struct die *cache, const char *fmt, ...) +{ + char buf[MAX_FMT_BUFFER_SIZE]; + va_list args; + + va_start(args, fmt); + + if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf)) + error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); + + process(cache, buf); + va_end(args); +} + +#define MAX_FQN_SIZE 64 + +/* Get a fully qualified name from DWARF scopes */ +static char *get_fqn(Dwarf_Die *die) +{ + const char *list[MAX_FQN_SIZE]; + Dwarf_Die *scopes = NULL; + bool has_name = false; + char *fqn = NULL; + char *p; + int count = 0; + int len = 0; + int res; + int i; + + res = checkp(dwarf_getscopes_die(die, &scopes)); + if (!res) { + list[count] = get_name_attr(die); + + if (!list[count]) + return NULL; + + len += strlen(list[count]); + count++; + + goto done; + } + + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { + if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) + continue; + + list[count] = get_name_attr(&scopes[i]); + + if (list[count]) { + has_name = true; + } else { + list[count] = ""; + has_name = false; + } + + len += strlen(list[count]); + count++; + + if (i > 0) { + list[count++] = "::"; + len += 2; + } + } + + free(scopes); + + if (count == MAX_FQN_SIZE) + warn("increase MAX_FQN_SIZE: reached the maximum"); + + /* Consider the DIE unnamed if the last scope doesn't have a name */ + if (!has_name) + return NULL; +done: + fqn = xmalloc(len + 1); + *fqn = '\0'; + + p = fqn; + for (i = 0; i < count; i++) + p = stpcpy(p, list[i]); + + return fqn; +} + +static void update_fqn(struct die *cache, Dwarf_Die *die) +{ + if (!cache->fqn) + cache->fqn = get_fqn(die) ?: ""; +} + +static void process_fqn(struct die *cache, Dwarf_Die *die) +{ + update_fqn(cache, die); + if (*cache->fqn) + process(cache, " "); + process(cache, cache->fqn); +} + +#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ + static void process_##attribute##_attr(struct die *cache, \ + Dwarf_Die *die) \ + { \ + Dwarf_Word value; \ + if (get_udata_attr(die, DW_AT_##attribute, &value)) \ + process_fmt(cache, " " #attribute "(%" PRIu64 ")", \ + value); \ + } + +DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility) +DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) +DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location) +DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value) + +/* Match functions -- die_match_callback_t */ +#define DEFINE_MATCH(type) \ + static bool match_##type##_type(Dwarf_Die *die) \ + { \ + return dwarf_tag(die) == DW_TAG_##type##_type; \ + } + +DEFINE_MATCH(enumerator) +DEFINE_MATCH(formal_parameter) +DEFINE_MATCH(member) +DEFINE_MATCH(subrange) + +bool match_all(Dwarf_Die *die) +{ + return true; +} + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match) +{ + Dwarf_Die current; + int res; + + /* Track the first item in lists. */ + if (state) + state->first_list_item = true; + + res = checkp(dwarf_child(die, ¤t)); + while (!res) { + if (match(¤t)) { + /* <0 = error, 0 = continue, >0 = stop */ + res = checkp(func(state, cache, ¤t)); + if (res) + goto out; + } + + res = checkp(dwarf_siblingof(¤t, ¤t)); + } + + res = 0; +out: + if (state) + state->first_list_item = false; + + return res; +} + +static int process_type(struct state *state, struct die *parent, + Dwarf_Die *die); + +static void process_type_attr(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die type; + + if (get_ref_die_attr(die, DW_AT_type, &type)) { + check(process_type(state, cache, &type)); + return; + } + + /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ + process(cache, "base_type void"); +} + +static void process_list_comma(struct state *state, struct die *cache) +{ + if (state->first_list_item) { + state->first_list_item = false; + } else { + process(cache, " ,"); + process_linebreak(cache, 0); + } +} + +/* Comma-separated with DW_AT_type */ +static void __process_list_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + const char *name = get_name_attr(die); + + if (stable) { + if (is_kabi_prefix(name)) + name = NULL; + state->kabi.orig_name = NULL; + } + + process_list_comma(state, cache); + process(cache, type); + process_type_attr(state, cache, die); + + if (stable && state->kabi.orig_name) + name = state->kabi.orig_name; + if (name) { + process(cache, " "); + process(cache, name); + } + + process_accessibility_attr(cache, die); + process_bit_size_attr(cache, die); + process_data_bit_offset_attr(cache, die); + process_data_member_location_attr(cache, die); +} + +#define DEFINE_PROCESS_LIST_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_list_type(state, cache, die, #type " "); \ + } + +DEFINE_PROCESS_LIST_TYPE(formal_parameter) +DEFINE_PROCESS_LIST_TYPE(member) + +/* Container types with DW_AT_type */ +static void __process_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); +} + +#define DEFINE_PROCESS_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_type(state, cache, die, #type "_type"); \ + } + +DEFINE_PROCESS_TYPE(atomic) +DEFINE_PROCESS_TYPE(const) +DEFINE_PROCESS_TYPE(immutable) +DEFINE_PROCESS_TYPE(packed) +DEFINE_PROCESS_TYPE(pointer) +DEFINE_PROCESS_TYPE(reference) +DEFINE_PROCESS_TYPE(restrict) +DEFINE_PROCESS_TYPE(rvalue_reference) +DEFINE_PROCESS_TYPE(shared) +DEFINE_PROCESS_TYPE(template_type_parameter) +DEFINE_PROCESS_TYPE(volatile) +DEFINE_PROCESS_TYPE(typedef) + +static void process_subrange_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Word count = 0; + + if (get_udata_attr(die, DW_AT_count, &count)) + process_fmt(cache, "[%" PRIu64 "]", count); + else if (get_udata_attr(die, DW_AT_upper_bound, &count)) + process_fmt(cache, "[%" PRIu64 "]", count + 1); + else + process(cache, "[]"); +} + +static void process_array_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "array_type"); + /* Array size */ + check(process_die_container(state, cache, die, process_type, + match_subrange_type)); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static void __process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process(cache, " ("); + process_linebreak(cache, 1); + /* Parameters */ + check(process_die_container(state, cache, die, process_type, + match_formal_parameter_type)); + process_linebreak(cache, -1); + process(cache, ")"); + process_linebreak(cache, 0); + /* Return type */ + process(cache, "-> "); + process_type_attr(state, cache, die); +} + +static void process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subroutine_type"); +} + +static void process_variant_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_member_type)); + process_linebreak(cache, -1); + process(cache, "}"); + process_discr_value_attr(cache, die); +} + +static void process_variant_part_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant_part {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_all)); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static int get_kabi_status(Dwarf_Die *die, const char **suffix) +{ + const char *name = get_name_attr(die); + + if (suffix) + *suffix = NULL; + + if (is_kabi_prefix(name)) { + name += KABI_PREFIX_LEN; + + if (!strncmp(name, KABI_RESERVED_PREFIX, + KABI_RESERVED_PREFIX_LEN)) + return KABI_RESERVED; + if (!strncmp(name, KABI_IGNORED_PREFIX, + KABI_IGNORED_PREFIX_LEN)) + return KABI_IGNORED; + + if (!strncmp(name, KABI_RENAMED_PREFIX, + KABI_RENAMED_PREFIX_LEN)) { + if (suffix) { + name += KABI_RENAMED_PREFIX_LEN; + *suffix = name; + } + return KABI_RESERVED; + } + } + + return KABI_NORMAL; +} + +static int check_struct_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + /* + * If the union member is a struct, expect the __kabi field to + * be the first member of the structure, i.e..: + * + * union { + * type new_member; + * struct { + * type __kabi_field; + * } + * }; + */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED && + !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder)) + error("structure member missing a type?"); + + return res; +} + +static int check_union_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + Dwarf_Die type; + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("union member missing a type?"); + + /* + * We expect a union with two members. Check if either of them + * has a __kabi name prefix, i.e.: + * + * union { + * ... + * type memberN; // <- type, N = {0,1} + * ... + * }; + * + * The member can also be a structure type, in which case we'll + * check the first structure member. + * + * In any case, stop processing after we've seen two members. + */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED) + state->kabi.placeholder = type; + if (res != KABI_NORMAL) + return res; + + if (dwarf_tag(&type) == DW_TAG_structure_type) + res = checkp(process_die_container( + state, NULL, &type, check_struct_member_kabi_status, + match_member_type)); + + if (res <= KABI_NORMAL && ++state->kabi.members < 2) + return 0; /* Continue */ + + return res; +} + +static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder, + const char **orig_name) +{ + struct state state; + int res; + + if (!stable) + return KABI_NORMAL; + + /* + * To maintain a stable kABI, distributions may choose to reserve + * space in structs for later use by adding placeholder members, + * for example: + * + * struct s { + * u32 a; + * // an 8-byte placeholder for future use + * u64 __kabi_reserved_0; + * }; + * + * When the reserved member is taken into use, the type change + * would normally cause the symbol version to change as well, but + * if the replacement uses the following convention, gendwarfksyms + * continues to use the placeholder type for versioning instead, + * thus maintaining the same symbol version: + * + * struct s { + * u32 a; + * union { + * // placeholder replaced with a new member `b` + * struct t b; + * struct { + * // the placeholder type that is still + * // used for versioning + * u64 __kabi_reserved_0; + * }; + * }; + * }; + * + * I.e., as long as the replaced member is in a union, and the + * placeholder has a __kabi_reserved name prefix, we'll continue + * to use the placeholder type (here u64) for version calculation + * instead of the union type. + * + * It's also possible to ignore new members from versioning if + * they've been added to alignment holes, for example, by + * including them in a union with another member that uses the + * __kabi_ignored name prefix: + * + * struct s { + * u32 a; + * // an alignment hole is used to add `n` + * union { + * u32 n; + * // hide the entire union member from versioning + * u8 __kabi_ignored_0; + * }; + * u64 b; + * }; + * + * Note that the user of this feature is responsible for ensuring + * that the structure actually remains ABI compatible. + */ + memset(&state.kabi, 0, sizeof(struct kabi_state)); + + res = checkp(process_die_container(&state, NULL, die, + check_union_member_kabi_status, + match_member_type)); + + if (res == KABI_RESERVED) { + if (placeholder) + *placeholder = state.kabi.placeholder; + if (orig_name) + *orig_name = state.kabi.orig_name; + } + + return res; +} + +static bool is_kabi_ignored(Dwarf_Die *die) +{ + Dwarf_Die type; + + if (!stable) + return false; + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("member missing a type?"); + + return dwarf_tag(&type) == DW_TAG_union_type && + checkp(get_union_kabi_status(&type, NULL, NULL)) == KABI_IGNORED; +} + +static int ___process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + switch (dwarf_tag(die)) { + case DW_TAG_member: + if (is_kabi_ignored(die)) + return 0; + return check(process_type(state, cache, die)); + case DW_TAG_variant_part: + return check(process_type(state, cache, die)); + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_template_type_parameter: + case DW_TAG_union_type: + case DW_TAG_subprogram: + /* Skip non-member types, including member functions */ + return 0; + default: + error("unexpected structure_type child: %x", dwarf_tag(die)); + } +} + +static void __process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type, + die_callback_t process_func, + die_match_callback_t match_func) +{ + bool expand; + + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + + expand = state->expand.expand && is_kabi_definition(cache, die); + + if (expand) { + state->expand.current_fqn = cache->fqn; + check(process_die_container(state, cache, die, process_func, + match_func)); + } + + process_linebreak(cache, -1); + process(cache, "}"); + + if (expand) { + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); + } +} + +#define DEFINE_PROCESS_STRUCTURE_TYPE(structure) \ + static void process_##structure##_type( \ + struct state *state, struct die *cache, Dwarf_Die *die) \ + { \ + __process_structure_type(state, cache, die, \ + #structure "_type", \ + ___process_structure_type, \ + match_all); \ + } + +DEFINE_PROCESS_STRUCTURE_TYPE(class) +DEFINE_PROCESS_STRUCTURE_TYPE(structure) + +static void process_union_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die placeholder; + + int res = checkp(get_union_kabi_status(die, &placeholder, + &state->kabi.orig_name)); + + if (res == KABI_RESERVED) + check(process_type(state, cache, &placeholder)); + if (res > KABI_NORMAL) + return; + + __process_structure_type(state, cache, die, "union_type", + ___process_structure_type, match_all); +} + +static void process_enumerator_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + bool overridden = false; + Dwarf_Word value; + + if (stable) { + /* Get the fqn before we process anything */ + update_fqn(cache, die); + + if (kabi_is_enumerator_ignored(state->expand.current_fqn, + cache->fqn)) + return; + + overridden = kabi_get_enumerator_value( + state->expand.current_fqn, cache->fqn, &value); + } + + process_list_comma(state, cache); + process(cache, "enumerator"); + process_fqn(cache, die); + + if (overridden || get_udata_attr(die, DW_AT_const_value, &value)) { + process(cache, " = "); + process_fmt(cache, "%" PRIu64, value); + } +} + +static void process_enumeration_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_structure_type(state, cache, die, "enumeration_type", + process_type, match_enumerator_type); +} + +static void process_base_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "base_type"); + process_fqn(cache, die); + process_byte_size_attr(cache, die); + process_encoding_attr(cache, die); + process_alignment_attr(cache, die); +} + +static void process_unspecified_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + /* + * These can be emitted for stand-alone assembly code, which means we + * might run into them in vmlinux.o. + */ + process(cache, "unspecified_type"); +} + +static void process_cached(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + struct die_fragment *df; + Dwarf_Die child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + die_debug_b("cache %p STRING '%s'", cache, + df->data.str); + process(NULL, df->data.str); + break; + case FRAGMENT_LINEBREAK: + process_linebreak(NULL, df->data.linebreak); + break; + case FRAGMENT_DIE: + if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu), + (void *)df->data.addr, &child)) + error("dwarf_die_addr_die failed"); + die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x", + cache, df->data.addr, dwarf_tag(&child)); + check(process_type(state, NULL, &child)); + break; + default: + error("empty die_fragment"); + } + } +} + +static void state_init(struct state *state) +{ + state->expand.expand = true; + state->expand.current_fqn = NULL; + cache_init(&state->expansion_cache); +} + +static void expansion_state_restore(struct expansion_state *state, + struct expansion_state *saved) +{ + state->expand = saved->expand; + state->current_fqn = saved->current_fqn; +} + +static void expansion_state_save(struct expansion_state *state, + struct expansion_state *saved) +{ + expansion_state_restore(saved, state); +} + +static bool is_expanded_type(int tag) +{ + return tag == DW_TAG_class_type || tag == DW_TAG_structure_type || + tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type; +} + +#define PROCESS_TYPE(type) \ + case DW_TAG_##type##_type: \ + process_##type##_type(state, cache, die); \ + break; + +static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) +{ + enum die_state want_state = DIE_COMPLETE; + struct die *cache; + struct expansion_state saved; + int tag = dwarf_tag(die); + + expansion_state_save(&state->expand, &saved); + + /* + * Structures and enumeration types are expanded only once per + * exported symbol. This is sufficient for detecting ABI changes + * within the structure. + */ + if (is_expanded_type(tag)) { + if (cache_was_expanded(&state->expansion_cache, die->addr)) + state->expand.expand = false; + + if (state->expand.expand) + cache_mark_expanded(&state->expansion_cache, die->addr); + else + want_state = DIE_UNEXPANDED; + } + + /* + * If we have want_state already cached, use it instead of walking + * through DWARF. + */ + cache = die_map_get(die, want_state); + + if (cache->state == want_state) { + die_debug_g("cached addr %p tag %x -- %s", die->addr, tag, + die_state_name(cache->state)); + + process_cached(state, cache, die); + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; + } + + die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag, + die_state_name(cache->state), die_state_name(want_state)); + + switch (tag) { + /* Type modifiers */ + PROCESS_TYPE(atomic) + PROCESS_TYPE(const) + PROCESS_TYPE(immutable) + PROCESS_TYPE(packed) + PROCESS_TYPE(pointer) + PROCESS_TYPE(reference) + PROCESS_TYPE(restrict) + PROCESS_TYPE(rvalue_reference) + PROCESS_TYPE(shared) + PROCESS_TYPE(volatile) + /* Container types */ + PROCESS_TYPE(class) + PROCESS_TYPE(structure) + PROCESS_TYPE(union) + PROCESS_TYPE(enumeration) + /* Subtypes */ + PROCESS_TYPE(enumerator) + PROCESS_TYPE(formal_parameter) + PROCESS_TYPE(member) + PROCESS_TYPE(subrange) + PROCESS_TYPE(template_type_parameter) + PROCESS_TYPE(variant) + PROCESS_TYPE(variant_part) + /* Other types */ + PROCESS_TYPE(array) + PROCESS_TYPE(base) + PROCESS_TYPE(subroutine) + PROCESS_TYPE(typedef) + PROCESS_TYPE(unspecified) + default: + error("unexpected type: %x", tag); + } + + die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache, + die->addr, tag); + + /* Update cache state and append to the parent (if any) */ + cache->tag = tag; + cache->state = want_state; + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; +} + +/* + * Exported symbol processing + */ +static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die) +{ + struct die *cache; + + cache = die_map_get(die, DIE_SYMBOL); + + if (cache->state != DIE_INCOMPLETE) + return NULL; /* We already processed a symbol for this DIE */ + + cache->tag = dwarf_tag(die); + return cache; +} + +static void process_symbol(struct state *state, Dwarf_Die *die, + die_callback_t process_func) +{ + struct die *cache; + + symbol_set_die(state->sym, die); + + cache = get_symbol_cache(state, die); + if (!cache) + return; + + debug("%s", state->sym->name); + check(process_func(state, cache, die)); + cache->state = DIE_SYMBOL; + if (dump_dies) + fputs("\n", stderr); +} + +static int __process_subprogram(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subprogram"); + return 0; +} + +static void process_subprogram(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_subprogram); +} + +static int __process_variable(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "variable "); + process_type_attr(state, cache, die); + return 0; +} + +static void process_variable(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_variable); +} + +static void save_symbol_ptr(struct state *state) +{ + Dwarf_Die ptr_type; + Dwarf_Die type; + + if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) || + dwarf_tag(&ptr_type) != DW_TAG_pointer_type) + error("%s must be a pointer type!", + get_symbol_name(&state->die)); + + if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) + error("%s pointer missing a type attribute?", + get_symbol_name(&state->die)); + + /* + * Save the symbol pointer DIE in case the actual symbol is + * missing from the DWARF. Clang, for example, intentionally + * omits external symbols from the debugging information. + */ + if (dwarf_tag(&type) == DW_TAG_subroutine_type) + symbol_set_ptr(state->sym, &type); + else + symbol_set_ptr(state->sym, &ptr_type); +} + +static int process_exported_symbols(struct state *unused, struct die *cache, + Dwarf_Die *die) +{ + int tag = dwarf_tag(die); + + switch (tag) { + /* Possible containers of exported symbols */ + case DW_TAG_namespace: + case DW_TAG_class_type: + case DW_TAG_structure_type: + return check(process_die_container( + NULL, cache, die, process_exported_symbols, match_all)); + + /* Possible exported symbols */ + case DW_TAG_subprogram: + case DW_TAG_variable: { + struct state state; + + if (!match_export_symbol(&state, die)) + return 0; + + state_init(&state); + + if (is_symbol_ptr(get_symbol_name(&state.die))) + save_symbol_ptr(&state); + else if (tag == DW_TAG_subprogram) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); + return 0; + } + default: + return 0; + } +} + +static void process_symbol_ptr(struct symbol *sym, void *arg) +{ + struct state state; + Dwarf *dwarf = arg; + + if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr) + return; + + debug("%s", sym->name); + state_init(&state); + state.sym = sym; + + if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die)) + error("dwarf_die_addr_die failed for symbol ptr: '%s'", + sym->name); + + if (dwarf_tag(&state.die) == DW_TAG_subroutine_type) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); +} + +void process_cu(Dwarf_Die *cudie) +{ + check(process_die_container(NULL, NULL, cudie, process_exported_symbols, + match_all)); + + symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu)); + + cache_free(&srcfile_cache); +} diff --git a/scripts/gendwarfksyms/examples/kabi.h b/scripts/gendwarfksyms/examples/kabi.h new file mode 100644 index 0000000000000..97a5669b083d7 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + * + * Example macros for maintaining kABI stability. + * + * This file is based on android_kabi.h, which has the following notice: + * + * Heavily influenced by rh_kabi.h which came from the RHEL/CENTOS kernel + * and was: + * Copyright (c) 2014 Don Zickus + * Copyright (c) 2015-2018 Jiri Benc + * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa + * Copyright (c) 2016-2018 Prarit Bhargava + * Copyright (c) 2017 Paolo Abeni, Larry Woodman + */ + +#ifndef __KABI_H__ +#define __KABI_H__ + +/* Kernel macros for userspace testing. */ +#ifndef __aligned +#define __aligned(x) __attribute__((__aligned__(x))) +#endif +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif +#ifndef __PASTE +#define ___PASTE(a, b) a##b +#define __PASTE(a, b) ___PASTE(a, b) +#endif +#ifndef __stringify +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) +#endif + +#define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + +#define __KABI_NORMAL_SIZE_ALIGN(_orig, _new) \ + union { \ + _Static_assert( \ + sizeof(struct { _new; }) <= sizeof(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _new) " is larger than " __stringify(_orig)); \ + _Static_assert( \ + __alignof__(struct { _new; }) <= \ + __alignof__(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _orig) " is not aligned the same as " __stringify(_new)); \ + } + +#define __KABI_REPLACE(_orig, _new) \ + union { \ + _new; \ + struct { \ + _orig; \ + }; \ + __KABI_NORMAL_SIZE_ALIGN(_orig, _new); \ + } + +/* + * KABI_DECLONLY(fqn) + * Treat the struct/union/enum fqn as a declaration, i.e. even if + * a definition is available, don't expand the contents. + */ +#define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +/* + * KABI_ENUMERATOR_IGNORE(fqn, field) + * When expanding enum fqn, skip the provided field. This makes it + * possible to hide added enum fields from versioning. + */ +#define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +/* + * KABI_ENUMERATOR_VALUE(fqn, field, value) + * When expanding enum fqn, use the provided value for the + * specified field. This makes it possible to override enumerator + * values when calculating versions. + */ +#define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +/* + * KABI_RESERVE + * Reserve some "padding" in a structure for use by LTS backports. + * This is normally placed at the end of a structure. + * number: the "number" of the padding variable in the structure. Start with + * 1 and go up. + */ +#define KABI_RESERVE(n) unsigned long __kabi_reserved##n + +/* + * KABI_RESERVE_ARRAY + * Same as _BACKPORT_RESERVE but allocates an array with the specified + * size in bytes. + */ +#define KABI_RESERVE_ARRAY(n, s) \ + unsigned char __aligned(8) __kabi_reserved##n[s] + +/* + * KABI_IGNORE + * Add a new field that's ignored in versioning. + */ +#define KABI_IGNORE(n, _new) \ + union { \ + _new; \ + unsigned char __kabi_ignored##n; \ + } + +/* + * KABI_REPLACE + * Replace a field with a compatible new field. + */ +#define KABI_REPLACE(_oldtype, _oldname, _new) \ + __KABI_REPLACE(_oldtype __kabi_renamed##_oldname, struct { _new; }) + +/* + * KABI_USE(number, _new) + * Use a previous padding entry that was defined with KABI_RESERVE + * number: the previous "number" of the padding variable + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE(number, _new) __KABI_REPLACE(KABI_RESERVE(number), _new) + +/* + * KABI_USE2(number, _new1, _new2) + * Use a previous padding entry that was defined with KABI_RESERVE for + * two new variables that fit into 64 bits. This is good for when you do not + * want to "burn" a 64bit padding variable for a smaller variable size if not + * needed. + */ +#define KABI_USE2(number, _new1, _new2) \ + __KABI_REPLACE( \ + KABI_RESERVE(number), struct { \ + _new1; \ + _new2; \ + }) +/* + * KABI_USE_ARRAY(number, bytes, _new) + * Use a previous padding entry that was defined with KABI_RESERVE_ARRAY + * number: the previous "number" of the padding variable + * bytes: the size in bytes reserved for the array + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE_ARRAY(number, bytes, _new) \ + __KABI_REPLACE(KABI_RESERVE_ARRAY(number, bytes), _new) + +#endif /* __KABI_H__ */ diff --git a/scripts/gendwarfksyms/examples/kabi_ex.c b/scripts/gendwarfksyms/examples/kabi_ex.c new file mode 100644 index 0000000000000..0b7ffd830541d --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kabi_ex.c + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. See kabi_ex.h + * for details. + */ + +#include "kabi_ex.h" + +struct s e0; +enum e e1; + +struct ex0a ex0a; +struct ex0b ex0b; +struct ex0c ex0c; + +struct ex1a ex1a; +struct ex1b ex1b; +struct ex1c ex1c; + +struct ex2a ex2a; +struct ex2b ex2b; +struct ex2c ex2c; + +struct ex3a ex3a; +struct ex3b ex3b; +struct ex3c ex3c; diff --git a/scripts/gendwarfksyms/examples/kabi_ex.h b/scripts/gendwarfksyms/examples/kabi_ex.h new file mode 100644 index 0000000000000..1736e0f652081 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kabi_ex.h + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. + */ + +/* + * The comments below each example contain the expected gendwarfksyms + * output, which can be verified using LLVM's FileCheck tool: + * + * https://llvm.org/docs/CommandGuide/FileCheck.html + * + * Usage: + * + * $ gcc -g -c examples/kabi_ex.c -o examples/kabi_ex.o + * + * $ nm examples/kabi_ex.o | awk '{ print $NF }' | \ + * ./gendwarfksyms --stable --dump-dies \ + * examples/kabi_ex.o 2>&1 >/dev/null | \ + * FileCheck examples/kabi_ex.h --check-prefix=STABLE + */ + +#ifndef __KABI_EX_H__ +#define __KABI_EX_H__ + +#include "kabi.h" + +/* + * Example: kABI rules + */ + +struct s { + int a; +}; + +KABI_DECLONLY(s); + +/* + * STABLE: variable structure_type s { + * STABLE-NEXT: } + */ + +enum e { + A, + B, + C, + D, +}; + +KABI_ENUMERATOR_IGNORE(e, B); +KABI_ENUMERATOR_IGNORE(e, C); +KABI_ENUMERATOR_VALUE(e, D, 123456789); + +/* + * STABLE: variable enumeration_type e { + * STABLE-NEXT: enumerator A = 0 , + * STABLE-NEXT: enumerator D = 123456789 + * STABLE-NEXT: } byte_size(4) +*/ + +/* + * Example: Reserved fields + */ +struct ex0a { + int a; + KABI_RESERVE(0); + KABI_RESERVE(1); +}; + +/* + * STABLE: variable structure_type ex0a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0b { + int a; + KABI_RESERVE(0); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0c { + int a; + KABI_USE(0, void *p); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +/* + * Example: A reserved array + */ + +struct ex1a { + unsigned int a; + KABI_RESERVE_ARRAY(0, 64); +}; + +/* + * STABLE: variable structure_type ex1a { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1b { + unsigned int a; + KABI_USE_ARRAY( + 0, 64, struct { + void *p; + KABI_RESERVE_ARRAY(1, 56); + }); +}; + +/* + * STABLE: variable structure_type ex1b { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1c { + unsigned int a; + KABI_USE_ARRAY(0, 64, void *p[8]); +}; + +/* + * STABLE: variable structure_type ex1c { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +/* + * Example: An ignored field added to an alignment hole + */ + +struct ex2a { + int a; + unsigned long b; + int c; + unsigned long d; +}; + +/* + * STABLE: variable structure_type ex2a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2b { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2b), "ex2a size doesn't match ex2b"); + +/* + * STABLE: variable structure_type ex2b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2c { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + KABI_IGNORE(1, unsigned int m); + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2c), "ex2a size doesn't match ex2c"); + +/* + * STABLE: variable structure_type ex2c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + + +/* + * Example: A replaced field + */ + +struct ex3a { + unsigned long a; + unsigned long unused; +}; + +/* + * STABLE: variable structure_type ex3a { + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3b { + unsigned long a; + KABI_REPLACE(unsigned long, unused, unsigned long renamed); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3b), "ex3a size doesn't match ex3b"); + +/* + * STABLE: variable structure_type ex3b { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3c { + unsigned long a; + KABI_REPLACE(unsigned long, unused, long replaced); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3c), "ex3a size doesn't match ex3c"); + +/* + * STABLE: variable structure_type ex3c { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +#endif /* __KABI_EX_H__ */ diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c new file mode 100644 index 0000000000000..88bc1bd60da86 --- /dev/null +++ b/scripts/gendwarfksyms/examples/symbolptr.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + * + * Example for symbol pointers. When compiled with Clang, gendwarfkyms + * uses a symbol pointer for `f`. + * + * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o + * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o + */ + +/* Kernel macros for userspace testing. */ +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif + +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + +extern void f(unsigned int arg); +void g(int *arg); +void g(int *arg) {} + +struct s; +extern struct s *p; + +__GENDWARFKSYMS_EXPORT(f); +__GENDWARFKSYMS_EXPORT(g); +__GENDWARFKSYMS_EXPORT(p); diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c new file mode 100644 index 0000000000000..08ae61eb327ea --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include "gendwarfksyms.h" + +/* + * Options + */ + +/* Print debugging information to stderr */ +int debug; +/* Dump DIE contents */ +int dump_dies; +/* Print debugging information about die_map changes */ +int dump_die_map; +/* Print out type strings (i.e. type_map) */ +int dump_types; +/* Print out expanded type strings used for symbol versions */ +int dump_versions; +/* Support kABI stability features */ +int stable; +/* Write a symtypes file */ +int symtypes; +static const char *symtypes_file; + +static void usage(void) +{ + fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n" + "Options:\n" + " -d, --debug Print debugging information\n" + " --dump-dies Dump DWARF DIE contents\n" + " --dump-die-map Print debugging information about die_map changes\n" + " --dump-types Dump type strings\n" + " --dump-versions Dump expanded type strings used for symbol versions\n" + " -s, --stable Support kABI stability features\n" + " -T, --symtypes file Write a symtypes file\n" + " -h, --help Print this message\n" + "\n", + stderr); +} + +static int process_module(Dwfl_Module *mod, void **userdata, const char *name, + Dwarf_Addr base, void *arg) +{ + Dwarf_Addr dwbias; + Dwarf_Die cudie; + Dwarf_CU *cu = NULL; + Dwarf *dbg; + FILE *symfile = arg; + int res; + + debug("%s", name); + dbg = dwfl_module_getdwarf(mod, &dwbias); + + /* + * Look for exported symbols in each CU, follow the DIE tree, and add + * the entries to die_map. + */ + do { + res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL); + if (res < 0) + error("dwarf_get_units failed: no debugging information?"); + if (res == 1) + break; /* No more units */ + + process_cu(&cudie); + } while (cu); + + /* + * Use die_map to expand type strings, write them to `symfile`, and + * calculate symbol versions. + */ + generate_symtypes_and_versions(symfile); + die_map_free(); + + return DWARF_CB_OK; +} + +static const Dwfl_Callbacks callbacks = { + .section_address = dwfl_offline_section_address, + .find_debuginfo = dwfl_standard_find_debuginfo, +}; + +int main(int argc, char **argv) +{ + FILE *symfile = NULL; + unsigned int n; + int opt; + + static const struct option opts[] = { + { "debug", 0, NULL, 'd' }, + { "dump-dies", 0, &dump_dies, 1 }, + { "dump-die-map", 0, &dump_die_map, 1 }, + { "dump-types", 0, &dump_types, 1 }, + { "dump-versions", 0, &dump_versions, 1 }, + { "stable", 0, NULL, 's' }, + { "symtypes", 1, NULL, 'T' }, + { "help", 0, NULL, 'h' }, + { 0, 0, NULL, 0 } + }; + + while ((opt = getopt_long(argc, argv, "dsT:h", opts, NULL)) != EOF) { + switch (opt) { + case 0: + break; + case 'd': + debug = 1; + break; + case 's': + stable = 1; + break; + case 'T': + symtypes = 1; + symtypes_file = optarg; + break; + case 'h': + usage(); + return 0; + default: + usage(); + return 1; + } + } + + if (dump_die_map) + dump_dies = 1; + + if (optind >= argc) { + usage(); + error("no input files?"); + } + + symbol_read_exports(stdin); + + if (symtypes_file) { + symfile = fopen(symtypes_file, "w"); + if (!symfile) + error("fopen failed for '%s': %s", symtypes_file, + strerror(errno)); + } + + for (n = optind; n < argc; n++) { + Dwfl *dwfl; + int fd; + + fd = open(argv[n], O_RDONLY); + if (fd == -1) + error("open failed for '%s': %s", argv[n], + strerror(errno)); + + symbol_read_symtab(fd); + kabi_read_rules(fd); + + dwfl = dwfl_begin(&callbacks); + if (!dwfl) + error("dwfl_begin failed for '%s': %s", argv[n], + dwarf_errmsg(-1)); + + if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd)) + error("dwfl_report_offline failed for '%s': %s", + argv[n], dwarf_errmsg(-1)); + + dwfl_report_end(dwfl, NULL, NULL); + + if (dwfl_getmodules(dwfl, &process_module, symfile, 0)) + error("dwfl_getmodules failed for '%s'", argv[n]); + + dwfl_end(dwfl); + kabi_free(); + } + + if (symfile) + check(fclose(symfile)); + + symbol_print_versions(); + symbol_free(); + + return 0; +} diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h new file mode 100644 index 0000000000000..197a1a8123c6c --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -0,0 +1,296 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifndef __GENDWARFKSYMS_H +#define __GENDWARFKSYMS_H + +/* + * Options -- in gendwarfksyms.c + */ +extern int debug; +extern int dump_dies; +extern int dump_die_map; +extern int dump_types; +extern int dump_versions; +extern int stable; +extern int symtypes; + +/* + * Output helpers + */ +#define __PREFIX "gendwarfksyms: " +#define __println(prefix, format, ...) \ + fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \ + ##__VA_ARGS__) + +#define debug(format, ...) \ + do { \ + if (debug) \ + __println("", format, ##__VA_ARGS__); \ + } while (0) + +#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__) +#define error(format, ...) \ + do { \ + __println("error: ", format, ##__VA_ARGS__); \ + exit(1); \ + } while (0) + +#define __die_debug(color, format, ...) \ + do { \ + if (dump_dies && dump_die_map) \ + fprintf(stderr, \ + "\033[" #color "m<" format ">\033[39m", \ + __VA_ARGS__); \ + } while (0) + +#define die_debug_r(format, ...) __die_debug(91, format, __VA_ARGS__) +#define die_debug_g(format, ...) __die_debug(92, format, __VA_ARGS__) +#define die_debug_b(format, ...) __die_debug(94, format, __VA_ARGS__) + +/* + * Error handling helpers + */ +#define __check(expr, test) \ + ({ \ + int __res = expr; \ + if (test) \ + error("`%s` failed: %d", #expr, __res); \ + __res; \ + }) + +/* Error == non-zero values */ +#define check(expr) __check(expr, __res) +/* Error == negative values */ +#define checkp(expr) __check(expr, __res < 0) + +/* Consistent aliases (DW_TAG__type) for DWARF tags */ +#define DW_TAG_enumerator_type DW_TAG_enumerator +#define DW_TAG_formal_parameter_type DW_TAG_formal_parameter +#define DW_TAG_member_type DW_TAG_member +#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter +#define DW_TAG_typedef_type DW_TAG_typedef +#define DW_TAG_variant_part_type DW_TAG_variant_part +#define DW_TAG_variant_type DW_TAG_variant + +/* + * symbols.c + */ + +/* See symbols.c:is_symbol_ptr */ +#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" +#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) + +static inline unsigned int addr_hash(uintptr_t addr) +{ + return hash_ptr((const void *)addr); +} + +enum symbol_state { + SYMBOL_UNPROCESSED, + SYMBOL_MAPPED, + SYMBOL_PROCESSED +}; + +struct symbol_addr { + uint32_t section; + Elf64_Addr address; +}; + +struct symbol { + const char *name; + struct symbol_addr addr; + struct hlist_node addr_hash; + struct hlist_node name_hash; + enum symbol_state state; + uintptr_t die_addr; + uintptr_t ptr_die_addr; + unsigned long crc; +}; + +typedef void (*symbol_callback_t)(struct symbol *, void *arg); + +bool is_symbol_ptr(const char *name); +void symbol_read_exports(FILE *file); +void symbol_read_symtab(int fd); +struct symbol *symbol_get(const char *name); +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr); +void symbol_set_die(struct symbol *sym, Dwarf_Die *die); +void symbol_set_crc(struct symbol *sym, unsigned long crc); +void symbol_for_each(symbol_callback_t func, void *arg); +void symbol_print_versions(void); +void symbol_free(void); + +/* + * die.c + */ + +enum die_state { + DIE_INCOMPLETE, + DIE_UNEXPANDED, + DIE_COMPLETE, + DIE_SYMBOL, + DIE_LAST = DIE_SYMBOL +}; + +enum die_fragment_type { + FRAGMENT_EMPTY, + FRAGMENT_STRING, + FRAGMENT_LINEBREAK, + FRAGMENT_DIE +}; + +struct die_fragment { + enum die_fragment_type type; + union { + char *str; + int linebreak; + uintptr_t addr; + } data; + struct list_head list; +}; + +#define CASE_CONST_TO_STR(name) \ + case name: \ + return #name; + +static inline const char *die_state_name(enum die_state state) +{ + switch (state) { + CASE_CONST_TO_STR(DIE_INCOMPLETE) + CASE_CONST_TO_STR(DIE_UNEXPANDED) + CASE_CONST_TO_STR(DIE_COMPLETE) + CASE_CONST_TO_STR(DIE_SYMBOL) + } + + error("unexpected die_state: %d", state); +} + +struct die { + enum die_state state; + bool mapped; + char *fqn; + int tag; + uintptr_t addr; + struct list_head fragments; + struct hlist_node hash; +}; + +typedef void (*die_map_callback_t)(struct die *, void *arg); + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res); +struct die *die_map_get(Dwarf_Die *die, enum die_state state); +void die_map_add_string(struct die *pd, const char *str); +void die_map_add_linebreak(struct die *pd, int linebreak); +void die_map_for_each(die_map_callback_t func, void *arg); +void die_map_add_die(struct die *pd, struct die *child); +void die_map_free(void); + +/* + * cache.c + */ + +#define CACHE_HASH_BITS 10 + +/* A cache for addresses we've already seen. */ +struct cache { + HASHTABLE_DECLARE(cache, 1 << CACHE_HASH_BITS); +}; + +void cache_set(struct cache *cache, unsigned long key, int value); +int cache_get(struct cache *cache, unsigned long key); +void cache_init(struct cache *cache); +void cache_free(struct cache *cache); + +static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) +{ + cache_set(cache, addr, 1); +} + +static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) +{ + return cache_get(cache, addr) == 1; +} + +static inline void cache_mark_expanded(struct cache *cache, void *addr) +{ + __cache_mark_expanded(cache, (uintptr_t)addr); +} + +static inline bool cache_was_expanded(struct cache *cache, void *addr) +{ + return __cache_was_expanded(cache, (uintptr_t)addr); +} + +/* + * dwarf.c + */ + +struct expansion_state { + bool expand; + const char *current_fqn; +}; + +struct kabi_state { + int members; + Dwarf_Die placeholder; + const char *orig_name; +}; + +struct state { + struct symbol *sym; + Dwarf_Die die; + + /* List expansion */ + bool first_list_item; + + /* Structure expansion */ + struct expansion_state expand; + struct cache expansion_cache; + + /* Reserved or ignored members */ + struct kabi_state kabi; +}; + +typedef int (*die_callback_t)(struct state *state, struct die *cache, + Dwarf_Die *die); +typedef bool (*die_match_callback_t)(Dwarf_Die *die); +bool match_all(Dwarf_Die *die); + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match); + +void process_cu(Dwarf_Die *cudie); + +/* + * types.c + */ + +void generate_symtypes_and_versions(FILE *file); + +/* + * kabi.c + */ + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field); +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value); +bool kabi_is_declonly(const char *fqn); + +void kabi_read_rules(int fd); +void kabi_free(void); + +#endif /* __GENDWARFKSYMS_H */ diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c new file mode 100644 index 0000000000000..66f01fcd16079 --- /dev/null +++ b/scripts/gendwarfksyms/kabi.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include +#include + +#include "gendwarfksyms.h" + +#define KABI_RULE_SECTION ".discard.gendwarfksyms.kabi_rules" +#define KABI_RULE_VERSION "1" + +/* + * The rule section consists of four null-terminated strings per + * entry: + * + * 1. version + * Entry format version. Must match KABI_RULE_VERSION. + * + * 2. type + * Type of the kABI rule. Must be one of the tags defined below. + * + * 3. target + * Rule-dependent target, typically the fully qualified name of + * the target DIE. + * + * 4. value + * Rule-dependent value. + */ +#define KABI_RULE_MIN_ENTRY_SIZE \ + (/* version\0 */ 2 + /* type\0 */ 2 + /* target\0" */ 1 + \ + /* value\0 */ 1) +#define KABI_RULE_EMPTY_VALUE "" + +/* + * Rule: declonly + * - For the struct/enum/union in the target field, treat it as a + * declaration only even if a definition is available. + */ +#define KABI_RULE_TAG_DECLONLY "declonly" + +/* + * Rule: enumerator_ignore + * - For the enum_field in the target field, ignore the enumerator. + */ +#define KABI_RULE_TAG_ENUMERATOR_IGNORE "enumerator_ignore" + +/* + * Rule: enumerator_value + * - For the fqn_field in the target field, set the value to the + * unsigned integer in the value field. + */ +#define KABI_RULE_TAG_ENUMERATOR_VALUE "enumerator_value" + +enum kabi_rule_type { + KABI_RULE_TYPE_UNKNOWN, + KABI_RULE_TYPE_DECLONLY, + KABI_RULE_TYPE_ENUMERATOR_IGNORE, + KABI_RULE_TYPE_ENUMERATOR_VALUE, +}; + +#define RULE_HASH_BITS 7 + +struct rule { + enum kabi_rule_type type; + const char *target; + const char *value; + struct hlist_node hash; +}; + +/* { type, target } -> struct rule */ +static HASHTABLE_DEFINE(rules, 1 << RULE_HASH_BITS); + +static inline unsigned int rule_values_hash(enum kabi_rule_type type, + const char *target) +{ + return hash_32(type) ^ hash_str(target); +} + +static inline unsigned int rule_hash(const struct rule *rule) +{ + return rule_values_hash(rule->type, rule->target); +} + +static inline const char *get_rule_field(const char **pos, ssize_t *left) +{ + const char *start = *pos; + size_t len; + + if (*left <= 0) + error("unexpected end of kABI rules"); + + len = strnlen(start, *left) + 1; + *pos += len; + *left -= len; + + return start; +} + +void kabi_read_rules(int fd) +{ + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *rule_data = NULL; + Elf_Scn *scn; + Elf *elf; + size_t shstrndx; + const char *rule_str; + ssize_t left; + int i; + + const struct { + enum kabi_rule_type type; + const char *tag; + } rule_types[] = { + { + .type = KABI_RULE_TYPE_DECLONLY, + .tag = KABI_RULE_TAG_DECLONLY, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_IGNORE, + .tag = KABI_RULE_TAG_ENUMERATOR_IGNORE, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_VALUE, + .tag = KABI_RULE_TAG_ENUMERATOR_VALUE, + }, + }; + + if (!stable) + return; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + if (elf_getshdrstrndx(elf, &shstrndx) < 0) + error("elf_getshdrstrndx failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + const char *sname; + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + sname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!sname) + error("elf_strptr failed: %s", elf_errmsg(-1)); + + if (!strcmp(sname, KABI_RULE_SECTION)) { + rule_data = elf_getdata(scn, NULL); + if (!rule_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + if (!rule_data) { + debug("kABI rules not found"); + check(elf_end(elf)); + return; + } + + rule_str = rule_data->d_buf; + left = shdr->sh_size; + + if (left < KABI_RULE_MIN_ENTRY_SIZE) + error("kABI rule section too small: %zd bytes", left); + + if (rule_str[left - 1] != '\0') + error("kABI rules are not null-terminated"); + + while (left > KABI_RULE_MIN_ENTRY_SIZE) { + enum kabi_rule_type type = KABI_RULE_TYPE_UNKNOWN; + const char *field; + struct rule *rule; + + /* version */ + field = get_rule_field(&rule_str, &left); + + if (strcmp(field, KABI_RULE_VERSION)) + error("unsupported kABI rule version: '%s'", field); + + /* type */ + field = get_rule_field(&rule_str, &left); + + for (i = 0; i < ARRAY_SIZE(rule_types); i++) { + if (!strcmp(field, rule_types[i].tag)) { + type = rule_types[i].type; + break; + } + } + + if (type == KABI_RULE_TYPE_UNKNOWN) + error("unsupported kABI rule type: '%s'", field); + + rule = xmalloc(sizeof(struct rule)); + + rule->type = type; + rule->target = xstrdup(get_rule_field(&rule_str, &left)); + rule->value = xstrdup(get_rule_field(&rule_str, &left)); + + hash_add(rules, &rule->hash, rule_hash(rule)); + + debug("kABI rule: type: '%s', target: '%s', value: '%s'", field, + rule->target, rule->value); + } + + if (left > 0) + warn("unexpected data at the end of the kABI rules section"); + + check(elf_end(elf)); +} + +bool kabi_is_declonly(const char *fqn) +{ + struct rule *rule; + + if (!stable) + return false; + if (!fqn || !*fqn) + return false; + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_DECLONLY, fqn)) { + if (rule->type == KABI_RULE_TYPE_DECLONLY && + !strcmp(fqn, rule->target)) + return true; + } + + return false; +} + +static char *get_enumerator_target(const char *fqn, const char *field) +{ + char *target = NULL; + + if (asprintf(&target, "%s %s", fqn, field) < 0) + error("asprintf failed for '%s %s'", fqn, field); + + return target; +} + +static unsigned long get_ulong_value(const char *value) +{ + unsigned long result = 0; + char *endptr = NULL; + + errno = 0; + result = strtoul(value, &endptr, 10); + + if (errno || *endptr) + error("invalid unsigned value '%s'", value); + + return result; +} + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible( + rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_IGNORE, target)) { + if (rule->type == KABI_RULE_TYPE_ENUMERATOR_IGNORE && + !strcmp(target, rule->target)) { + match = true; + break; + } + } + + free(target); + return match; +} + +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_VALUE, + target)) { + if (rule->type == KABI_RULE_TYPE_ENUMERATOR_VALUE && + !strcmp(target, rule->target)) { + *value = get_ulong_value(rule->value); + match = true; + break; + } + } + + free(target); + return match; +} + +void kabi_free(void) +{ + struct hlist_node *tmp; + struct rule *rule; + + hash_for_each_safe(rules, rule, tmp, hash) { + free((void *)rule->target); + free((void *)rule->value); + free(rule); + } + + hash_init(rules); +} diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c new file mode 100644 index 0000000000000..327f87389c343 --- /dev/null +++ b/scripts/gendwarfksyms/symbols.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +#define SYMBOL_HASH_BITS 12 + +/* struct symbol_addr -> struct symbol */ +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS); +/* name -> struct symbol */ +static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS); + +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr) +{ + return hash_32(addr->section ^ addr_hash(addr->address)); +} + +static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match = NULL; + unsigned int processed = 0; + + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash, + symbol_addr_hash(&sym->addr)) { + if (match == sym) + continue; /* Already processed */ + + if (match->addr.section == sym->addr.section && + match->addr.address == sym->addr.address) { + func(match, data); + ++processed; + } + } + + return processed; +} + +/* + * For symbols without debugging information (e.g. symbols defined in other + * TUs), we also match __gendwarfksyms_ptr_ symbols, which the + * kernel uses to ensure type information is present in the TU that exports + * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the + * exported symbol, e.g.: + * + * typeof(symname) *__gendwarf_ptr_symname = &symname; + */ +bool is_symbol_ptr(const char *name) +{ + return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN); +} + +static unsigned int for_each(const char *name, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match; + + if (!name || !*name) + return 0; + if (is_symbol_ptr(name)) + name += SYMBOL_PTR_PREFIX_LEN; + + hash_for_each_possible_safe(symbol_names, match, tmp, name_hash, + hash_str(name)) { + if (strcmp(match->name, name)) + continue; + + /* Call func for the match, and all address matches */ + if (func) + func(match, data); + + if (match->addr.section != SHN_UNDEF) + return __for_each_addr(match, func, data) + 1; + + return 1; + } + + return 0; +} + +static void set_crc(struct symbol *sym, void *data) +{ + unsigned long *crc = data; + + if (sym->state == SYMBOL_PROCESSED && sym->crc != *crc) + warn("overriding version for symbol %s (crc %lx vs. %lx)", + sym->name, sym->crc, *crc); + + sym->state = SYMBOL_PROCESSED; + sym->crc = *crc; +} + +void symbol_set_crc(struct symbol *sym, unsigned long crc) +{ + if (for_each(sym->name, set_crc, &crc) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_ptr(struct symbol *sym, void *data) +{ + sym->ptr_die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; +} + +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr) +{ + if (for_each(sym->name, set_ptr, ptr) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_die(struct symbol *sym, void *data) +{ + sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; + sym->state = SYMBOL_MAPPED; +} + +void symbol_set_die(struct symbol *sym, Dwarf_Die *die) +{ + if (for_each(sym->name, set_die, die) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static bool is_exported(const char *name) +{ + return for_each(name, NULL, NULL) > 0; +} + +void symbol_read_exports(FILE *file) +{ + struct symbol *sym; + char *line = NULL; + char *name = NULL; + size_t size = 0; + int nsym = 0; + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%ms\n", &name) != 1) + error("malformed input line: %s", line); + + if (is_exported(name)) { + /* Ignore duplicates */ + free(name); + continue; + } + + sym = xcalloc(1, sizeof(struct symbol)); + sym->name = name; + sym->addr.section = SHN_UNDEF; + sym->state = SYMBOL_UNPROCESSED; + + hash_add(symbol_names, &sym->name_hash, hash_str(sym->name)); + ++nsym; + + debug("%s", sym->name); + } + + free(line); + debug("%d exported symbols", nsym); +} + +static void get_symbol(struct symbol *sym, void *arg) +{ + struct symbol **res = arg; + + if (sym->state == SYMBOL_UNPROCESSED) + *res = sym; +} + +struct symbol *symbol_get(const char *name) +{ + struct symbol *sym = NULL; + + for_each(name, get_symbol, &sym); + return sym; +} + +void symbol_for_each(symbol_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + func(sym, arg); + } +} + +typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg); + +static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg) +{ + size_t sym_size; + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *xndx_data = NULL; + Elf_Scn *scn; + Elf *elf; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB_SHNDX) { + xndx_data = elf_getdata(scn, NULL); + if (!xndx_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT); + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB) { + unsigned int nsyms; + unsigned int n; + Elf_Data *data = elf_getdata(scn, NULL); + + if (!data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + + if (shdr->sh_entsize != sym_size) + error("expected sh_entsize (%lu) to be %zu", + shdr->sh_entsize, sym_size); + + nsyms = shdr->sh_size / shdr->sh_entsize; + + for (n = 1; n < nsyms; ++n) { + const char *name = NULL; + Elf32_Word xndx = 0; + GElf_Sym sym_mem; + GElf_Sym *sym; + + sym = gelf_getsymshndx(data, xndx_data, n, + &sym_mem, &xndx); + if (!sym) + error("gelf_getsymshndx failed: %s", + elf_errmsg(-1)); + + if (GELF_ST_BIND(sym->st_info) == STB_LOCAL) + continue; + + if (sym->st_shndx != SHN_XINDEX) + xndx = sym->st_shndx; + + name = elf_strptr(elf, shdr->sh_link, + sym->st_name); + if (!name) + error("elf_strptr failed: %s", + elf_errmsg(-1)); + + /* Skip empty symbol names */ + if (*name) + func(name, sym, xndx, arg); + } + } + + scn = elf_nextscn(elf, scn); + } + + check(elf_end(elf)); +} + +static void set_symbol_addr(struct symbol *sym, void *arg) +{ + struct symbol_addr *addr = arg; + + if (sym->addr.section == SHN_UNDEF) { + sym->addr = *addr; + hash_add(symbol_addrs, &sym->addr_hash, + symbol_addr_hash(&sym->addr)); + + debug("%s -> { %u, %lx }", sym->name, sym->addr.section, + sym->addr.address); + } else if (sym->addr.section != addr->section || + sym->addr.address != addr->address) { + warn("multiple addresses for symbol %s?", sym->name); + } +} + +static void elf_set_symbol_addr(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg) +{ + struct symbol_addr addr = { .section = xndx, .address = sym->st_value }; + + /* Set addresses for exported symbols */ + if (addr.section != SHN_UNDEF) + for_each(name, set_symbol_addr, &addr); +} + +void symbol_read_symtab(int fd) +{ + elf_for_each_global(fd, elf_set_symbol_addr, NULL); +} + +void symbol_print_versions(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + if (sym->state != SYMBOL_PROCESSED) + warn("no information for symbol %s", sym->name); + + printf("#SYMVER %s 0x%08lx\n", sym->name, sym->crc); + } +} + +void symbol_free(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + free((void *)sym->name); + free(sym); + } + + hash_init(symbol_addrs); + hash_init(symbol_names); +} diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c new file mode 100644 index 0000000000000..6c03265f4d107 --- /dev/null +++ b/scripts/gendwarfksyms/types.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include +#include +#include + +#include "gendwarfksyms.h" + +static struct cache expansion_cache; + +/* + * A simple linked list of shared or owned strings to avoid copying strings + * around when not necessary. + */ +struct type_list_entry { + const char *str; + void *owned; + struct list_head list; +}; + +static void type_list_free(struct list_head *list) +{ + struct type_list_entry *entry; + struct type_list_entry *tmp; + + list_for_each_entry_safe(entry, tmp, list, list) { + if (entry->owned) + free(entry->owned); + free(entry); + } + + INIT_LIST_HEAD(list); +} + +static int type_list_append(struct list_head *list, const char *s, void *owned) +{ + struct type_list_entry *entry; + + if (!s) + return 0; + + entry = xmalloc(sizeof(struct type_list_entry)); + entry->str = s; + entry->owned = owned; + list_add_tail(&entry->list, list); + + return strlen(entry->str); +} + +static void type_list_write(struct list_head *list, FILE *file) +{ + struct type_list_entry *entry; + + list_for_each_entry(entry, list, list) { + if (entry->str) + checkp(fputs(entry->str, file)); + } +} + +/* + * An expanded type string in symtypes format. + */ +struct type_expansion { + char *name; + size_t len; + struct list_head expanded; + struct hlist_node hash; +}; + +static void type_expansion_init(struct type_expansion *type) +{ + type->name = NULL; + type->len = 0; + INIT_LIST_HEAD(&type->expanded); +} + +static inline void type_expansion_free(struct type_expansion *type) +{ + free(type->name); + type->name = NULL; + type->len = 0; + type_list_free(&type->expanded); +} + +static void type_expansion_append(struct type_expansion *type, const char *s, + void *owned) +{ + type->len += type_list_append(&type->expanded, s, owned); +} + +/* + * type_map -- the longest expansions for each type. + * + * const char *name -> struct type_expansion * + */ +#define TYPE_HASH_BITS 12 +static HASHTABLE_DEFINE(type_map, 1 << TYPE_HASH_BITS); + +static int type_map_get(const char *name, struct type_expansion **res) +{ + struct type_expansion *e; + + hash_for_each_possible(type_map, e, hash, hash_str(name)) { + if (!strcmp(name, e->name)) { + *res = e; + return 0; + } + } + + return -1; +} + +static void type_map_add(const char *name, struct type_expansion *type) +{ + struct type_expansion *e; + + if (type_map_get(name, &e)) { + e = xmalloc(sizeof(struct type_expansion)); + type_expansion_init(e); + e->name = xstrdup(name); + + hash_add(type_map, &e->hash, hash_str(e->name)); + + if (dump_types) + debug("adding %s", e->name); + } else { + /* Use the longest available expansion */ + if (type->len <= e->len) + return; + + type_list_free(&e->expanded); + + if (dump_types) + debug("replacing %s", e->name); + } + + /* Take ownership of type->expanded */ + list_replace_init(&type->expanded, &e->expanded); + e->len = type->len; + + if (dump_types) { + checkp(fputs(e->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&e->expanded, stderr); + checkp(fputs("\n", stderr)); + } +} + +static void type_map_write(FILE *file) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + if (!file) + return; + + hash_for_each_safe(type_map, e, tmp, hash) { + checkp(fputs(e->name, file)); + checkp(fputs(" ", file)); + type_list_write(&e->expanded, file); + checkp(fputs("\n", file)); + } +} + +static void type_map_free(void) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + hash_for_each_safe(type_map, e, tmp, hash) { + type_expansion_free(e); + free(e); + } + + hash_init(type_map); +} + +/* + * CRC for a type, with an optional fully expanded type string for + * debugging. + */ +struct version { + struct type_expansion type; + unsigned long crc; +}; + +static void version_init(struct version *version) +{ + version->crc = crc32(0, NULL, 0); + type_expansion_init(&version->type); +} + +static void version_free(struct version *version) +{ + type_expansion_free(&version->type); +} + +static void version_add(struct version *version, const char *s) +{ + version->crc = crc32(version->crc, (void *)s, strlen(s)); + if (dump_versions) + type_expansion_append(&version->type, s, NULL); +} + +/* + * Type reference format: #, where prefix: + * s -> structure + * u -> union + * e -> enum + * t -> typedef + * + * Names with spaces are additionally wrapped in single quotes. + */ +static inline bool is_type_prefix(const char *s) +{ + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && + s[1] == '#'; +} + +static char get_type_prefix(int tag) +{ + switch (tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + return 's'; + case DW_TAG_union_type: + return 'u'; + case DW_TAG_enumeration_type: + return 'e'; + case DW_TAG_typedef_type: + return 't'; + default: + return 0; + } +} + +static char *get_type_name(struct die *cache) +{ + const char *quote; + char prefix; + char *name; + + if (cache->state == DIE_INCOMPLETE) { + warn("found incomplete cache entry: %p", cache); + return NULL; + } + if (cache->state == DIE_SYMBOL) + return NULL; + if (!cache->fqn || !*cache->fqn) + return NULL; + + prefix = get_type_prefix(cache->tag); + if (!prefix) + return NULL; + + /* Wrap names with spaces in single quotes */ + quote = strstr(cache->fqn, " ") ? "'" : ""; + + /* #\0 */ + if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) + error("asprintf failed for '%s'", cache->fqn); + + return name; +} + +static void __calculate_version(struct version *version, struct list_head *list) +{ + struct type_list_entry *entry; + struct type_expansion *e; + + /* Calculate a CRC over an expanded type string */ + list_for_each_entry(entry, list, list) { + if (is_type_prefix(entry->str)) { + check(type_map_get(entry->str, &e)); + + /* + * It's sufficient to expand each type reference just + * once to detect changes. + */ + if (cache_was_expanded(&expansion_cache, e)) { + version_add(version, entry->str); + } else { + cache_mark_expanded(&expansion_cache, e); + __calculate_version(version, &e->expanded); + } + } else { + version_add(version, entry->str); + } + } +} + +static void calculate_version(struct version *version, struct list_head *list) +{ + version_init(version); + __calculate_version(version, list); + cache_free(&expansion_cache); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive); + +static void type_expand_child(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct type_expansion child; + char *name; + + name = get_type_name(cache); + if (!name) { + __type_expand(cache, type, recursive); + return; + } + + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { + __cache_mark_expanded(&expansion_cache, cache->addr); + type_expansion_init(&child); + __type_expand(cache, &child, true); + type_map_add(name, &child); + type_expansion_free(&child); + } + + type_expansion_append(type, name, name); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct die_fragment *df; + struct die *child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + type_expansion_append(type, df->data.str, NULL); + break; + case FRAGMENT_DIE: + /* Use a complete die_map expansion if available */ + if (__die_map_get(df->data.addr, DIE_COMPLETE, + &child) && + __die_map_get(df->data.addr, DIE_UNEXPANDED, + &child)) + error("unknown child: %" PRIxPTR, + df->data.addr); + + type_expand_child(child, type, recursive); + break; + case FRAGMENT_LINEBREAK: + /* + * Keep whitespace in the symtypes format, but avoid + * repeated spaces. + */ + if (list_is_last(&df->list, &cache->fragments) || + list_next_entry(df, list)->type != + FRAGMENT_LINEBREAK) + type_expansion_append(type, " ", NULL); + break; + default: + error("empty die_fragment in %p", cache); + } + } +} + +static void type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + type_expansion_init(type); + __type_expand(cache, type, recursive); + cache_free(&expansion_cache); +} + +static void expand_type(struct die *cache, void *arg) +{ + struct type_expansion type; + char *name; + + if (cache->mapped) + return; + + cache->mapped = true; + + /* + * Skip unexpanded die_map entries if there's a complete + * expansion available for this DIE. + */ + if (cache->state == DIE_UNEXPANDED && + !__die_map_get(cache->addr, DIE_COMPLETE, &cache)) { + if (cache->mapped) + return; + + cache->mapped = true; + } + + name = get_type_name(cache); + if (!name) + return; + + debug("%s", name); + type_expand(cache, &type, true); + type_map_add(name, &type); + + type_expansion_free(&type); + free(name); +} + +static void expand_symbol(struct symbol *sym, void *arg) +{ + struct type_expansion type; + struct version version; + struct die *cache; + + /* + * No need to expand again unless we want a symtypes file entry + * for the symbol. Note that this means `sym` has the same address + * as another symbol that was already processed. + */ + if (!symtypes && sym->state == SYMBOL_PROCESSED) + return; + + if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) + return; /* We'll warn about missing CRCs later. */ + + type_expand(cache, &type, false); + + /* If the symbol already has a version, don't calculate it again. */ + if (sym->state != SYMBOL_PROCESSED) { + calculate_version(&version, &type.expanded); + symbol_set_crc(sym, version.crc); + debug("%s = %lx", sym->name, version.crc); + + if (dump_versions) { + checkp(fputs(sym->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&version.type.expanded, stderr); + checkp(fputs("\n", stderr)); + } + + version_free(&version); + } + + /* These aren't needed in type_map unless we want a symtypes file. */ + if (symtypes) + type_map_add(sym->name, &type); + + type_expansion_free(&type); +} + +void generate_symtypes_and_versions(FILE *file) +{ + cache_init(&expansion_cache); + + /* + * die_map processing: + * + * 1. die_map contains all types referenced in exported symbol + * signatures, but can contain duplicates just like the original + * DWARF, and some references may not be fully expanded depending + * on how far we processed the DIE tree for that specific symbol. + * + * For each die_map entry, find the longest available expansion, + * and add it to type_map. + */ + die_map_for_each(expand_type, NULL); + + /* + * 2. For each exported symbol, expand the die_map type, and use + * type_map expansions to calculate a symbol version from the + * fully expanded type string. + */ + symbol_for_each(expand_symbol, NULL); + + /* + * 3. If a symtypes file is requested, write type_map contents to + * the file. + */ + type_map_write(file); + type_map_free(); +} diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile index 312edccda7363..4350311fb7b39 100644 --- a/scripts/genksyms/Makefile +++ b/scripts/genksyms/Makefile @@ -4,24 +4,6 @@ hostprogs-always-y += genksyms genksyms-objs := genksyms.o parse.tab.o lex.lex.o -# FIXME: fix the ambiguous grammar in parse.y and delete this hack -# -# Suppress shift/reduce, reduce/reduce conflicts warnings -# unless W=1 is specified. -# -# Just in case, run "$(YACC) --version" without suppressing stderr -# so that 'bison: not found' will be displayed if it is missing. -ifeq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) - -quiet_cmd_bison_no_warn = $(quiet_cmd_bison) - cmd_bison_no_warn = $(YACC) --version >/dev/null; \ - $(cmd_bison) 2>/dev/null - -$(obj)/pars%.tab.c $(obj)/pars%.tab.h: $(src)/pars%.y FORCE - $(call if_changed,bison_no_warn) - -endif - # -I needed for generated C source to include headers in source tree HOSTCFLAGS_parse.tab.o := -I $(src) HOSTCFLAGS_lex.lex.o := -I $(src) diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 07f9b8cfb2337..8b0d7ac73dbb0 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -12,18 +12,19 @@ #include #include +#include #include #include #include #include #include +#include + #include "genksyms.h" /*----------------------------------------------------------------------*/ -#define HASH_BUCKETS 4096 - -static struct symbol *symtab[HASH_BUCKETS]; +static HASHTABLE_DEFINE(symbol_hashtable, 1U << 12); static FILE *debugfile; int cur_line = 1; @@ -60,7 +61,7 @@ static void print_type_name(enum symbol_type type, const char *name); /*----------------------------------------------------------------------*/ -static const unsigned int crctab32[] = { +static const uint32_t crctab32[] = { 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, @@ -115,19 +116,19 @@ static const unsigned int crctab32[] = { 0x2d02ef8dU }; -static unsigned long partial_crc32_one(unsigned char c, unsigned long crc) +static uint32_t partial_crc32_one(uint8_t c, uint32_t crc) { return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); } -static unsigned long partial_crc32(const char *s, unsigned long crc) +static uint32_t partial_crc32(const char *s, uint32_t crc) { while (*s) crc = partial_crc32_one(*s++, crc); return crc; } -static unsigned long crc32(const char *s) +static uint32_t crc32(const char *s) { return partial_crc32(s, 0xffffffff) ^ 0xffffffff; } @@ -151,14 +152,14 @@ static enum symbol_type map_to_ns(enum symbol_type t) struct symbol *find_symbol(const char *name, enum symbol_type ns, int exact) { - unsigned long h = crc32(name) % HASH_BUCKETS; struct symbol *sym; - for (sym = symtab[h]; sym; sym = sym->hash_next) + hash_for_each_possible(symbol_hashtable, sym, hnode, crc32(name)) { if (map_to_ns(sym->type) == map_to_ns(ns) && strcmp(name, sym->name) == 0 && sym->is_declared) break; + } if (exact && sym && sym->type != ns) return NULL; @@ -224,64 +225,56 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, return NULL; } - h = crc32(name) % HASH_BUCKETS; - for (sym = symtab[h]; sym; sym = sym->hash_next) { - if (map_to_ns(sym->type) == map_to_ns(type) && - strcmp(name, sym->name) == 0) { - if (is_reference) - /* fall through */ ; - else if (sym->type == type && - equal_list(sym->defn, defn)) { - if (!sym->is_declared && sym->is_override) { - print_location(); - print_type_name(type, name); - fprintf(stderr, " modversion is " - "unchanged\n"); - } - sym->is_declared = 1; - return sym; - } else if (!sym->is_declared) { - if (sym->is_override && flag_preserve) { - print_location(); - fprintf(stderr, "ignoring "); - print_type_name(type, name); - fprintf(stderr, " modversion change\n"); - sym->is_declared = 1; - return sym; - } else { - status = is_unknown_symbol(sym) ? - STATUS_DEFINED : STATUS_MODIFIED; - } - } else { - error_with_pos("redefinition of %s", name); - return sym; + h = crc32(name); + hash_for_each_possible(symbol_hashtable, sym, hnode, h) { + if (map_to_ns(sym->type) != map_to_ns(type) || + strcmp(name, sym->name)) + continue; + + if (is_reference) { + break; + } else if (sym->type == type && equal_list(sym->defn, defn)) { + if (!sym->is_declared && sym->is_override) { + print_location(); + print_type_name(type, name); + fprintf(stderr, " modversion is unchanged\n"); } + sym->is_declared = 1; + } else if (sym->is_declared) { + error_with_pos("redefinition of %s", name); + } else if (sym->is_override && flag_preserve) { + print_location(); + fprintf(stderr, "ignoring "); + print_type_name(type, name); + fprintf(stderr, " modversion change\n"); + sym->is_declared = 1; + } else { + status = is_unknown_symbol(sym) ? + STATUS_DEFINED : STATUS_MODIFIED; break; } + free_list(defn, NULL); + return sym; } if (sym) { - struct symbol **psym; + hash_del(&sym->hnode); - for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) { - if (*psym == sym) { - *psym = sym->hash_next; - break; - } - } + free_list(sym->defn, NULL); + free(sym->name); + free(sym); --nsyms; } sym = xmalloc(sizeof(*sym)); - sym->name = name; + sym->name = xstrdup(name); sym->type = type; sym->defn = defn; sym->expansion_trail = NULL; sym->visited = NULL; sym->is_extern = is_extern; - sym->hash_next = symtab[h]; - symtab[h] = sym; + hash_add(symbol_hashtable, &sym->hnode, h); sym->is_declared = !is_reference; sym->status = status; @@ -480,7 +473,7 @@ static void read_reference(FILE *f) defn = def; def = read_node(f); } - subsym = add_reference_symbol(xstrdup(sym->string), sym->tag, + subsym = add_reference_symbol(sym->string, sym->tag, defn, is_extern); subsym->is_override = is_override; free_node(sym); @@ -525,7 +518,7 @@ static void print_list(FILE * f, struct string_list *list) } } -static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) +static uint32_t expand_and_crc_sym(struct symbol *sym, uint32_t crc) { struct string_list *list = sym->defn; struct string_list **e, **b; @@ -632,7 +625,7 @@ static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) void export_symbol(const char *name) { struct symbol *sym; - unsigned long crc; + uint32_t crc; int has_changed = 0; sym = find_symbol(name, SYM_NORMAL, 0); @@ -680,7 +673,7 @@ void export_symbol(const char *name) if (flag_dump_defs) fputs(">\n", debugfile); - printf("#SYMVER %s 0x%08lx\n", name, crc); + printf("#SYMVER %s 0x%08lx\n", name, (unsigned long)crc); } /*----------------------------------------------------------------------*/ @@ -832,9 +825,9 @@ int main(int argc, char **argv) } if (flag_debug) { - fprintf(debugfile, "Hash table occupancy %d/%d = %g\n", - nsyms, HASH_BUCKETS, - (double)nsyms / (double)HASH_BUCKETS); + fprintf(debugfile, "Hash table occupancy %d/%zd = %g\n", + nsyms, HASH_SIZE(symbol_hashtable), + (double)nsyms / HASH_SIZE(symbol_hashtable)); } if (dumpfile) diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h index 21ed2ec2d98ca..0c355075f0e67 100644 --- a/scripts/genksyms/genksyms.h +++ b/scripts/genksyms/genksyms.h @@ -12,8 +12,11 @@ #ifndef MODUTILS_GENKSYMS_H #define MODUTILS_GENKSYMS_H 1 +#include #include +#include + enum symbol_type { SYM_NORMAL, SYM_TYPEDEF, SYM_ENUM, SYM_STRUCT, SYM_UNION, SYM_ENUM_CONST @@ -31,8 +34,8 @@ struct string_list { }; struct symbol { - struct symbol *hash_next; - const char *name; + struct hlist_node hnode; + char *name; enum symbol_type type; struct string_list *defn; struct symbol *expansion_trail; @@ -64,6 +67,8 @@ struct string_list *copy_list_range(struct string_list *start, int yylex(void); int yyparse(void); +extern bool dont_want_type_specifier; + void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2))); /*----------------------------------------------------------------------*/ diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index a4d7495eaf75e..22aeb57649d9c 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -12,6 +12,7 @@ %{ #include +#include #include #include #include @@ -50,6 +51,7 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) %% +u?int(8|16|32|64)x(1|2|4|8|16)_t return BUILTIN_INT_KEYW; /* Keep track of our location in the original source files. */ ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; @@ -113,6 +115,12 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) /* The second stage lexer. Here we incorporate knowledge of the state of the parser to tailor the tokens that are returned. */ +/* + * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an + * IDENT. We need a hint from the parser to handle this accurately. + */ +bool dont_want_type_specifier; + int yylex(void) { @@ -207,7 +215,7 @@ repeat: goto repeat; } } - if (!suppress_type_lookup) + if (!suppress_type_lookup && !dont_want_type_specifier) { if (find_symbol(yytext, SYM_TYPEDEF, 1)) token = TYPE; @@ -431,7 +439,12 @@ fini: if (suppress_type_lookup > 0) --suppress_type_lookup; - if (dont_want_brace_phrase > 0) + + /* + * __attribute__() can be placed immediately after the 'struct' keyword. + * e.g.) struct __attribute__((__packed__)) foo { ... }; + */ + if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0) --dont_want_brace_phrase; yylval = &next_node->next; diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index 8e9b5e69e8f01..ee600a804fa10 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -12,6 +12,7 @@ %{ #include +#include #include #include #include "genksyms.h" @@ -148,32 +149,45 @@ simple_declaration: current_name = NULL; } $$ = $3; + dont_want_type_specifier = false; } ; init_declarator_list_opt: - /* empty */ { $$ = NULL; } - | init_declarator_list + /* empty */ { $$ = NULL; } + | init_declarator_list { free_list(decl_spec, NULL); $$ = $1; } ; init_declarator_list: init_declarator { struct string_list *decl = *$1; *$1 = NULL; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; $$ = $1; + dont_want_type_specifier = true; } - | init_declarator_list ',' init_declarator - { struct string_list *decl = *$3; - *$3 = NULL; + | init_declarator_list ',' attribute_opt init_declarator + { struct string_list *decl = *$4; + *$4 = NULL; free_list(*$2, NULL); *$2 = decl_spec; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; - $$ = $3; + $$ = $4; + dont_want_type_specifier = true; } ; @@ -189,8 +203,9 @@ decl_specifier_seq_opt: ; decl_specifier_seq: - decl_specifier { decl_spec = *$1; } + attribute_opt decl_specifier { decl_spec = *$2; } | decl_specifier_seq decl_specifier { decl_spec = *$2; } + | decl_specifier_seq ATTRIBUTE_PHRASE { decl_spec = *$2; } ; decl_specifier: @@ -200,7 +215,8 @@ decl_specifier: remove_node($1); $$ = $1; } - | type_specifier + | type_specifier { dont_want_type_specifier = true; $$ = $1; } + | type_qualifier ; storage_class_specifier: @@ -213,24 +229,23 @@ storage_class_specifier: type_specifier: simple_type_specifier - | cvar_qualifier | TYPEOF_KEYW '(' parameter_declaration ')' | TYPEOF_PHRASE /* References to s/u/e's defined elsewhere. Rearrange things so that it is easier to expand the definition fully later. */ - | STRUCT_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_STRUCT; $$ = $2; } - | UNION_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_UNION; $$ = $2; } + | STRUCT_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_STRUCT; $$ = $3; } + | UNION_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_UNION; $$ = $3; } | ENUM_KEYW IDENT { remove_node($1); (*$2)->tag = SYM_ENUM; $$ = $2; } /* Full definitions of an s/u/e. Record it. */ - | STRUCT_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_STRUCT); $$ = $3; } - | UNION_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_UNION); $$ = $3; } + | STRUCT_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_STRUCT); $$ = $4; } + | UNION_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_UNION); $$ = $4; } | ENUM_KEYW IDENT enum_body { record_compound($1, $2, $3, SYM_ENUM); $$ = $3; } /* @@ -239,8 +254,8 @@ type_specifier: | ENUM_KEYW enum_body { add_symbol(NULL, SYM_ENUM, NULL, 0); $$ = $2; } /* Anonymous s/u definitions. Nothing needs doing. */ - | STRUCT_KEYW class_body { $$ = $2; } - | UNION_KEYW class_body { $$ = $2; } + | STRUCT_KEYW attribute_opt class_body { $$ = $3; } + | UNION_KEYW attribute_opt class_body { $$ = $3; } ; simple_type_specifier: @@ -260,22 +275,24 @@ simple_type_specifier: ; ptr_operator: - '*' cvar_qualifier_seq_opt + '*' type_qualifier_seq_opt { $$ = $2 ? $2 : $1; } ; -cvar_qualifier_seq_opt: +type_qualifier_seq_opt: /* empty */ { $$ = NULL; } - | cvar_qualifier_seq + | type_qualifier_seq ; -cvar_qualifier_seq: - cvar_qualifier - | cvar_qualifier_seq cvar_qualifier { $$ = $2; } +type_qualifier_seq: + type_qualifier + | ATTRIBUTE_PHRASE + | type_qualifier_seq type_qualifier { $$ = $2; } + | type_qualifier_seq ATTRIBUTE_PHRASE { $$ = $2; } ; -cvar_qualifier: - CONST_KEYW | VOLATILE_KEYW | ATTRIBUTE_PHRASE +type_qualifier: + CONST_KEYW | VOLATILE_KEYW | RESTRICT_KEYW { /* restrict has no effect in prototypes so ignore it */ remove_node($1); @@ -297,15 +314,7 @@ direct_declarator: current_name = (*$1)->string; $$ = $1; } - } - | TYPE - { if (current_name != NULL) { - error_with_pos("unexpected second declaration name"); - YYERROR; - } else { - current_name = (*$1)->string; - $$ = $1; - } + dont_want_type_specifier = false; } | direct_declarator '(' parameter_declaration_clause ')' { $$ = $4; } @@ -325,16 +334,19 @@ nested_declarator: ; direct_nested_declarator: - IDENT - | TYPE - | direct_nested_declarator '(' parameter_declaration_clause ')' + direct_nested_declarator1 + | direct_nested_declarator1 '(' parameter_declaration_clause ')' { $$ = $4; } - | direct_nested_declarator '(' error ')' + ; + +direct_nested_declarator1: + IDENT { $$ = $1; dont_want_type_specifier = false; } + | direct_nested_declarator1 '(' error ')' { $$ = $4; } - | direct_nested_declarator BRACKET_PHRASE + | direct_nested_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' nested_declarator ')' - { $$ = $3; } + | '(' attribute_opt nested_declarator ')' + { $$ = $4; } | '(' error ')' { $$ = $3; } ; @@ -352,45 +364,57 @@ parameter_declaration_list_opt: parameter_declaration_list: parameter_declaration + { $$ = $1; dont_want_type_specifier = false; } | parameter_declaration_list ',' parameter_declaration - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } ; parameter_declaration: - decl_specifier_seq m_abstract_declarator + decl_specifier_seq abstract_declarator_opt { $$ = $2 ? $2 : $1; } ; -m_abstract_declarator: - ptr_operator m_abstract_declarator +abstract_declarator_opt: + /* empty */ { $$ = NULL; } + | abstract_declarator + ; + +abstract_declarator: + ptr_operator + | ptr_operator abstract_declarator { $$ = $2 ? $2 : $1; } - | direct_m_abstract_declarator + | direct_abstract_declarator attribute_opt + { $$ = $2; dont_want_type_specifier = false; } ; -direct_m_abstract_declarator: - /* empty */ { $$ = NULL; } - | IDENT +direct_abstract_declarator: + direct_abstract_declarator1 + | direct_abstract_declarator1 open_paren parameter_declaration_clause ')' + { $$ = $4; } + | open_paren parameter_declaration_clause ')' + { $$ = $3; } + ; + +direct_abstract_declarator1: + IDENT { /* For version 2 checksums, we don't want to remember private parameter names. */ remove_node($1); $$ = $1; } - /* This wasn't really a typedef name but an identifier that - shadows one. */ - | TYPE - { remove_node($1); - $$ = $1; - } - | direct_m_abstract_declarator '(' parameter_declaration_clause ')' - { $$ = $4; } - | direct_m_abstract_declarator '(' error ')' + | direct_abstract_declarator1 open_paren error ')' { $$ = $4; } - | direct_m_abstract_declarator BRACKET_PHRASE + | direct_abstract_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' m_abstract_declarator ')' - { $$ = $3; } - | '(' error ')' + | open_paren attribute_opt abstract_declarator ')' + { $$ = $4; } + | open_paren error ')' { $$ = $3; } + | BRACKET_PHRASE + ; + +open_paren: + '(' { $$ = $1; dont_want_type_specifier = false; } ; function_definition: @@ -430,9 +454,9 @@ member_specification: member_declaration: decl_specifier_seq_opt member_declarator_list_opt ';' - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } | error ';' - { $$ = $2; } + { $$ = $2; dont_want_type_specifier = false; } ; member_declarator_list_opt: @@ -442,7 +466,9 @@ member_declarator_list_opt: member_declarator_list: member_declarator - | member_declarator_list ',' member_declarator { $$ = $3; } + { $$ = $1; dont_want_type_specifier = true; } + | member_declarator_list ',' member_declarator + { $$ = $3; dont_want_type_specifier = true; } ; member_declarator: @@ -457,7 +483,7 @@ member_bitfield_declarator: attribute_opt: /* empty */ { $$ = NULL; } - | attribute_opt ATTRIBUTE_PHRASE + | attribute_opt ATTRIBUTE_PHRASE { $$ = $2; } ; enum_body: @@ -472,12 +498,12 @@ enumerator_list: enumerator: IDENT { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; add_symbol(name, SYM_ENUM_CONST, NULL, 0); } | IDENT '=' EXPRESSION_PHRASE { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; struct string_list *expr = copy_list_range(*$3, *$2); add_symbol(name, SYM_ENUM_CONST, expr, 0); } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index a0a0be38cbdc1..fb50bd4f4103f 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -105,9 +105,11 @@ configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARC all-config-fragments = $(call configfiles,*.config) config-fragments = $(call configfiles,$@) +cmd_merge_fragments = $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + %.config: $(obj)/conf $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture)) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + $(call cmd,merge_fragments) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig PHONY += tinyconfig diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 4286d5e7f95dc..3b55e7a4131d9 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -360,10 +360,12 @@ int conf_read_simple(const char *name, int def) *p = '\0'; - in = zconf_fopen(env); + name = env; + + in = zconf_fopen(name); if (in) { conf_message("using defaults found in %s", - env); + name); goto load; } diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 6c92ef1e16efb..eaa465b0ccf9c 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1464,8 +1464,8 @@ void ConfigMainWindow::loadConfig(void) { QString str; - str = QFileDialog::getOpenFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getOpenFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_read(str.toLocal8Bit().constData())) @@ -1491,8 +1491,8 @@ void ConfigMainWindow::saveConfigAs(void) { QString str; - str = QFileDialog::getSaveFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getSaveFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_write(str.toLocal8Bit().constData())) { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 89b84bf8e21fa..7beb59dec5a08 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -388,6 +388,7 @@ static void sym_warn_unmet_dep(const struct symbol *sym) " Selected by [m]:\n"); fputs(str_get(&gs), stderr); + str_free(&gs); sym_warnings++; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7ea59dc4926b3..e18ae7dc8140a 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -33,6 +33,10 @@ static bool module_enabled; static bool modversions; /* Is CONFIG_MODULE_SRCVERSION_ALL set? */ static bool all_versions; +/* Is CONFIG_BASIC_MODVERSIONS set? */ +static bool basic_modversions; +/* Is CONFIG_EXTENDED_MODVERSIONS set? */ +static bool extended_modversions; /* If we are modposting external module set to 1 */ static bool external_module; /* Only warn about unresolved symbols */ @@ -1805,6 +1809,49 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod) } } +/** + * Record CRCs for unresolved symbols, supporting long names + */ +static void add_extended_versions(struct buffer *b, struct module *mod) +{ + struct symbol *s; + + if (!extended_modversions) + return; + + buf_printf(b, "\n"); + buf_printf(b, "static const u32 ____version_ext_crcs[]\n"); + buf_printf(b, "__used __section(\"__version_ext_crcs\") = {\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) { + warn("\"%s\" [%s.ko] has no CRC!\n", + s->name, mod->name); + continue; + } + buf_printf(b, "\t0x%08x,\n", s->crc); + } + buf_printf(b, "};\n"); + + buf_printf(b, "static const char ____version_ext_names[]\n"); + buf_printf(b, "__used __section(\"__version_ext_names\") =\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) + /* + * We already warned on this when producing the crc + * table. + * We need to skip its name too, as the indexes in + * both tables need to align. + */ + continue; + buf_printf(b, "\t\"%s\\0\"\n", s->name); + } + buf_printf(b, ";\n"); +} + /** * Record CRCs for unresolved symbols **/ @@ -1812,7 +1859,7 @@ static void add_versions(struct buffer *b, struct module *mod) { struct symbol *s; - if (!modversions) + if (!basic_modversions) return; buf_printf(b, "\n"); @@ -1828,11 +1875,16 @@ static void add_versions(struct buffer *b, struct module *mod) continue; } if (strlen(s->name) >= MODULE_NAME_LEN) { - error("too long symbol \"%s\" [%s.ko]\n", - s->name, mod->name); - break; + if (extended_modversions) { + /* this symbol will only be in the extended info */ + continue; + } else { + error("too long symbol \"%s\" [%s.ko]\n", + s->name, mod->name); + break; + } } - buf_printf(b, "\t{ %#8x, \"%s\" },\n", + buf_printf(b, "\t{ 0x%08x, \"%s\" },\n", s->crc, s->name); } @@ -1961,6 +2013,7 @@ static void write_mod_c_file(struct module *mod) add_header(&buf, mod); add_exported_symbols(&buf, mod); add_versions(&buf, mod); + add_extended_versions(&buf, mod); add_depends(&buf, mod); buf_printf(&buf, "\n"); @@ -2126,7 +2179,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2175,6 +2228,12 @@ int main(int argc, char **argv) case 'd': missing_namespace_deps = optarg; break; + case 'b': + basic_modversions = true; + break; + case 'x': + extended_modversions = true; + break; default: exit(1); } diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index dca706617adc7..0cf3a55b05e14 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -22,7 +22,6 @@ license=(GPL-2.0-only) makedepends=( bc bison - cpio flex gettext kmod diff --git a/scripts/package/builddeb b/scripts/package/builddeb index ad7aba0f268e1..3627ca227e5a5 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -5,10 +5,12 @@ # # Simple script to generate a deb package for a Linux kernel. All the # complexity of what to do with a kernel after it is installed or removed -# is left to other scripts and packages: they can install scripts in the -# /etc/kernel/{pre,post}{inst,rm}.d/ directories (or an alternative location -# specified in KDEB_HOOKDIR) that will be called on package install and -# removal. +# is left to other scripts and packages. Scripts can be placed into the +# preinst, postinst, prerm and postrm directories in /etc/kernel or +# /usr/share/kernel. A different list of search directories can be given +# via KDEB_HOOKDIR. Scripts in directories earlier in the list will +# override scripts of the same name in later directories. The script will +# be called on package installation and removal. set -eu @@ -74,10 +76,8 @@ install_maint_scripts () { # kernel packages, as well as kernel packages built using make-kpkg. # make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and # so do we; recent versions of dracut and initramfs-tools will obey this. - debhookdir=${KDEB_HOOKDIR:-/etc/kernel} + debhookdir=${KDEB_HOOKDIR:-/etc/kernel /usr/share/kernel} for script in postinst postrm preinst prerm; do - mkdir -p "${pdir}${debhookdir}/${script}.d" - mkdir -p "${pdir}/DEBIAN" cat <<-EOF > "${pdir}/DEBIAN/${script}" #!/bin/sh @@ -90,7 +90,15 @@ install_maint_scripts () { # Tell initramfs builder whether it's wanted export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No) - test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d + # run-parts will error out if one of its directory arguments does not + # exist, so filter the list of hook directories accordingly. + hookdirs= + for dir in ${debhookdir}; do + test -d "\$dir/${script}.d" || continue + hookdirs="\$hookdirs \$dir/${script}.d" + done + hookdirs="\${hookdirs# }" + test -n "\$hookdirs" && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" \$hookdirs exit 0 EOF chmod 755 "${pdir}/DEBIAN/${script}" diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index d3c5b104c0631..bb6e23c1174ec 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -49,17 +49,10 @@ mkdir -p "${destdir}" # This caters to host programs that participate in Kbuild. objtool and # resolve_btfids are out of scope. if [ "${CC}" != "${HOSTCC}" ]; then - echo "Rebuilding host programs with ${CC}..." - - # This leverages external module building. - # - Clear sub_make_done to allow the top-level Makefile to redo sub-make. - # - Filter out --no-print-directory to print "Entering directory" logs - # when Make changes the working directory. - unset sub_make_done - MAKEFLAGS=$(echo "${MAKEFLAGS}" | sed s/--no-print-directory//) - - cat <<-'EOF' > "${destdir}/Kbuild" - subdir-y := scripts + cat "${destdir}/scripts/Makefile" - <<-'EOF' > "${destdir}/scripts/Kbuild" + subdir-y += basic + hostprogs-always-y += mod/modpost + mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) EOF # HOSTCXX is not overridden. The C++ compiler is used to build: @@ -67,20 +60,12 @@ if [ "${CC}" != "${HOSTCC}" ]; then # - GCC plugins, which will not work on the installed system even after # being rebuilt. # - # Use the single-target build to avoid the modpost invocation, which - # would overwrite Module.symvers. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ - - cat <<-'EOF' > "${destdir}/scripts/Kbuild" - subdir-y := basic - hostprogs-always-y := mod/modpost - mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) - EOF - - # Run once again to rebuild scripts/basic/ and scripts/mod/modpost. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ + # Clear VPATH and srcroot because the source files reside in the output + # directory. + # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts - rm -f "${destdir}/Kbuild" "${destdir}/scripts/Kbuild" + rm -f "${destdir}/scripts/Kbuild" fi find "${destdir}" \( -name '.*.cmd' -o -name '*.o' \) -delete diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index b038a1380b8af..b6dd98ca860b4 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -205,7 +205,7 @@ Priority: optional Maintainer: $maintainer Rules-Requires-Root: no Build-Depends: debhelper-compat (= 12) -Build-Depends-Arch: bc, bison, cpio, flex, +Build-Depends-Arch: bc, bison, flex, gcc-${host_gnu} , kmod, libelf-dev:native, libssl-dev:native, libssl-dev , diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 8a3384342e8db..6c2b6a62d9d2f 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3245,7 +3245,7 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream, if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; - bufs = memdup_user(xfern.bufs, sizeof(void *) * runtime->channels); + bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *)); if (IS_ERR(bufs)) return PTR_ERR(bufs); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 84393f4f429df..8923813ce4247 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -80,7 +80,11 @@ static int compare_input_type(const void *ap, const void *bp) /* In case one has boost and the other one has not, pick the one with boost first. */ - return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + if (a->has_boost_on_pin != b->has_boost_on_pin) + return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + + /* Keep the original order */ + return a->order - b->order; } /* Reorder the surround channels @@ -400,6 +404,8 @@ int snd_hda_parse_pin_defcfg(struct hda_codec *codec, reorder_outputs(cfg->speaker_outs, cfg->speaker_pins); /* sort inputs in the order of AUTO_PIN_* type */ + for (i = 0; i < cfg->num_inputs; i++) + cfg->inputs[i].order = i; sort(cfg->inputs, cfg->num_inputs, sizeof(cfg->inputs[0]), compare_input_type, NULL); diff --git a/sound/pci/hda/hda_auto_parser.h b/sound/pci/hda/hda_auto_parser.h index 579b11beac718..87af3d8c02f7f 100644 --- a/sound/pci/hda/hda_auto_parser.h +++ b/sound/pci/hda/hda_auto_parser.h @@ -37,6 +37,7 @@ struct auto_pin_cfg_item { unsigned int is_headset_mic:1; unsigned int is_headphone_mic:1; /* Mic-only in headphone jack */ unsigned int has_boost_on_pin:1; + int order; }; struct auto_pin_cfg; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d3c9ed9635888..8192be394d0d0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7497,6 +7497,16 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; } +/* + * Clear COEF 0x0d (PCBEEP passthrough) bit 0x40 where BIOS sets it wrongly + * at PM resume + */ +static void alc283_fixup_dell_hp_resume(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_INIT) + alc_write_coef_idx(codec, 0xd, 0x2800); +} enum { ALC269_FIXUP_GPIO2, @@ -7799,6 +7809,7 @@ enum { ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE, ALC233_FIXUP_MEDION_MTL_SPK, ALC294_FIXUP_BASS_SPEAKER_15, + ALC283_FIXUP_DELL_HP_RESUME, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10143,6 +10154,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_bass_speaker_15, }, + [ALC283_FIXUP_DELL_HP_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc283_fixup_dell_hp_resume, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10203,6 +10218,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0604, "Dell Venue 11 Pro 7130", ALC283_FIXUP_DELL_HP_RESUME), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X), @@ -10918,7 +10934,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x386e, "Yoga Pro 7 14ARP8", ALC285_FIXUP_SPEAKER2_TO_DAC1), - HDA_CODEC_QUIRK(0x17aa, 0x386f, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), + HDA_CODEC_QUIRK(0x17aa, 0x38a8, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), /* this must match before PCI SSID 17aa:386f below */ SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1f59ee248771c..89e99ed4275a2 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -181,6 +181,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas break; default: dev_err(dev, "Unknown chip revision %d\n", chip->acp_rev); + spin_unlock_irq(&adata->acp_lock); return -EINVAL; } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index ecf57a6cb7c37..b16587d8f97a8 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -304,6 +304,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83L3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83N6"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index ca4cc954efa8e..eb97ac73ec062 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2203,6 +2203,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c) return ret; } + mutex_init(&da7213->ctrl_lock); + pm_runtime_set_autosuspend_delay(&i2c->dev, 100); pm_runtime_use_autosuspend(&i2c->dev); pm_runtime_set_active(&i2c->dev); diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index f508df01145bf..e7bd561a8f40d 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -101,7 +101,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = { SOC_DOUBLE_R_TLV("DAC Playback Volume", ES8316_DAC_VOLL, ES8316_DAC_VOLR, 0, 0xc0, 1, dac_vol_tlv), SOC_SINGLE("DAC Soft Ramp Switch", ES8316_DAC_SET1, 4, 1, 1), - SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 4, 0), + SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 3, 0), SOC_SINGLE("DAC Notch Filter Switch", ES8316_DAC_SET2, 6, 1, 0), SOC_SINGLE("DAC Double Fs Switch", ES8316_DAC_SET2, 7, 1, 0), SOC_SINGLE("DAC Stereo Enhancement", ES8316_DAC_SET3, 0, 7, 0), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b06eead7e0f6f..066d92b543128 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -911,7 +911,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_write(es8326->regmap, ES8326_INT_SOURCE, (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x0d); queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(400)); es8326->hp = 1; @@ -1023,7 +1023,7 @@ static void es8326_init(struct snd_soc_component *component) struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); regmap_write(es8326->regmap, ES8326_RESET, 0x1f); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); + regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x3E); regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9); diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2b3c0f9e178cc..9cb74962161a0 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -1091,8 +1091,7 @@ static int rt5514_set_bias_level(struct snd_soc_component *component, static int rt5514_probe(struct snd_soc_component *component) { struct rt5514_priv *rt5514 = snd_soc_component_get_drvdata(component); - struct platform_device *pdev = container_of(component->dev, - struct platform_device, dev); + struct platform_device *pdev = to_platform_device(component->dev); rt5514->mclk = devm_clk_get_optional(component->dev, "mclk"); if (IS_ERR(rt5514->mclk)) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index e5fbf5305ea25..c4cf3cff58ded 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -6,6 +6,7 @@ comment "Common SoC Audio options for Freescale CPUs:" config SND_SOC_FSL_ASRC tristate "Asynchronous Sample Rate Converter (ASRC) module support" depends on HAS_DMA + select DMA_SHARED_BUFFER select REGMAP_MMIO select SND_SOC_GENERIC_DMAENGINE_PCM select SND_COMPRESS_ACCEL diff --git a/sound/soc/fsl/fsl_asrc_m2m.c b/sound/soc/fsl/fsl_asrc_m2m.c index 4906843e2a8fd..f46881f71e430 100644 --- a/sound/soc/fsl/fsl_asrc_m2m.c +++ b/sound/soc/fsl/fsl_asrc_m2m.c @@ -183,7 +183,7 @@ static int asrc_dmaconfig(struct fsl_asrc_pair *pair, } /* main function of converter */ -static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) +static int asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) { struct fsl_asrc *asrc = pair->asrc; struct device *dev = &asrc->pdev->dev; @@ -193,7 +193,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas unsigned int out_dma_len; unsigned int width; u32 fifo_addr; - int ret; + int ret = 0; /* set ratio mod */ if (asrc->m2m_set_ratio_mod) { @@ -215,6 +215,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas in_buf_len > ASRC_M2M_BUFFER_SIZE || in_buf_len % (width * pair->channels / 8)) { dev_err(dev, "out buffer size is error: [%d]\n", in_buf_len); + ret = -EINVAL; goto end; } @@ -245,6 +246,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas } } else if (out_dma_len > ASRC_M2M_BUFFER_SIZE) { dev_err(dev, "cap buffer size error\n"); + ret = -EINVAL; goto end; } @@ -263,12 +265,14 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas if (!wait_for_completion_interruptible_timeout(&pair->complete[IN], 10 * HZ)) { dev_err(dev, "out DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } if (out_dma_len > 0) { if (!wait_for_completion_interruptible_timeout(&pair->complete[OUT], 10 * HZ)) { dev_err(dev, "cap DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } } @@ -278,7 +282,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas /* update payload length for capture */ task->output_size = out_dma_len; end: - return; + return ret; } static int fsl_asrc_m2m_comp_open(struct snd_compr_stream *stream) @@ -525,9 +529,7 @@ static int fsl_asrc_m2m_comp_task_start(struct snd_compr_stream *stream, struct snd_compr_runtime *runtime = stream->runtime; struct fsl_asrc_pair *pair = runtime->private_data; - asrc_m2m_device_run(pair, task); - - return 0; + return asrc_m2m_device_run(pair, task); } static int fsl_asrc_m2m_comp_task_stop(struct snd_compr_stream *stream, @@ -633,7 +635,7 @@ int fsl_asrc_m2m_suspend(struct fsl_asrc *asrc) for (i = 0; i < PAIR_CTX_NUM; i++) { pair = asrc->pair[i]; - if (!pair) + if (!pair || !pair->dma_buffer[IN].area || !pair->dma_buffer[OUT].area) continue; if (!completion_done(&pair->complete[IN])) { if (pair->dma_chan[IN]) diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index c36b1a2ac949e..ee94b256b7707 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -648,23 +648,23 @@ static int graph_parse_node_multi(struct simple_util_priv *priv, static int graph_parse_node_single(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { - struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(port, NULL); - return __graph_parse_node(priv, gtype, ep, li, is_cpu, 0); } static int graph_parse_node(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { + struct device_node *port __free(device_node) = ep_to_port(ep); + if (graph_lnk_is_multi(port)) return graph_parse_node_multi(priv, gtype, port, li, is_cpu); else - return graph_parse_node_single(priv, gtype, port, li, is_cpu); + return graph_parse_node_single(priv, gtype, ep, li, is_cpu); } static void graph_parse_daifmt(struct device_node *node, unsigned int *daifmt) @@ -722,14 +722,15 @@ static unsigned int graph_parse_bitframe(struct device_node *ep) static void graph_link_init(struct simple_util_priv *priv, struct device_node *lnk, - struct device_node *port_cpu, - struct device_node *port_codec, + struct device_node *ep_cpu, + struct device_node *ep_codec, struct link_info *li, int is_cpu_node) { struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct device_node *ep_cpu, *ep_codec; + struct device_node *port_cpu = ep_to_port(ep_cpu); + struct device_node *port_codec = ep_to_port(ep_codec); struct device_node *multi_cpu_port = NULL, *multi_codec_port = NULL; struct snd_soc_dai_link_component *dlc; unsigned int daifmt = 0; @@ -739,25 +740,23 @@ static void graph_link_init(struct simple_util_priv *priv, int multi_cpu_port_idx = 1, multi_codec_port_idx = 1; int i; - of_node_get(port_cpu); if (graph_lnk_is_multi(port_cpu)) { multi_cpu_port = port_cpu; ep_cpu = graph_get_next_multi_ep(&multi_cpu_port, multi_cpu_port_idx++); of_node_put(port_cpu); port_cpu = ep_to_port(ep_cpu); } else { - ep_cpu = of_graph_get_next_port_endpoint(port_cpu, NULL); + of_node_get(ep_cpu); } struct device_node *ports_cpu __free(device_node) = port_to_ports(port_cpu); - of_node_get(port_codec); if (graph_lnk_is_multi(port_codec)) { multi_codec_port = port_codec; ep_codec = graph_get_next_multi_ep(&multi_codec_port, multi_codec_port_idx++); of_node_put(port_codec); port_codec = ep_to_port(ep_codec); } else { - ep_codec = of_graph_get_next_port_endpoint(port_codec, NULL); + of_node_get(ep_codec); } struct device_node *ports_codec __free(device_node) = port_to_ports(port_codec); @@ -833,7 +832,7 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, { struct device_node *cpu_port = lnk; struct device_node *cpu_ep __free(device_node) = of_graph_get_next_port_endpoint(cpu_port, NULL); - struct device_node *codec_port __free(device_node) = of_graph_get_remote_port(cpu_ep); + struct device_node *codec_ep __free(device_node) = of_graph_get_remote_endpoint(cpu_ep); int ret; /* @@ -841,18 +840,18 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, * see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_NORMAL, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_NORMAL, codec_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, cpu_port, codec_port, li, 1); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, 1); return ret; } @@ -864,15 +863,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, { struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(lnk, NULL); struct device_node *rep __free(device_node) = of_graph_get_remote_endpoint(ep); - struct device_node *cpu_port = NULL; - struct device_node *codec_port = NULL; + struct device_node *cpu_ep = NULL; + struct device_node *codec_ep = NULL; struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); int is_cpu = graph_util_is_ports0(lnk); int ret; if (is_cpu) { - cpu_port = of_graph_get_remote_port(ep); /* rport */ + cpu_ep = rep; /* * dpcm { @@ -901,12 +900,12 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - ret = graph_parse_node(priv, GRAPH_DPCM, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_DPCM, cpu_ep, li, 1); if (ret) - goto err; + return ret; } else { - codec_port = of_graph_get_remote_port(ep); /* rport */ + codec_ep = rep; /* * dpcm { @@ -937,18 +936,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = simple_util_be_hw_params_fixup; - ret = graph_parse_node(priv, GRAPH_DPCM, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_DPCM, codec_ep, li, 0); if (ret < 0) - goto err; + return ret; } graph_parse_convert(ep, dai_props); /* at node of */ graph_parse_convert(rep, dai_props); /* at node of */ - graph_link_init(priv, lnk, cpu_port, codec_port, li, is_cpu); -err: - of_node_put(cpu_port); - of_node_put(codec_port); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, is_cpu); return ret; } @@ -1013,26 +1009,26 @@ int audio_graph2_link_c2c(struct simple_util_priv *priv, struct device_node *ep0 __free(device_node) = of_graph_get_next_port_endpoint(port0, NULL); struct device_node *ep1 __free(device_node) = of_graph_get_next_port_endpoint(port1, NULL); - struct device_node *codec0_port __free(device_node) = of_graph_get_remote_port(ep0); - struct device_node *codec1_port __free(device_node) = of_graph_get_remote_port(ep1); + struct device_node *codec0_ep __free(device_node) = of_graph_get_remote_endpoint(ep0); + struct device_node *codec1_ep __free(device_node) = of_graph_get_remote_endpoint(ep1); /* * call Codec first. * see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_C2C, codec1_port, li, 0); + ret = graph_parse_node(priv, GRAPH_C2C, codec1_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_C2C, codec0_port, li, 1); + ret = graph_parse_node(priv, GRAPH_C2C, codec0_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, codec0_port, codec1_port, li, 1); + graph_link_init(priv, lnk, codec0_ep, codec1_ep, li, 1); return ret; } diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 9caa4407c1ca3..6446cda0f8572 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1132,7 +1132,22 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, - { /* Vexia Edu Atla 10 tablet */ + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), diff --git a/sound/soc/renesas/Kconfig b/sound/soc/renesas/Kconfig index 426632996a0a3..cb01fb36355f0 100644 --- a/sound/soc/renesas/Kconfig +++ b/sound/soc/renesas/Kconfig @@ -67,7 +67,7 @@ config SND_SH7760_AC97 config SND_SIU_MIGOR tristate "SIU sound support on Migo-R" - depends on SH_MIGOR && I2C + depends on SH_MIGOR && I2C && DMADEVICES select SND_SOC_SH4_SIU select SND_SOC_WM8978 help diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index bd0dc586e24a3..7f5fcaecee4b6 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -22,7 +22,6 @@ #define DRV_NAME "rockchip-i2s-tdm" -#define DEFAULT_MCLK_FS 256 #define CH_GRP_MAX 4 /* The max channel 8 / 2 */ #define MULTIPLEX_CH_MAX 10 @@ -70,6 +69,8 @@ struct rk_i2s_tdm_dev { bool has_playback; bool has_capture; struct snd_soc_dai_driver *dai; + unsigned int mclk_rx_freq; + unsigned int mclk_tx_freq; }; static int to_ch_num(unsigned int val) @@ -617,6 +618,27 @@ static int rockchip_i2s_trcm_mode(struct snd_pcm_substream *substream, return 0; } +static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream, + unsigned int freq, int dir) +{ + struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai); + + if (i2s_tdm->clk_trcm) { + i2s_tdm->mclk_tx_freq = freq; + i2s_tdm->mclk_rx_freq = freq; + } else { + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + i2s_tdm->mclk_tx_freq = freq; + else + i2s_tdm->mclk_rx_freq = freq; + } + + dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n", + stream ? "rx" : "tx", freq); + + return 0; +} + static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -631,15 +653,19 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, if (i2s_tdm->clk_trcm == TRCM_TX) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else if (i2s_tdm->clk_trcm == TRCM_RX) { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } - err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); + err = clk_set_rate(mclk, mclk_rate); if (err) return err; @@ -799,6 +825,7 @@ static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = { .hw_params = rockchip_i2s_tdm_hw_params, .set_bclk_ratio = rockchip_i2s_tdm_set_bclk_ratio, .set_fmt = rockchip_i2s_tdm_set_fmt, + .set_sysclk = rockchip_i2s_tdm_set_sysclk, .set_tdm_slot = rockchip_dai_tdm_slot, .trigger = rockchip_i2s_tdm_trigger, }; diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index 0b85b29d1067f..1e7bf00d7c464 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -175,8 +175,7 @@ static int imx8_run(struct snd_sof_dev *sdev) static int imx8_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -606,11 +605,32 @@ static struct snd_sof_of_mach sof_imx8_machs[] = { .sof_tplg_filename = "sof-imx8-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", }, + { + .compatible = "fsl,imx8qxp-mek-wcpu", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, { .compatible = "fsl,imx8qm-mek", .sof_tplg_filename = "sof-imx8-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", }, + { + .compatible = "fsl,imx8qm-mek-revd", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qxp-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qm-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + {} }; diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index ff42743efa791..3cabdebac5580 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -144,8 +144,7 @@ static int imx8m_reset(struct snd_sof_dev *sdev) static int imx8m_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -294,6 +293,17 @@ static struct snd_soc_dai_driver imx8m_dai[] = { .channels_max = 32, }, }, +{ + .name = "sai2", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, { .name = "sai3", .playback = { @@ -305,6 +315,39 @@ static struct snd_soc_dai_driver imx8m_dai[] = { .channels_max = 32, }, }, +{ + .name = "sai5", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai6", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai7", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, { .name = "micfil", .capture = { @@ -471,6 +514,11 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = { }; static struct snd_sof_of_mach sof_imx8mp_machs[] = { + { + .compatible = "fsl,imx8mp-evk-revb4", + .sof_tplg_filename = "sof-imx8mp-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, { .compatible = "fsl,imx8mp-evk", .sof_tplg_filename = "sof-imx8mp-wm8960.tplg", diff --git a/sound/soc/sof/imx/imx8ulp.c b/sound/soc/sof/imx/imx8ulp.c index 6965791ab6ef6..0704da27e69d9 100644 --- a/sound/soc/sof/imx/imx8ulp.c +++ b/sound/soc/sof/imx/imx8ulp.c @@ -155,8 +155,7 @@ static int imx8ulp_reset(struct snd_sof_dev *sdev) static int imx8ulp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index 5282c0071534d..e1f0e38c24076 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -410,8 +410,7 @@ static int bdw_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index 536d4c89d2f02..cae7dc0036c6a 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -109,8 +109,7 @@ static int byt_acpi_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c index 9955dfa520ae2..31437fdd4e922 100644 --- a/sound/soc/sof/mediatek/mt8186/mt8186.c +++ b/sound/soc/sof/mediatek/mt8186/mt8186.c @@ -238,7 +238,7 @@ static int mt8186_run(struct snd_sof_dev *sdev) static int mt8186_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 6032b566c6795..371563d7ce795 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -228,7 +228,7 @@ static int mt8195_run(struct snd_sof_dev *sdev) static int mt8195_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; @@ -341,7 +341,7 @@ static int mt8195_dsp_shutdown(struct snd_sof_dev *sdev) static void mt8195_dsp_remove(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv = sdev->pdata->hw_pdata; platform_device_unregister(priv->ipc_dev); @@ -351,7 +351,7 @@ static void mt8195_dsp_remove(struct snd_sof_dev *sdev) static int mt8195_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); int ret; u32 reset_sw, dbg_pc; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7968d6a2f592a..a97efb7b131ea 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2343,6 +2343,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x2fc6, 0xf0b7, /* iBasso DC07 Pro */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d7c7d29291fbf..d466447ae928a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 5a3b8a5a9b5cf..8d1e6bbeac906 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -26,8 +26,12 @@ check_vmlinux() { trace_landlock() { echo "Tracing syscall ${syscall}" - # test flight just to see if landlock_add_rule and libbpf are available - $TESTPROG + # test flight just to see if landlock_add_rule is available + if ! perf trace $TESTPROG 2>&1 | grep -q landlock + then + echo "No landlock system call found, skipping to non-syscall tracing." + return + fi if perf trace -e $syscall $TESTPROG 2>&1 | \ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0d2ea22bd9e48..31bb326b07a68 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; } +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present then don't add again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; +} + static int annotation__config(const char *var, const char *value, void *data) { struct annotation_options *opt = data; @@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data) else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; } else if (!strcmp(var, "annotate.disassemblers")) { - opt->disassemblers_str = strdup(value); - if (!opt->disassemblers_str) { - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; - } + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2185,9 +2235,25 @@ void annotation_options__exit(void) zfree(&annotate_opts.objdump_path); } +static void annotation_options__default_init_disassemblers(struct annotation_options *options) +{ + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized. */ + return; + } +#ifdef HAVE_LIBLLVM_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_LLVM); +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE); +#endif + annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP); +} + void annotation_config__init(void) { perf_config(annotation__config, &annotate_opts); + annotation_options__default_init_disassemblers(&annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0ba5846dad4de..98db1b88daf43 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,8 +34,13 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 -// llvm, capstone, objdump -#define MAX_DISASSEMBLERS 3 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) struct annotation_options { bool hide_src_code, @@ -52,14 +57,12 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; - u8 nr_disassemblers; + u8 disassemblers[MAX_DISASSEMBLERS]; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; - const char *disassemblers_str; - const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; @@ -134,6 +137,8 @@ struct disasm_line { struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + void annotation_line__add(struct annotation_line *al, struct list_head *head); static inline double annotation_data__percent(struct annotation_data *data, diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 4a62ed593e84e..e4352881e3faa 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, - value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } else if (size > 0 && size <= value_size) { /* struct */ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) augmented = true; - } else if (size < 0 && size >= -6) { /* buffer */ + } else if ((int)size < 0 && size >= -6) { /* buffer */ index = -(size + 1); barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. index &= 7; // Satisfy the bounds checking with the verifier in some kernels. - aug_size = args->args[index]; - - if (aug_size > TRACE_AUG_MAX_BUF) - aug_size = TRACE_AUG_MAX_BUF; + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; if (aug_size > 0) { if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 27094211edd8a..5c329ad614e9b 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; /* There is no cluster_id on legacy system. */ - if (cluster == -1) + if (cluster < 0) cluster = 0; id = aggr_cpu_id__die(cpu, data); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index b7de4d9fd0045..50c5c206b70e7 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -2216,56 +2216,6 @@ static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, return err; } -static int annotation_options__init_disassemblers(struct annotation_options *options) -{ - char *disassembler; - - if (options->disassemblers_str == NULL) { - const char *default_disassemblers_str = -#ifdef HAVE_LIBLLVM_SUPPORT - "llvm," -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - "capstone," -#endif - "objdump"; - - options->disassemblers_str = strdup(default_disassemblers_str); - if (!options->disassemblers_str) - goto out_enomem; - } - - disassembler = strdup(options->disassemblers_str); - if (disassembler == NULL) - goto out_enomem; - - while (1) { - char *comma = strchr(disassembler, ','); - - if (comma != NULL) - *comma = '\0'; - - options->disassemblers[options->nr_disassemblers++] = strim(disassembler); - - if (comma == NULL) - break; - - disassembler = comma + 1; - - if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { - pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", - MAX_DISASSEMBLERS, disassembler); - break; - } - } - - return 0; - -out_enomem: - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; -} - int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *options = args->options; @@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) char symfs_filename[PATH_MAX]; bool delete_extract = false; struct kcore_extract kce; - const char *disassembler; bool decomp = false; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); @@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } - err = annotation_options__init_disassemblers(options); - if (err) - goto out_remove_tmp; - err = -1; + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { + enum perf_disassembler dis = options->disassemblers[i]; - for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { - disassembler = options->disassemblers[i]; - - if (!strcmp(disassembler, "llvm")) + switch (dis) { + case PERF_DISASM_LLVM: err = symbol__disassemble_llvm(symfs_filename, sym, args); - else if (!strcmp(disassembler, "capstone")) + break; + case PERF_DISASM_CAPSTONE: err = symbol__disassemble_capstone(symfs_filename, sym, args); - else if (!strcmp(disassembler, "objdump")) + break; + case PERF_DISASM_OBJDUMP: err = symbol__disassemble_objdump(symfs_filename, sym, args); - else - pr_debug("Unknown disassembler %s, skipping...\n", disassembler); - } - - if (err == 0) { - pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", - disassembler, options->disassemblers_str); + break; + case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ + default: + goto out_remove_tmp; + } + if (err == 0) + pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); } out_remove_tmp: if (decomp) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a7f7ed01421c1..99bf905ade812 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -136,7 +136,7 @@ displays the statistics gathered since it was forked. The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. .SH COLUMN DESCRIPTIONS .PP -\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. +\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus. .PP \fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. .PP @@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP -\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt. +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). .PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP @@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date. Volume 3B: System Programming Guide" https://www.intel.com/products/processor/manuals/ +.SH RUN THE LATEST VERSION +If turbostat complains that it doesn't recognize your processor, +please try the latest version. + +The latest version of turbostat does not require the latest version of the Linux kernel. +However, some features, such as perf(1) counters, do require kernel support. + +The latest turbostat release is available in the upstream Linux Kernel source tree. +eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git" +and run make in tools/power/x86/turbostat/. + +n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version. +For this reason, manually installing to /usr/local/bin may be what you want. + +Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file +that can build without a local kernel source tree. + +If the upstream version isn't new enough, the development tree can be found here: +"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat" + +If the development tree doesn't work, please contact the author via chat, +or via email with the word "turbostat" on the Subject line. + .SH FILES .ta .nf +/sys/bus/event_source/devices/ /dev/cpu/*/msr +/sys/class/intel_pmt/ +/sys/devices/system/cpu/ .fi .SH "SEE ALSO" -msr(4), vmstat(8) +perf(1), msr(4), vmstat(8) .PP .SH AUTHOR .nf diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 58a487c225a73..8d5011a0bf60d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2024 Intel Corporation. + * Copyright (c) 2025 Intel Corporation. * Len Brown */ @@ -95,6 +95,8 @@ #define INTEL_ECORE_TYPE 0x20 #define INTEL_PCORE_TYPE 0x40 +#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL)) + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -202,6 +204,8 @@ struct msr_counter bic[] = { { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -266,14 +270,16 @@ struct msr_counter bic[] = { #define BIC_Diec6 (1ULL << 58) #define BIC_SysWatt (1ULL << 59) #define BIC_Sys_J (1ULL << 60) +#define BIC_NMI (1ULL << 61) +#define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) -#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) -#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) +#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; @@ -326,6 +332,7 @@ unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; +unsigned int force_load; unsigned int has_aperf; unsigned int has_aperf_access; unsigned int has_epb; @@ -353,7 +360,7 @@ unsigned long long cpuidle_cur_sys_lpi_us; unsigned int tj_max; unsigned int tj_max_override; double rapl_power_units, rapl_time_units; -double rapl_dram_energy_units, rapl_energy_units; +double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units; double rapl_joule_counter_range; unsigned int crystal_hz; unsigned long long tsc_hz; @@ -365,6 +372,9 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int first_counter_read = 1; + +static struct timeval procsysfs_tv_begin; + int ignore_stdin; bool no_msr; bool no_perf; @@ -419,6 +429,7 @@ struct platform_features { bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ + bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */ int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ @@ -819,6 +830,7 @@ static const struct platform_features spr_features = { .has_msr_core_c1_res = 1, .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, + .has_fixed_rapl_psys_unit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; @@ -1024,6 +1036,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE_U, &adl_features }, { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, + { INTEL_PANTHERLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1036,6 +1049,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ATOM_GRACEMONT, &adl_features }, { INTEL_ATOM_CRESTMONT_X, &srf_features }, { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_ATOM_DARKMONT_X, &srf_features }, { INTEL_XEON_PHI_KNL, &knl_features }, { INTEL_XEON_PHI_KNM, &knl_features }, /* @@ -1054,9 +1068,10 @@ void probe_platform_features(unsigned int family, unsigned int model) { int i; - platform = &default_features; if (authentic_amd || hygon_genuine) { + /* fallback to default features on unsupported models */ + force_load++; if (max_extended_level >= 0x80000007) { unsigned int eax, ebx, ecx, edx; @@ -1065,11 +1080,11 @@ void probe_platform_features(unsigned int family, unsigned int model) if ((edx & (1 << 14)) && family >= 0x17) platform = &amd_features_with_rapl; } - return; + goto end; } if (!genuine_intel) - return; + goto end; for (i = 0; turbostat_pdata[i].features; i++) { if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { @@ -1077,6 +1092,19 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } } + +end: + if (force_load && !platform) { + fprintf(outf, "Forced to run on unsupported platform!\n"); + platform = &default_features; + } + + if (platform) + return; + + fprintf(stderr, "Unsupported platform detected.\n" + "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); + exit(1); } /* Model specific support End */ @@ -1094,8 +1122,8 @@ int backwards_count; char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ -cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; +cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 @@ -1271,7 +1299,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr = MSR_PLATFORM_ENERGY_STATUS, .msr_mask = 0x00000000FFFFFFFF, .msr_shift = 0, - .platform_rapl_msr_scale = &rapl_energy_units, + .platform_rapl_msr_scale = &rapl_psys_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, .bic = BIC_SysWatt | BIC_Sys_J, .compat_scale = 1.0, @@ -1510,6 +1538,17 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = { #define PMT_COUNTER_MTL_DC6_LSB 0 #define PMT_COUNTER_MTL_DC6_MSB 63 #define PMT_MTL_DC6_GUID 0x1a067102 +#define PMT_MTL_DC6_SEQ 0 + +#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936 +#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24 +#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12 +#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4 +#define PMT_COUNTER_CWF_MC1E_LSB 0 +#define PMT_COUNTER_CWF_MC1E_MSB 63 +#define PMT_CWF_MC1E_GUID 0x14421519 + +unsigned long long tcore_clock_freq_hz = 800000000; #define PMT_COUNTER_NAME_SIZE_BYTES 16 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 @@ -1533,6 +1572,7 @@ struct pmt_mmio { enum pmt_datatype { PMT_TYPE_RAW, PMT_TYPE_XTAL_TIME, + PMT_TYPE_TCORE_CLOCK, }; struct pmt_domain_info { @@ -1562,6 +1602,93 @@ struct pmt_counter { struct pmt_domain_info *domains; }; +/* + * PMT telemetry directory iterator. + * Used to iterate telemetry files in sysfs in correct order. + */ +struct pmt_diriter_t { + DIR *dir; + struct dirent **namelist; + unsigned int num_names; + unsigned int current_name_idx; +}; + +int pmt_telemdir_filter(const struct dirent *e) +{ + unsigned int dummy; + + return sscanf(e->d_name, "telem%u", &dummy); +} + +int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) +{ + unsigned int aidx = 0, bidx = 0; + + sscanf((*a)->d_name, "telem%u", &aidx); + sscanf((*b)->d_name, "telem%u", &bidx); + + return aidx >= bidx; +} + +const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) +{ + const struct dirent *ret = NULL; + + if (!iter->dir) + return NULL; + + if (iter->current_name_idx >= iter->num_names) + return NULL; + + ret = iter->namelist[iter->current_name_idx]; + ++iter->current_name_idx; + + return ret; +} + +const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path) +{ + int num_names = iter->num_names; + + if (!iter->dir) { + iter->dir = opendir(pmt_root_path); + if (iter->dir == NULL) + return NULL; + + num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort); + if (num_names == -1) + return NULL; + } + + iter->current_name_idx = 0; + iter->num_names = num_names; + + return pmt_diriter_next(iter); +} + +void pmt_diriter_init(struct pmt_diriter_t *iter) +{ + memset(iter, 0, sizeof(*iter)); +} + +void pmt_diriter_remove(struct pmt_diriter_t *iter) +{ + if (iter->namelist) { + for (unsigned int i = 0; i < iter->num_names; i++) { + free(iter->namelist[i]); + iter->namelist[i] = NULL; + } + } + + free(iter->namelist); + iter->namelist = NULL; + iter->num_names = 0; + iter->current_name_idx = 0; + + closedir(iter->dir); + iter->dir = NULL; +} + unsigned int pmt_counter_get_width(const struct pmt_counter *p) { return (p->msb - p->lsb) + 1; @@ -1611,6 +1738,7 @@ struct thread_data { unsigned long long c1; unsigned long long instr_count; unsigned long long irq_count; + unsigned long long nmi_count; unsigned int smi_count; unsigned int cpu_id; unsigned int apic_id; @@ -1917,6 +2045,7 @@ struct timeval tv_even, tv_odd, tv_delta; int *irq_column_2_cpu; /* /proc/interrupts column numbers */ int *irqs_per_cpu; /* indexed by cpu_num */ +int *nmi_per_cpu; /* indexed by cpu_num */ void setup_all_buffers(bool startup); @@ -1944,6 +2073,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk { int retval, pkg_no, core_no, thread_no, node_no; + retval = 0; + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { @@ -1959,14 +2090,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk c = GET_CORE(core_base, core_no, node_no, pkg_no); p = GET_PKG(pkg_base, pkg_no); - retval = func(t, c, p); - if (retval) - return retval; + retval |= func(t, c, p); } } } } - return 0; + return retval; } int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -2085,13 +2214,17 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) int probe_msr(int cpu, off_t offset) { ssize_t retval; - unsigned long long dummy; + unsigned long long value; assert(!no_msr); - retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); + retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); - if (retval != sizeof(dummy)) + /* + * Expect MSRs to accumulate some non-zero value since the system was powered on. + * Treat zero as a read failure. + */ + if (retval != sizeof(value) || value == 0) return 1; return 0; @@ -2135,41 +2268,54 @@ void help(void) "when COMMAND completes.\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n" "to print statistics, until interrupted.\n" - " -a, --add add a counter\n" + " -a, --add counter\n" + " add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" - " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " -c, --cpu cpu-set\n" + " limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" - " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -d, --debug\n" + " displays usec, Time_Of_Day_Seconds and more debugging\n" " debug messages are printed to stderr\n" - " -D, --Dump displays the raw counter values\n" - " -e, --enable [all | column]\n" + " -D, --Dump\n" + " displays the raw counter values\n" + " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" - " -H, --hide [column|column,column,...]\n" + " -f, --force\n" + " force load turbostat with minimum default features on unsupported platforms.\n" + " -H, --hide [column | column,column,...]\n" " hide the specified column(s)\n" " -i, --interval sec.subsec\n" - " Override default 5-second measurement interval\n" - " -J, --Joules displays energy in Joules instead of Watts\n" - " -l, --list list column headers only\n" - " -M, --no-msr Disable all uses of the MSR driver\n" - " -P, --no-perf Disable all uses of the perf API\n" + " override default 5-second measurement interval\n" + " -J, --Joules\n" + " displays energy in Joules instead of Watts\n" + " -l, --list\n" + " list column headers only\n" + " -M, --no-msr\n" + " disable all uses of the MSR driver\n" + " -P, --no-perf\n" + " disable all uses of the perf API\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" " -N, --header_iterations num\n" " print header every num iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" - " -q, --quiet skip decoding system configuration header\n" - " -s, --show [column|column,column,...]\n" + " -q, --quiet\n" + " skip decoding system configuration header\n" + " -s, --show [column | column,column,...]\n" " show only the specified column(s)\n" " -S, --Summary\n" " limits output to 1-line system summary per interval\n" " -T, --TCC temperature\n" " sets the Thermal Control Circuit temperature in\n" " degrees Celsius\n" - " -h, --help print this help message\n" - " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); + " -h, --help\n" + " print this help message\n" + " -v, --version\n" + " print version information\n\nFor more help, run \"man turbostat\"\n"); } /* @@ -2289,6 +2435,12 @@ void print_header(char *delim) else outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); } + if (DO_BIC(BIC_NMI)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s NMI", (printed++ ? delim : "")); + else + outp += sprintf(outp, "%sNMI", (printed++ ? delim : "")); + } if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); @@ -2335,6 +2487,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2409,6 +2562,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2540,6 +2694,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2575,6 +2730,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); + if (DO_BIC(BIC_NMI)) + outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count); if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); @@ -2794,6 +2951,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); } + /* NMI */ + if (DO_BIC(BIC_NMI)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count); + } + /* SMI */ if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); @@ -2848,7 +3013,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = t->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2860,8 +3025,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -2928,7 +3098,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = c->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2940,8 +3110,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -3126,7 +3301,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = p->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -3138,8 +3313,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -3409,6 +3589,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; + if (DO_BIC(BIC_NMI)) + old->nmi_count = new->nmi_count - old->nmi_count; + if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; @@ -3447,12 +3630,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c, /* always calculate thread delta */ retval = delta_thread(t, t2, c2); /* c2 is core delta */ - if (retval) - return retval; /* calculate package delta only for 1st core in package */ if (is_cpu_first_core_in_package(t, c, p)) - retval = delta_package(p, p2); + retval |= delta_package(p, p2); return retval; } @@ -3489,6 +3670,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->instr_count = 0; t->irq_count = 0; + t->nmi_count = 0; t->smi_count = 0; c->c3 = 0; @@ -3580,7 +3762,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) /* remember first tv_begin */ if (average.threads.tv_begin.tv_sec == 0) - average.threads.tv_begin = t->tv_begin; + average.threads.tv_begin = procsysfs_tv_begin; /* remember last tv_end */ average.threads.tv_end = t->tv_end; @@ -3593,6 +3775,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.instr_count += t->instr_count; average.threads.irq_count += t->irq_count; + average.threads.nmi_count += t->nmi_count; average.threads.smi_count += t->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -3734,6 +3917,9 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.threads.irq_count > 9999999) sums_need_wide_columns = 1; + if (average.threads.nmi_count > 9999999) + sums_need_wide_columns = 1; + average.cores.c3 /= topo.allowed_cores; average.cores.c6 /= topo.allowed_cores; @@ -4546,7 +4732,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) { - assert(domain_id < ppmt->num_domains); + if (domain_id >= ppmt->num_domains) + return 0; const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; const unsigned long value = pmmio ? *pmmio : 0; @@ -4590,6 +4777,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; + if (DO_BIC(BIC_NMI)) + t->nmi_count = nmi_per_cpu[cpu]; get_cstate_counters(cpu, t, c, p); @@ -5335,6 +5524,7 @@ void free_all_buffers(void) free(irq_column_2_cpu); free(irqs_per_cpu); + free(nmi_per_cpu); for (i = 0; i <= topo.max_cpu_num; ++i) { if (cpus[i].put_ids) @@ -5566,6 +5756,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, { int retval, pkg_no, node_no, core_no, thread_no; + retval = 0; + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { @@ -5587,14 +5779,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, p = GET_PKG(pkg_base, pkg_no); p2 = GET_PKG(pkg_base2, pkg_no); - retval = func(t, c, p, t2, c2, p2); - if (retval) - return retval; + retval |= func(t, c, p, t2, c2, p2); } } } } - return 0; + return retval; } /* @@ -5791,31 +5981,37 @@ int snapshot_proc_interrupts(void) irq_column_2_cpu[column] = cpu_number; irqs_per_cpu[cpu_number] = 0; + nmi_per_cpu[cpu_number] = 0; } /* read /proc/interrupt count lines and sum up irqs per cpu */ while (1) { int column; char buf[64]; + int this_row_is_nmi = 0; - retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ + retval = fscanf(fp, " %s:", buf); /* irq# "N:" */ if (retval != 1) break; + if (strncmp(buf, "NMI", strlen("NMI")) == 0) + this_row_is_nmi = 1; + /* read the count per cpu */ for (column = 0; column < topo.num_cpus; ++column) { int cpu_number, irq_count; retval = fscanf(fp, " %d", &irq_count); + if (retval != 1) break; cpu_number = irq_column_2_cpu[column]; irqs_per_cpu[cpu_number] += irq_count; - + if (this_row_is_nmi) + nmi_per_cpu[cpu_number] += irq_count; } - while (getc(fp) != '\n') ; /* flush interrupt description */ } @@ -5912,7 +6108,9 @@ int snapshot_sys_lpi_us(void) */ int snapshot_proc_sysfs_files(void) { - if (DO_BIC(BIC_IRQ)) + gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL); + + if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI)) if (snapshot_proc_interrupts()) return 1; @@ -7043,6 +7241,11 @@ void rapl_probe_intel(void) else rapl_dram_energy_units = rapl_energy_units; + if (platform->has_fixed_rapl_psys_unit) + rapl_psys_energy_units = 1.0; + else + rapl_psys_energy_units = rapl_energy_units; + time_unit = msr >> 16 & 0xF; if (time_unit == 0) time_unit = 0xA; @@ -8233,6 +8436,7 @@ void process_cpuid() aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_NMI); BIC_PRESENT(BIC_TSC_MHz); } @@ -8292,6 +8496,33 @@ int dir_filter(const struct dirent *dirp) return 0; } +char *possible_file = "/sys/devices/system/cpu/possible"; +char possible_buf[1024]; + +int initialize_cpu_possible_set(void) +{ + FILE *fp; + + fp = fopen(possible_file, "r"); + if (!fp) { + warn("open %s", possible_file); + return -1; + } + if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { + warn("read %s", possible_file); + goto err; + } + if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { + warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str); + goto err; + } + return 0; + +err: + fclose(fp); + return -1; +} + void topology_probe(bool startup) { int i; @@ -8323,6 +8554,16 @@ void topology_probe(bool startup) CPU_ZERO_S(cpu_present_setsize, cpu_present_set); for_all_proc_cpus(mark_cpu_present); + /* + * Allocate and initialize cpu_possible_set + */ + cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_possible_set == NULL) + err(3, "CPU_ALLOC"); + cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); + initialize_cpu_possible_set(); + /* * Allocate and initialize cpu_effective_set */ @@ -8583,7 +8824,11 @@ void allocate_irq_buffers(void) irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (irqs_per_cpu == NULL) - err(-1, "calloc %d", topo.max_cpu_num + 1); + err(-1, "calloc %d IRQ", topo.max_cpu_num + 1); + + nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (nmi_per_cpu == NULL) + err(-1, "calloc %d NMI", topo.max_cpu_num + 1); } int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -8854,49 +9099,36 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) { - DIR *dirp; - struct dirent *entry; + struct pmt_diriter_t pmt_iter; + const struct dirent *entry; struct stat st; - unsigned int telem_idx; int fd_telem_dir, fd_pmt; unsigned long guid, size, offset; size_t mmap_size; void *mmio; - struct pmt_mmio *ret = NULL; + struct pmt_mmio *head = NULL, *last = NULL; + struct pmt_mmio *new_pmt = NULL; if (stat(SYSFS_TELEM_PATH, &st) == -1) return NULL; - dirp = opendir(SYSFS_TELEM_PATH); - if (dirp == NULL) + pmt_diriter_init(&pmt_iter); + entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); + if (!entry) { + pmt_diriter_remove(&pmt_iter); return NULL; + } - for (;;) { - entry = readdir(dirp); - - if (entry == NULL) - break; - - if (strcmp(entry->d_name, ".") == 0) - continue; - - if (strcmp(entry->d_name, "..") == 0) - continue; - - if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) - continue; - - if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { + if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) break; - } if (!S_ISDIR(st.st_mode)) continue; - fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); - if (fd_telem_dir == -1) { + fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) break; - } if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { close(fd_telem_dir); @@ -8924,38 +9156,54 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) if (fd_pmt == -1) goto loop_cleanup_and_break; - mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmap_size = ROUND_UP_TO_PAGE_SIZE(size); mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); if (mmio != MAP_FAILED) { - if (debug) fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); - ret = calloc(1, sizeof(*ret)); + new_pmt = calloc(1, sizeof(*new_pmt)); - if (!ret) { + if (!new_pmt) { fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); exit(1); } - ret->guid = guid; - ret->mmio_base = mmio; - ret->pmt_offset = offset; - ret->size = size; + /* + * Create linked list of mmaped regions, + * but preserve the ordering from sysfs. + * Ordering is important for the user to + * use the seq=%u parameter when adding a counter. + */ + new_pmt->guid = guid; + new_pmt->mmio_base = mmio; + new_pmt->pmt_offset = offset; + new_pmt->size = size; + new_pmt->next = pmt_mmios; + + if (last) + last->next = new_pmt; + else + head = new_pmt; - ret->next = pmt_mmios; - pmt_mmios = ret; + last = new_pmt; } loop_cleanup_and_break: close(fd_pmt); close(fd_telem_dir); - break; } - closedir(dirp); + pmt_diriter_remove(&pmt_iter); - return ret; + /* + * If we found something, stick just + * created linked list to the front. + */ + if (head) + pmt_mmios = head; + + return head; } struct pmt_mmio *pmt_mmio_find(unsigned int guid) @@ -8992,7 +9240,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs return ret; } -struct pmt_mmio *pmt_add_guid(unsigned int guid) +struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq) { struct pmt_mmio *ret; @@ -9000,6 +9248,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid) if (!ret) ret = pmt_mmio_open(guid); + while (ret && seq) { + ret = ret->next; + --seq; + } + return ret; } @@ -9046,7 +9299,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, pcounter->domains[domain_id].pcounter = pmmio; } -int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, +int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type, unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) { @@ -9066,10 +9319,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, exit(1); } - mmio = pmt_add_guid(guid); + mmio = pmt_add_guid(guid, seq); if (!mmio) { if (mode != PMT_OPEN_TRY) { - fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq); exit(1); } @@ -9124,10 +9377,68 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, void pmt_init(void) { + int cpu_num; + unsigned long seq, offset, mod_num; + if (BIC_IS_ENABLED(BIC_Diec6)) { - pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, - PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, - 0, PMT_OPEN_TRY); + pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME, + PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, + SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY); + } + + if (BIC_IS_ENABLED(BIC_CPU_c1e)) { + seq = 0; + offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; + mod_num = 0; /* Relative module number for current PMT file. */ + + /* Open the counter for each CPU. */ + for (cpu_num = 0; cpu_num < topo.max_cpu_num;) { + + if (cpu_is_not_allowed(cpu_num)) + goto next_loop_iter; + + /* + * Set the scope to CPU, even though CWF report the counter per module. + * CPUs inside the same module will read from the same location, instead of reporting zeros. + * + * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. + */ + pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, + PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, + FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); + + /* + * Rather complex logic for each time we go to the next loop iteration, + * so keep it as a label. + */ +next_loop_iter: + /* + * Advance the cpu number and check if we should also advance offset to + * the next counter inside the PMT file. + * + * On Clearwater Forest platform, the counter is reported per module, + * so open the same counter for all of the CPUs inside the module. + * That way, reported table show the correct value for all of the CPUs inside the module, + * instead of zeros. + */ + ++cpu_num; + if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) { + offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT; + ++mod_num; + } + + /* + * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file. + * + * If that number is reached, seq must be incremented to advance to the next file in a sequence. + * Offset inside that file and a module counter has to be reset. + */ + if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) { + ++seq; + offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; + mod_num = 0; + } + } } } @@ -9163,6 +9474,18 @@ void turbostat_init() } } +void affinitize_child(void) +{ + /* Prefer cpu_possible_set, if available */ + if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) { + warn("sched_setaffinity cpu_possible_set"); + + /* Otherwise, allow child to run on same cpu set as turbostat */ + if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set)) + warn("sched_setaffinity cpu_allowed_set"); + } +} + int fork_it(char **argv) { pid_t child_pid; @@ -9178,6 +9501,7 @@ int fork_it(char **argv) child_pid = fork(); if (!child_pid) { /* child */ + affinitize_child(); execvp(argv[0], argv); err(errno, "exec %s", argv[0]); } else { @@ -9204,10 +9528,9 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); - else { - compute_average(EVEN_COUNTERS); - format_all_counters(EVEN_COUNTERS); - } + + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); @@ -9236,7 +9559,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.11.30 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 @@ -9561,7 +9884,7 @@ void parse_add_command_msr(char *add_command) } if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n"); fail++; } @@ -9599,15 +9922,100 @@ bool starts_with(const char *str, const char *prefix) return strncmp(prefix, str, strlen(prefix)) == 0; } +int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq) +{ + struct pmt_diriter_t pmt_iter; + const struct dirent *dirname; + struct stat stat, target_stat; + int fd_telem_dir = -1; + int fd_target_dir; + unsigned int seq = 0; + unsigned long guid, target_guid; + int ret = -1; + + fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY); + if (fd_target_dir == -1) { + return -1; + } + + if (fstat(fd_target_dir, &target_stat) == -1) { + fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno)); + exit(1); + } + + if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) { + fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno)); + exit(1); + } + + close(fd_target_dir); + + pmt_diriter_init(&pmt_iter); + + for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; + dirname = pmt_diriter_next(&pmt_iter)) { + + fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); + if (fd_telem_dir == -1) + continue; + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno)); + continue; + } + + if (fstat(fd_telem_dir, &stat) == -1) { + fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, + dirname->d_name, strerror(errno)); + continue; + } + + /* + * If reached the same directory as target, exit the loop. + * Seq has the correct value now. + */ + if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) { + ret = 0; + break; + } + + /* + * If reached directory with the same guid, + * but it's not the target directory yet, + * increment seq and continue the search. + */ + if (guid == target_guid) + ++seq; + + close(fd_telem_dir); + fd_telem_dir = -1; + } + + pmt_diriter_remove(&pmt_iter); + + if (fd_telem_dir != -1) + close(fd_telem_dir); + + if (!ret) { + *out_guid = target_guid; + *out_seq = seq; + } + + return ret; +} + void parse_add_command_pmt(char *add_command) { char *name = NULL; char *type_name = NULL; char *format_name = NULL; + char *direct_path = NULL; + static const char direct_path_prefix[] = "path="; unsigned int offset; unsigned int lsb; unsigned int msb; unsigned int guid; + unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */ unsigned int domain_id; enum counter_scope scope = 0; enum pmt_datatype type = PMT_TYPE_RAW; @@ -9687,6 +10095,13 @@ void parse_add_command_pmt(char *add_command) goto next; } + if (sscanf(add_command, "seq=%x", &seq) == 1) + goto next; + + if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) { + direct_path = add_command + strlen(direct_path_prefix); + goto next; + } next: add_command = strchr(add_command, ','); if (add_command) { @@ -9737,6 +10152,11 @@ void parse_add_command_pmt(char *add_command) has_type = true; } + if (strcmp("tcore_clock", type_name) == 0) { + type = PMT_TYPE_TCORE_CLOCK; + has_type = true; + } + if (!has_type) { printf("%s: invalid %s: %s\n", __func__, "type", type_name); exit(1); @@ -9758,8 +10178,24 @@ void parse_add_command_pmt(char *add_command) exit(1); } + if (direct_path && has_guid) { + printf("%s: path and guid+seq parameters are mutually exclusive\n" + "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path); + exit(1); + } + + if (direct_path) { + if (pmt_parse_from_path(direct_path, &guid, &seq)) { + printf("%s: failed to parse PMT file from %s\n", __func__, direct_path); + exit(1); + } + + /* GUID was just infered from the direct path. */ + has_guid = true; + } + if (!has_guid) { - printf("%s: missing %s\n", __func__, "guid"); + printf("%s: missing %s\n", __func__, "guid or path"); exit(1); } @@ -9773,7 +10209,7 @@ void parse_add_command_pmt(char *add_command) exit(1); } - pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); + pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); } void parse_add_command(char *add_command) @@ -9921,6 +10357,7 @@ void cmdline(int argc, char **argv) { "Dump", no_argument, 0, 'D' }, { "debug", no_argument, 0, 'd' }, /* internal, not documented */ { "enable", required_argument, 0, 'e' }, + { "force", no_argument, 0, 'f' }, { "interval", required_argument, 0, 'i' }, { "IPC", no_argument, 0, 'I' }, { "num_iterations", required_argument, 0, 'n' }, @@ -9981,6 +10418,9 @@ void cmdline(int argc, char **argv) /* --enable specified counter */ bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); break; + case 'f': + force_load++; + break; case 'd': debug++; ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca8a7edff3dda..319aaa004c407 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): try: # this targets queue 4, which doesn't exist ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") except CmdExitFailure: pass else: @@ -259,7 +260,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): # change the table to target queues 0 and 2 ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 - ntuple2 = ethtool_create(cfg, "-N", flow) + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c index 4d3f4525e1e1c..55bce47e56b73 100644 --- a/tools/testing/selftests/exec/check-exec.c +++ b/tools/testing/selftests/exec/check-exec.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,12 @@ #include "../kselftest_harness.h" +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + static void drop_privileges(struct __test_metadata *const _metadata) { const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; @@ -219,8 +226,8 @@ static void test_exec_fd(struct __test_metadata *_metadata, const int fd, * test framework as an error. With AT_EXECVE_CHECK, we only check a * potential successful execution. */ - access_ret = - execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK); + access_ret = sys_execveat(fd, "", argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); access_errno = errno; if (err_code) { EXPECT_EQ(-1, access_ret); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 2af86bd796bab..aa6f2c1cbec7b 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -59,6 +59,12 @@ int open_tree(int dfd, const char *filename, unsigned int flags) } #endif +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) #endif @@ -2024,8 +2030,8 @@ static void test_check_exec(struct __test_metadata *const _metadata, int ret; char *const argv[] = { (char *)path, NULL }; - ret = execveat(AT_FDCWD, path, argv, NULL, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + ret = sys_execveat(AT_FDCWD, path, argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err) { EXPECT_EQ(-1, ret); EXPECT_EQ(errno, err); diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb402335..15601402dee65 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -306,7 +306,8 @@ function check_result { result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ - sed 's/^\[[ 0-9.]*\] //') + sed 's/^\[[ 0-9.]*\] //' | \ + sed 's/^\[[ ]*[CT][0-9]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 414addef9a451..d240d02fa443a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1302,7 +1302,7 @@ int main_loop(void) return ret; if (cfg_truncate > 0) { - xdisconnect(fd); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { xdisconnect(fd); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec53..36ff28af4b190 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -102,6 +102,19 @@ struct testcase testcases_v4[] = { .gso_len = CONST_MSS_V4, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, @@ -205,6 +218,19 @@ struct testcase testcases_v6[] = { .gso_len = CONST_MSS_V6, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore index 9ae7964491d50..7d9c87cd06497 100644 --- a/tools/testing/selftests/riscv/vector/.gitignore +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -1,3 +1,4 @@ vstate_exec_nolibc vstate_prctl -v_initval_nolibc +v_initval +v_exec_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index bfff0ff4f3bef..6f7497f4e7b30 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,18 +2,27 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := vstate_prctl v_initval_nolibc -TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc +TEST_GEN_PROGS := v_initval vstate_prctl +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc include ../../lib.mk -$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S +$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/v_helpers.o: v_helpers.c + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc -$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c +$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c new file mode 100644 index 0000000000000..35c0812e32de0 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Get values of vector registers as soon as the program starts to test if + * is properly cleaning the values before starting a new program. Vector + * registers are caller saved, so no function calls may happen before reading + * the values. To further ensure consistency, this file is compiled without + * libc and without auto-vectorization. + * + * To be "clean" all values must be either all ones or all zeroes. + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +int main(int argc, char **argv) +{ + char prev_value = 0, value; + unsigned long vl; + int first = 1; + + if (argc > 2 && strcmp(argv[2], "x")) + asm volatile ( + // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli + // vsetvli t4, x0, e8, m1, d1 + ".4byte 0b00000000000000000111111011010111\n\t" + "mv %[vl], t4\n\t" + : [vl] "=r" (vl) : : "t4" + ); + else + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %[vl], x0, e8, m1, ta, ma\n\t" + ".option pop\n\t" + : [vl] "=r" (vl) + ); + +#define CHECK_VECTOR_REGISTER(register) ({ \ + for (int i = 0; i < vl; i++) { \ + asm volatile ( \ + ".option push\n\t" \ + ".option arch, +v\n\t" \ + "vmv.x.s %0, " __stringify(register) "\n\t" \ + "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ + ".option pop\n\t" \ + : "=r" (value)); \ + if (first) { \ + first = 0; \ + } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + printf("Register " __stringify(register) \ + " values not clean! value: %u\n", value); \ + exit(-1); \ + } \ + prev_value = value; \ + } \ +}) + + CHECK_VECTOR_REGISTER(v0); + CHECK_VECTOR_REGISTER(v1); + CHECK_VECTOR_REGISTER(v2); + CHECK_VECTOR_REGISTER(v3); + CHECK_VECTOR_REGISTER(v4); + CHECK_VECTOR_REGISTER(v5); + CHECK_VECTOR_REGISTER(v6); + CHECK_VECTOR_REGISTER(v7); + CHECK_VECTOR_REGISTER(v8); + CHECK_VECTOR_REGISTER(v9); + CHECK_VECTOR_REGISTER(v10); + CHECK_VECTOR_REGISTER(v11); + CHECK_VECTOR_REGISTER(v12); + CHECK_VECTOR_REGISTER(v13); + CHECK_VECTOR_REGISTER(v14); + CHECK_VECTOR_REGISTER(v15); + CHECK_VECTOR_REGISTER(v16); + CHECK_VECTOR_REGISTER(v17); + CHECK_VECTOR_REGISTER(v18); + CHECK_VECTOR_REGISTER(v19); + CHECK_VECTOR_REGISTER(v20); + CHECK_VECTOR_REGISTER(v21); + CHECK_VECTOR_REGISTER(v22); + CHECK_VECTOR_REGISTER(v23); + CHECK_VECTOR_REGISTER(v24); + CHECK_VECTOR_REGISTER(v25); + CHECK_VECTOR_REGISTER(v26); + CHECK_VECTOR_REGISTER(v27); + CHECK_VECTOR_REGISTER(v28); + CHECK_VECTOR_REGISTER(v29); + CHECK_VECTOR_REGISTER(v30); + CHECK_VECTOR_REGISTER(v31); + +#undef CHECK_VECTOR_REGISTER + + return 0; +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c new file mode 100644 index 0000000000000..01a8799dcb786 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../hwprobe/hwprobe.h" +#include +#include +#include +#include +#include +#include + +bool is_xtheadvector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR; +} + +bool is_vector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_EXT_ZVE32X; +} + +int launch_test(char *next_program, int test_inherit, int xtheadvector) +{ + char *exec_argv[4], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + printf("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = next_program; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = xtheadvector != 0 ? "x" : NULL; + exec_argv[3] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(next_program, exec_argv, exec_envp); + if (rc) { + perror("execve"); + printf("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + printf("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + printf("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h new file mode 100644 index 0000000000000..763cddfe26dad --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include + +bool is_xtheadvector_supported(void); + +bool is_vector_supported(void); + +int launch_test(char *next_program, int test_inherit, int xtheadvector); diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c new file mode 100644 index 0000000000000..be9e1d18ad295 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../../kselftest_harness.h" +#include "v_helpers.h" + +#define NEXT_PROGRAM "./v_exec_initval_nolibc" + +TEST(v_initval) +{ + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c deleted file mode 100644 index 6174ffe016dc1..0000000000000 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include "../../kselftest.h" -#define MAX_VSIZE (8192 * 32) - -void dump(char *ptr, int size) -{ - int i = 0; - - for (i = 0; i < size; i++) { - if (i != 0) { - if (i % 16 == 0) - printf("\n"); - else if (i % 8 == 0) - printf(" "); - } - printf("%02x ", ptr[i]); - } - printf("\n"); -} - -int main(void) -{ - int i; - unsigned long vl; - char *datap, *tmp; - - ksft_set_plan(1); - - datap = malloc(MAX_VSIZE); - if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); - exit(-1); - } - - tmp = datap; - asm volatile ( - ".option push\n\t" - ".option arch, +v\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%2)\n\t" - "add %1, %2, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); - - ksft_print_msg("vl = %lu\n", vl); - - if (datap[0] != 0x00 && datap[0] != 0xff) { - ksft_test_result_fail("v-regesters are not properly initialized\n"); - dump(datap, vl * 4); - exit(-1); - } - - for (i = 1; i < vl * 4; i++) { - if (datap[i] != datap[0]) { - ksft_test_result_fail("detect stale values on v-regesters\n"); - dump(datap, vl * 4); - exit(-2); - } - } - - free(datap); - - ksft_test_result_pass("tests for v_initval_nolibc pass\n"); - ksft_exit_pass(); - return 0; -} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 1f9969bed2355..7b7d6f21acb45 100644 --- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -6,13 +6,16 @@ int main(int argc, char **argv) { - int rc, pid, status, test_inherit = 0; + int rc, pid, status, test_inherit = 0, xtheadvector = 0; long ctrl, ctrl_c; char *exec_argv[2], *exec_envp[2]; - if (argc > 1) + if (argc > 1 && strcmp(argv[1], "x")) test_inherit = 1; + if (argc > 2 && strcmp(argv[2], "x")) + xtheadvector = 1; + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); if (ctrl < 0) { puts("PR_RISCV_V_GET_CONTROL is not supported\n"); @@ -53,11 +56,14 @@ int main(int argc, char **argv) puts("child's vstate_ctrl not equal to parent's\n"); exit(-1); } - asm volatile (".option push\n\t" - ".option arch, +v\n\t" - "vsetvli x0, x0, e32, m8, ta, ma\n\t" - ".option pop\n\t" - ); + if (xtheadvector) + asm volatile (".4byte 0x00007ed7"); + else + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); exit(ctrl); } } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 40b3bffcbb409..62fbb17a05566 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -3,181 +3,244 @@ #include #include #include +#include +#include -#include "../hwprobe/hwprobe.h" -#include "../../kselftest.h" +#include "../../kselftest_harness.h" +#include "v_helpers.h" #define NEXT_PROGRAM "./vstate_exec_nolibc" -static int launch_test(int test_inherit) -{ - char *exec_argv[3], *exec_envp[1]; - int rc, pid, status; - - pid = fork(); - if (pid < 0) { - ksft_test_result_fail("fork failed %d", pid); - return -1; - } - if (!pid) { - exec_argv[0] = NEXT_PROGRAM; - exec_argv[1] = test_inherit != 0 ? "x" : NULL; - exec_argv[2] = NULL; - exec_envp[0] = NULL; - /* launch the program again to check inherit */ - rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); - if (rc) { - perror("execve"); - ksft_test_result_fail("child execve failed %d\n", rc); - exit(-1); - } - } - - rc = waitpid(-1, &status, 0); - if (rc < 0) { - ksft_test_result_fail("waitpid failed\n"); - return -3; - } - - if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || - WIFSIGNALED(status)) { - ksft_test_result_fail("child exited abnormally\n"); - return -4; - } - - return WEXITSTATUS(status); -} - -int test_and_compare_child(long provided, long expected, int inherit) +int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector) { int rc; rc = prctl(PR_RISCV_V_SET_CONTROL, provided); if (rc != 0) { - ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", - provided, rc); + printf("prctl with provided arg %lx failed with code %d\n", + provided, rc); return -1; } - rc = launch_test(inherit); + rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %ld\n", rc, - expected); + printf("Test failed, check %d != %ld\n", rc, expected); return -2; } return 0; } -#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 -#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 -int main(void) +TEST(get_control_no_v) { - struct riscv_hwprobe pair; - long flag, expected; long rc; - ksft_set_plan(1); + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); - pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; - rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); - if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %ld\n", rc); - return -1; - } + rc = prctl(PR_RISCV_V_GET_CONTROL); + EXPECT_EQ(-1, rc) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); +} - if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { - ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); - return -2; - } +TEST(set_control_no_v) +{ + long rc; - if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { - rc = prctl(PR_RISCV_V_GET_CONTROL); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -3; - } - - rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -4; - } - - ksft_test_result_skip("Vector not supported\n"); - return 0; - } + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + EXPECT_EQ(-1, rc) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); +} + +TEST(vstate_on_current) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_ON; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != 0) { - ksft_test_result_fail("Enabling V for current should always success\n"); - return -5; - } + EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed"); +} + +TEST(vstate_off_eperm) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_OFF; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != -1 || errno != EPERM) { - ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", - errno); - return -5; + EXPECT_EQ(EPERM, errno) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); + EXPECT_EQ(-1, rc) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); +} + +TEST(vstate_on_no_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); } /* Turn on next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) - return -6; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector)); +} + +TEST(vstate_off_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) - return -7; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector)); +} + +TEST(vstate_on_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + /* Turn on next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_on_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn on next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; - if (test_and_compare_child(flag, expected, 0)) - return -8; - if (test_and_compare_child(flag, expected, 1)) - return -9; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +TEST(vstate_off_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + /* Turn off next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_off_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; - if (test_and_compare_child(flag, expected, 0)) - return -10; - if (test_and_compare_child(flag, expected, 1)) - return -11; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_1) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - /* arguments should fail with EINVAL */ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_2) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } +/* arguments should fail with EINVAL */ +TEST(inval_set_control_3) +{ + int rc; - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); - ksft_exit_pass(); - return 0; + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d3dd65b05b5f1..9044ac0541672 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -94,5 +94,37 @@ "$TC qdisc del dev $DUMMY ingress", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "a4b9", + "name": "Test class qlen notification", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2: handle 3: drr", + "$TC filter add dev $DUMMY parent 3: basic action drop", + "$TC class add dev $DUMMY parent 3: classid 3:1 drr", + "$TC class del dev $DUMMY classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json index ae3d286a32b2e..6f20d033670d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -313,6 +313,29 @@ "matchPattern": "qdisc bfifo 1: root", "matchCount": "0", "teardown": [ + ] + }, + { + "id": "d774", + "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0", + "category": [ + "qdisc", + "pfifo_head_drop" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0", + "$IP link set dev $DUMMY up || true" + ], + "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "dropped 2", + "matchCount": "1", + "teardown": [ ] } ] diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 0ea4f6813930b..4d4a76532dc9a 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam) * both pointers should point to the same address. * * @return: - * 0: value on the pointer with metadate and value on original are same + * 0: value on the pointer with metadata and value on original are same * 1: not same. */ static int handle_lam_test(void *src, unsigned int lam)